int32p.inc 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2013 by the Free Pascal development team
  4. This file contains some helper routines for longint and dword
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {$define FPC_SYSTEM_HAS_MUL_DWORD}
  12. function fpc_mul_dword( f1, f2: dword; checkoverflow: boolean ): dword; [public,alias: 'FPC_MUL_DWORD']; compilerproc;
  13. begin
  14. { routine contributed by Max Nazhalov
  15. 32-bit multiplications summary:
  16. f1 = A1*$10000+A0
  17. f2 = B1*$10000+B0
  18. (A1:A0*B1:B0) = (A1*B1)<<32 + (A1*B0)<<16 + (A0*B1)<<16 + (A0*B0)
  19. A1*B1 [only needed for overflow checking; overflow if <>0]
  20. A1*B0
  21. A0*B1
  22. A0:B0
  23. }
  24. asm
  25. mov cx,word[f1]
  26. mov ax,word[f1+2]
  27. mov di,word[f2]
  28. mov si,word[f2+2]
  29. cmp checkoverflow,0
  30. jne @@checked
  31. mul di
  32. xchg ax,si
  33. mul cx
  34. add si,ax
  35. mov ax,di
  36. mul cx
  37. add dx,si
  38. jmp @@done
  39. @@checked:
  40. test ax,ax
  41. jz @@skip
  42. test si,si
  43. jnz @@done
  44. mul di
  45. test dx,dx
  46. jnz @@done
  47. @@skip:
  48. xchg ax,si
  49. mul cx
  50. test dx,dx
  51. jnz @@done
  52. add si,ax
  53. jc @@done
  54. mov ax,di
  55. mul cx
  56. add dx,si
  57. jc @@done
  58. // checked and succeed
  59. mov checkoverflow,0
  60. @@done:
  61. mov word[result],ax
  62. mov word[result+2],dx
  63. end [ 'ax','cx','dx','si','di' ];
  64. if checkoverflow then
  65. HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
  66. end;
  67. {$define FPC_SYSTEM_HAS_DIV_DWORD}
  68. function fpc_div_dword( n, z: dword ): dword; [public, alias:'FPC_DIV_DWORD']; compilerproc;
  69. begin
  70. { routine contributed by Max Nazhalov }
  71. result := 0;
  72. if n=0 then
  73. HandleErrorAddrFrameInd(200,get_pc_addr,get_frame);
  74. asm
  75. mov ax,word [z]
  76. mov dx,word [z+2]
  77. mov bx,word [n]
  78. mov cx,word [n+2]
  79. // check for underflow: z<n
  80. mov si,dx
  81. cmp ax,bx
  82. sbb si,cx
  83. jc @@3
  84. // select one of 3 trivial cases
  85. test cx,cx
  86. jnz @@1
  87. cmp dx,bx
  88. jnc @@0
  89. // (i) single division: n<=0xFFFF, z<=(n<<16)-1
  90. div bx
  91. mov word [result],ax
  92. jmp @@3
  93. @@0: // (ii) two divisions: n<=0xFFFF, z>(n<<16)-1
  94. // q1 := [0:z1] div n; r := [0:z1] mod n;
  95. // q0 := [r:z0] div n;
  96. xchg ax,cx
  97. xchg ax,dx
  98. { dx=0, ax=z1, cx=z0 }
  99. div bx
  100. xchg ax,cx
  101. { dx=r, ax=z0, cx=q1 }
  102. div bx
  103. mov word [result],ax
  104. mov word [result+2],cx
  105. jmp @@3
  106. @@1: // (iii) long divisor: n>=0x10000 (hence q<=0xFFFF)
  107. // Special case of the generic "schoolbook" division [see e.g. Knuth]:
  108. // 1. normalize divisor: [n1:n0] := n<<m, so that 0x8000<=n1<=0xFFFF
  109. // n>=0x10000 -> m<=15
  110. // 2. adjust divident accordingly: [z2:z1:z0] := z<<m
  111. // m<=15 -> z2<=0x7FFF
  112. // implementation: instead do >> dropping n0 and z0
  113. mov si,bx // save n0
  114. mov di,cx // save n1
  115. test ch,ch
  116. jz @@2
  117. mov bl,bh
  118. mov bh,cl
  119. mov cl,ch
  120. mov al,ah
  121. mov ah,dl
  122. mov dl,dh
  123. xor dh,dh
  124. @@2: // repeat >> 1..8 times resulting in [dx:ax]=[z2:z1] and bx=n1
  125. shr cl,1
  126. rcr bx,1
  127. shr dx,1
  128. rcr ax,1
  129. test cl,cl
  130. jnz @@2
  131. // 3. estimate quotient: q_hat := [z2:z1]/n1
  132. // Division never overflows since z2<=0x7FFF and n1>0x7FFF
  133. div bx
  134. // 4. multiply & subtract calculating remainder:
  135. // r := z-n*q_hat (z and n are original)
  136. // 5. adjust quotient: while (r<0) do { q_hat-=1; r+=n };
  137. // theoretically, 0..2 iterations are required [see e.g. Knuth];
  138. // in practice, with such initial data, at most one iteration
  139. // is needed (no disproof has been found yet; and if it will
  140. // ever be found -- it also should raise doubts about the i386
  141. // fpc_div_qword helper again; see FPC mantis #23963)
  142. mov cx,ax // save q_hat
  143. mul si
  144. mov bx,ax
  145. mov si,dx
  146. mov ax,cx
  147. mul di
  148. xor di,di
  149. add ax,si
  150. adc dx,di // [dx:ax:bx] := n*q_hat; di=0
  151. mov si,word [z]
  152. sub si,bx
  153. mov si,word [z+2]
  154. sbb si,ax
  155. sbb di,dx
  156. sbb cx,0
  157. // 6. done: q := [0:cx]
  158. mov word [result],cx
  159. @@3:
  160. end;
  161. end;
  162. {$define FPC_SYSTEM_HAS_MOD_DWORD}
  163. function fpc_mod_dword( n, z: dword ): dword; [public, alias:'FPC_MOD_DWORD']; compilerproc;
  164. begin
  165. { routine contributed by Max Nazhalov }
  166. result := z;
  167. if n=0 then
  168. HandleErrorAddrFrameInd(200,get_pc_addr,get_frame);
  169. asm
  170. mov ax,word [z]
  171. mov dx,word [z+2]
  172. mov bx,word [n]
  173. mov cx,word [n+2]
  174. // check for underflow: z<n
  175. mov si,dx
  176. cmp ax,bx
  177. sbb si,cx
  178. jc @@4
  179. // select one of 3 trivial cases
  180. test cx,cx
  181. jnz @@1
  182. cmp dx,bx
  183. jnc @@0
  184. // (i) single division: n<=0xFFFF, z<=(n<<16)-1
  185. div bx
  186. jmp @@3 // r=cx:dx (cx=0)
  187. @@0: // (ii) two divisions: n<=0xFFFF, z>(n<<16)-1
  188. // q1 := [0:z1] div n; r := [0:z1] mod n;
  189. // q0 := [r:z0] div n; r := [r:z0] mod n;
  190. xchg ax,cx
  191. xchg ax,dx
  192. { dx=0, ax=z1, cx=z0 }
  193. div bx
  194. mov ax,cx
  195. xor cx,cx
  196. { dx=r, ax=z0, cx=0 }
  197. div bx
  198. jmp @@3 // r=cx:dx (cx=0)
  199. @@1: // (iii) long divisor: n>=0x10000 (hence q<=0xFFFF)
  200. // Special case of the generic "schoolbook" division [see e.g. Knuth]:
  201. // 1. normalize divisor: [n1:n0] := n<<m, so that 0x8000<=n1<=0xFFFF
  202. // n>=0x10000 -> m<=15
  203. // 2. adjust divident accordingly: [z2:z1:z0] := z<<m
  204. // m<=15 -> z2<=0x7FFF
  205. // implementation: instead do >> dropping n0 and z0
  206. mov si,bx // save n0
  207. mov di,cx // save n1
  208. test ch,ch
  209. jz @@2
  210. mov bl,bh
  211. mov bh,cl
  212. mov cl,ch
  213. mov al,ah
  214. mov ah,dl
  215. mov dl,dh
  216. xor dh,dh
  217. @@2: // repeat >> 1..8 times resulting in [dx:ax]=[z2:z1] and bx=n1
  218. shr cl,1
  219. rcr bx,1
  220. shr dx,1
  221. rcr ax,1
  222. test cl,cl
  223. jnz @@2
  224. // 3. estimate quotient: q_hat := [z2:z1]/n1
  225. // Division never overflows since z2<=0x7FFF and n1>0x7FFF
  226. div bx
  227. // 4. multiply & subtract calculating remainder:
  228. // r := z-n*q_hat (z and n are original)
  229. // 5. adjust quotient: while (r<0) do { q_hat-=1; r+=n };
  230. // theoretically, 0..2 iterations are required [see e.g. Knuth];
  231. // in practice, with such initial data, at most one iteration
  232. // is needed (no disproof has been found yet; and if it will
  233. // ever be found -- it also should raise doubts about the i386
  234. // fpc_div_qword helper again; see FPC mantis #23963)
  235. mov cx,ax // save q_hat
  236. mul si
  237. mov bx,ax
  238. mov si,dx
  239. mov ax,cx
  240. mul di
  241. xor di,di
  242. add ax,si
  243. adc dx,di // [dx:ax:bx] := n*q_hat; di=0
  244. mov si,word [z]
  245. mov cx,word [z+2]
  246. sub si,bx
  247. sbb cx,ax
  248. sbb di,dx
  249. mov dx,si
  250. jnc @@3
  251. add dx,word [n]
  252. adc cx,word [n+2]
  253. @@3: // done: r=cx:dx
  254. mov word [result],dx
  255. mov word [result+2],cx
  256. @@4:
  257. end;
  258. end;