int32p.inc 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2013 by the Free Pascal development team
  4. This file contains some helper routines for longint and dword
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {$define FPC_SYSTEM_HAS_MUL_DWORD}
  12. function fpc_mul_dword( f1, f2: dword ): dword; [public,alias: 'FPC_MUL_DWORD']; compilerproc;
  13. begin
  14. { routine contributed by Max Nazhalov
  15. 32-bit multiplications summary:
  16. f1 = A1*$10000+A0
  17. f2 = B1*$10000+B0
  18. (A1:A0*B1:B0) = (A1*B1)<<32 + (A1*B0)<<16 + (A0*B1)<<16 + (A0*B0)
  19. A1*B1 [only needed for overflow checking; overflow if <>0]
  20. A1*B0
  21. A0*B1
  22. A0:B0
  23. }
  24. asm
  25. mov cx,word[f1]
  26. mov ax,word[f1+2]
  27. mov di,word[f2]
  28. mov si,word[f2+2]
  29. mul di
  30. xchg ax,si
  31. mul cx
  32. add si,ax
  33. xchg ax,di { 1 byte shorter than mov }
  34. mul cx
  35. add dx,si
  36. mov word[result],ax
  37. mov word[result+2],dx
  38. end [ 'ax','cx','dx','si','di' ];
  39. end;
  40. function fpc_mul_dword_checkoverflow( f1, f2: dword ): dword; [public,alias: 'FPC_MUL_DWORD_CHECKOVERFLOW']; compilerproc;
  41. begin
  42. { routine contributed by Max Nazhalov
  43. 32-bit multiplications summary:
  44. f1 = A1*$10000+A0
  45. f2 = B1*$10000+B0
  46. (A1:A0*B1:B0) = (A1*B1)<<32 + (A1*B0)<<16 + (A0*B1)<<16 + (A0*B0)
  47. A1*B1 [only needed for overflow checking; overflow if <>0]
  48. A1*B0
  49. A0*B1
  50. A0:B0
  51. }
  52. asm
  53. mov cx,word[f1]
  54. mov ax,word[f1+2]
  55. mov di,word[f2]
  56. mov si,word[f2+2]
  57. test ax,ax
  58. jz @@skip
  59. test si,si
  60. jnz @@overflow
  61. mul di
  62. test dx,dx
  63. jnz @@overflow
  64. @@skip:
  65. xchg ax,si
  66. mul cx
  67. test dx,dx
  68. jnz @@overflow
  69. add si,ax
  70. jc @@overflow
  71. xchg ax,di { 1 byte shorter than mov }
  72. mul cx
  73. add dx,si
  74. jc @@overflow
  75. // checked and succeed
  76. mov word[result],ax
  77. mov word[result+2],dx
  78. jmp @@done
  79. @@overflow:
  80. call FPC_OVERFLOW
  81. @@done:
  82. end [ 'ax','cx','dx','si','di' ];
  83. end;
  84. {$define FPC_SYSTEM_HAS_DIV_DWORD}
  85. function fpc_div_dword( n, z: dword ): dword; [public, alias:'FPC_DIV_DWORD']; compilerproc;
  86. begin
  87. { routine contributed by Max Nazhalov }
  88. result := 0;
  89. if n=0 then
  90. HandleErrorAddrFrameInd(200,get_pc_addr,get_frame);
  91. asm
  92. mov ax,word [z]
  93. mov dx,word [z+2]
  94. mov bx,word [n]
  95. mov cx,word [n+2]
  96. // check for underflow: z<n
  97. mov si,dx
  98. cmp ax,bx
  99. sbb si,cx
  100. jc @@3
  101. // select one of 3 trivial cases
  102. test cx,cx
  103. jnz @@1
  104. cmp dx,bx
  105. jnc @@0
  106. // (i) single division: n<=0xFFFF, z<=(n<<16)-1
  107. div bx
  108. mov word [result],ax
  109. jmp @@3
  110. @@0: // (ii) two divisions: n<=0xFFFF, z>(n<<16)-1
  111. // q1 := [0:z1] div n; r := [0:z1] mod n;
  112. // q0 := [r:z0] div n;
  113. xchg ax,cx
  114. xchg ax,dx
  115. { dx=0, ax=z1, cx=z0 }
  116. div bx
  117. xchg ax,cx
  118. { dx=r, ax=z0, cx=q1 }
  119. div bx
  120. mov word [result],ax
  121. mov word [result+2],cx
  122. jmp @@3
  123. @@1: // (iii) long divisor: n>=0x10000 (hence q<=0xFFFF)
  124. // Special case of the generic "schoolbook" division [see e.g. Knuth]:
  125. // 1. normalize divisor: [n1:n0] := n<<m, so that 0x8000<=n1<=0xFFFF
  126. // n>=0x10000 -> m<=15
  127. // 2. adjust divident accordingly: [z2:z1:z0] := z<<m
  128. // m<=15 -> z2<=0x7FFF
  129. // implementation: instead do >> dropping n0 and z0
  130. mov si,bx // save n0
  131. mov di,cx // save n1
  132. test ch,ch
  133. jz @@2
  134. mov bl,bh
  135. mov bh,cl
  136. mov cl,ch
  137. mov al,ah
  138. mov ah,dl
  139. mov dl,dh
  140. xor dh,dh
  141. @@2: // repeat >> 1..8 times resulting in [dx:ax]=[z2:z1] and bx=n1
  142. shr cl,1
  143. rcr bx,1
  144. shr dx,1
  145. rcr ax,1
  146. test cl,cl
  147. jnz @@2
  148. // 3. estimate quotient: q_hat := [z2:z1]/n1
  149. // Division never overflows since z2<=0x7FFF and n1>0x7FFF
  150. div bx
  151. // 4. multiply & subtract calculating remainder:
  152. // r := z-n*q_hat (z and n are original)
  153. // 5. adjust quotient: while (r<0) do { q_hat-=1; r+=n };
  154. // theoretically, 0..2 iterations are required [see e.g. Knuth];
  155. // in practice, with such initial data, at most one iteration
  156. // is needed (no disproof has been found yet; and if it will
  157. // ever be found -- it also should raise doubts about the i386
  158. // fpc_div_qword helper again; see FPC mantis #23963)
  159. mov cx,ax // save q_hat
  160. mul si
  161. mov bx,ax
  162. mov si,dx
  163. mov ax,cx
  164. mul di
  165. xor di,di
  166. add ax,si
  167. adc dx,di // [dx:ax:bx] := n*q_hat; di=0
  168. mov si,word [z]
  169. sub si,bx
  170. mov si,word [z+2]
  171. sbb si,ax
  172. sbb di,dx
  173. sbb cx,0
  174. // 6. done: q := [0:cx]
  175. mov word [result],cx
  176. @@3:
  177. end;
  178. end;
  179. {$define FPC_SYSTEM_HAS_MOD_DWORD}
  180. function fpc_mod_dword( n, z: dword ): dword; [public, alias:'FPC_MOD_DWORD']; compilerproc;
  181. begin
  182. { routine contributed by Max Nazhalov }
  183. result := z;
  184. if n=0 then
  185. HandleErrorAddrFrameInd(200,get_pc_addr,get_frame);
  186. asm
  187. mov ax,word [z]
  188. mov dx,word [z+2]
  189. mov bx,word [n]
  190. mov cx,word [n+2]
  191. // check for underflow: z<n
  192. mov si,dx
  193. cmp ax,bx
  194. sbb si,cx
  195. jc @@4
  196. // select one of 3 trivial cases
  197. test cx,cx
  198. jnz @@1
  199. cmp dx,bx
  200. jnc @@0
  201. // (i) single division: n<=0xFFFF, z<=(n<<16)-1
  202. div bx
  203. jmp @@3 // r=cx:dx (cx=0)
  204. @@0: // (ii) two divisions: n<=0xFFFF, z>(n<<16)-1
  205. // q1 := [0:z1] div n; r := [0:z1] mod n;
  206. // q0 := [r:z0] div n; r := [r:z0] mod n;
  207. xchg ax,cx
  208. xchg ax,dx
  209. { dx=0, ax=z1, cx=z0 }
  210. div bx
  211. mov ax,cx
  212. xor cx,cx
  213. { dx=r, ax=z0, cx=0 }
  214. div bx
  215. jmp @@3 // r=cx:dx (cx=0)
  216. @@1: // (iii) long divisor: n>=0x10000 (hence q<=0xFFFF)
  217. // Special case of the generic "schoolbook" division [see e.g. Knuth]:
  218. // 1. normalize divisor: [n1:n0] := n<<m, so that 0x8000<=n1<=0xFFFF
  219. // n>=0x10000 -> m<=15
  220. // 2. adjust divident accordingly: [z2:z1:z0] := z<<m
  221. // m<=15 -> z2<=0x7FFF
  222. // implementation: instead do >> dropping n0 and z0
  223. mov si,bx // save n0
  224. mov di,cx // save n1
  225. test ch,ch
  226. jz @@2
  227. mov bl,bh
  228. mov bh,cl
  229. mov cl,ch
  230. mov al,ah
  231. mov ah,dl
  232. mov dl,dh
  233. xor dh,dh
  234. @@2: // repeat >> 1..8 times resulting in [dx:ax]=[z2:z1] and bx=n1
  235. shr cl,1
  236. rcr bx,1
  237. shr dx,1
  238. rcr ax,1
  239. test cl,cl
  240. jnz @@2
  241. // 3. estimate quotient: q_hat := [z2:z1]/n1
  242. // Division never overflows since z2<=0x7FFF and n1>0x7FFF
  243. div bx
  244. // 4. multiply & subtract calculating remainder:
  245. // r := z-n*q_hat (z and n are original)
  246. // 5. adjust quotient: while (r<0) do { q_hat-=1; r+=n };
  247. // theoretically, 0..2 iterations are required [see e.g. Knuth];
  248. // in practice, with such initial data, at most one iteration
  249. // is needed (no disproof has been found yet; and if it will
  250. // ever be found -- it also should raise doubts about the i386
  251. // fpc_div_qword helper again; see FPC mantis #23963)
  252. mov cx,ax // save q_hat
  253. mul si
  254. mov bx,ax
  255. mov si,dx
  256. mov ax,cx
  257. mul di
  258. xor di,di
  259. add ax,si
  260. adc dx,di // [dx:ax:bx] := n*q_hat; di=0
  261. mov si,word [z]
  262. mov cx,word [z+2]
  263. sub si,bx
  264. sbb cx,ax
  265. sbb di,dx
  266. mov dx,si
  267. jnc @@3
  268. add dx,word [n]
  269. adc cx,word [n+2]
  270. @@3: // done: r=cx:dx
  271. mov word [result],dx
  272. mov word [result+2],cx
  273. @@4:
  274. end;
  275. end;