{
    This file is part of the Free Pascal run time library.
    Copyright (c) 1999-2000 by the Free Pascal development team

    This file contains some helper routines for int64 and qword

    See the file COPYING.FPC, included in this distribution,
    for details about the copyright.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}
  {$Q- no overflow checking }
  {$R- no range checking }

  { Reports a 64-bit division by zero.

    The div/mod helpers in this file jump here (they do not call) once they
    have found a zero divisor. Neither parameter is read; the signature is
    the same as that of the qword division helpers, presumably so that the
    arguments already on the stack are cleaned up by this routine - confirm
    against the calling convention in use.

    HandleErrorFrame receives the error code and the current frame pointer.
    No function result is assigned. }
  function div_qword_throwdivbyzero(n,z : qword) : qword;
    const
      { Error code handed to HandleErrorFrame for a zero divisor. }
      DivByZeroErrorCode = 200;
    begin
      HandleErrorFrame(DivByZeroErrorCode,get_frame);
    end;
  {$define FPC_SYSTEM_HAS_DIV_INT64}
  { Signed 64-bit division for i386: computes z div n, result in edx:eax.

    Both operands are replaced by their absolute values, divided as unsigned
    numbers, and the quotient is negated when the operand signs differ.
    Three cases are distinguished on the absolute values:
      - divisor has a non-zero high dword: estimate-and-correct path;
      - hi(dividend) >= lo(divisor): two chained 32-bit divisions
        (a zero divisor is detected on this path);
      - otherwise: one 32-bit division.
    A zero divisor is forwarded to div_qword_throwdivbyzero.
    Every normal exit restores ebx/esi/edi and returns with ret 16. }
  function fpc_div_int64(n,z : int64) : int64;assembler;nostackframe;[public,alias: 'FPC_DIV_INT64']; compilerproc;
  { On entry: n = [esp + 12], z = [esp + 4]. }
    asm
          { Three registers are saved, so argument offsets below carry +12. }
          pushl  %ebx
          pushl  %esi
          pushl  %edi
          { the following piece of code is taken from the     }
          { AMD Athlon Processor x86 Code Optimization manual }
          movl   12+16(%esp),%ecx        { ecx = hi(n) }
          movl   12+12(%esp),%ebx        { ebx = lo(n) }
          movl   12+8(%esp),%edx         { edx = hi(z) }
          movl   12+4(%esp),%eax         { eax = lo(z) }

          { esi = result sign mask: all ones when n and z differ in sign. }
          movl   %ecx,%esi
          xorl   %edx,%esi
          sarl   $31,%esi

          { edx:eax = abs(z): xor with the sign mask, then subtract it. }
          movl   %edx,%edi
          sarl   $31,%edi
          xorl   %edi,%eax
          xorl   %edi,%edx
          subl   %edi,%eax
          sbbl   %edi,%edx

          { ecx:ebx = abs(n); the final sbbl leaves ZF set when hi = 0. }
          movl   %ecx,%edi
          sarl   $31,%edi
          xorl   %edi,%ebx
          xorl   %edi,%ecx
          subl   %edi,%ebx
          sbbl   %edi,%ecx
          jnz    .Ldiv_wide_divisor      { divisor needs more than 32 bits }

          cmpl   %ebx,%edx
          jae    .Ldiv_two_steps         { quotient may exceed 32 bits }
          divl   %ebx                    { eax = quotient }
  .Ldiv_clear_hi:
          xorl   %edx,%edx               { quotient fits in eax, hi = 0 }
  .Ldiv_apply_sign:
          { Conditionally negate edx:eax using the mask in esi. }
          xorl   %esi,%eax
          xorl   %esi,%edx
          subl   %esi,%eax
          sbbl   %esi,%edx
          popl   %edi
          popl   %esi
          popl   %ebx
          ret    $16

  .Ldiv_two_steps:
          testl  %ebx,%ebx               { Zero division ends up here with ebx = 0. }
          jz     .Ldiv_by_zero
          movl   %eax,%ecx               { keep lo(dividend) }
          movl   %edx,%eax
          xorl   %edx,%edx
          divl   %ebx                    { eax = hi(quotient), edx = remainder }
          xchgl  %ecx,%eax               { ecx = hi(quotient), eax = lo(dividend) }
          divl   %ebx                    { eax = lo(quotient) }
          movl   %ecx,%edx
          jmp    .Ldiv_apply_sign

  .Ldiv_wide_divisor:
          { Keep the absolute values for the correction step. }
          movl   %eax,12+4(%esp)         { Reuse n~z stack space. }
          movl   %ebx,12+8(%esp)         { lo(divisor) }
          movl   %edx,12+12(%esp)        { hi(dividend) }
          movl   %ecx,%edi               { edi = hi(divisor) }

          { Shift dividend and divisor right by the same amount so that
            the divisor fits in ebx. }
          shrl   $1,%edx
          rcrl   $1,%eax
          rorl   $1,%edi
          rcrl   $1,%ebx
          bsrl   %ecx,%ecx               { cl = index of top set bit of hi(divisor) }
          shrdl  %cl,%edi,%ebx
          shrdl  %cl,%edx,%eax
          shrl   %cl,%edx
          roll   $1,%edi                 { undo the rorl: edi = hi(divisor) }
          divl   %ebx                    { eax = quotient estimate }

          { edx:eax = estimate * divisor (low 64 bits), then subtract it from
            the dividend; only the final borrow is used. }
          movl   12+4(%esp),%ebx         { ebx = lo(dividend) }
          movl   %eax,%ecx               { ecx = estimate }
          imull  %eax,%edi
          mull   12+8(%esp)
          addl   %edi,%edx
          subl   %eax,%ebx
          movl   %ecx,%eax               { mov leaves the borrow in CF intact }
          movl   12+12(%esp),%ecx
          sbbl   %edx,%ecx
          sbbl   $0,%eax                 { estimate - 1 when the product was too big }
          jmp    .Ldiv_clear_hi

  .Ldiv_by_zero:
          { Restore the saved registers, then hand over with the arguments
            still on the stack. }
          popl   %edi
          popl   %esi
          popl   %ebx
          jmp    div_qword_throwdivbyzero
    end;
  {$define FPC_SYSTEM_HAS_MOD_INT64}
  { Signed 64-bit remainder for i386: computes z mod n, result in edx:eax.

    Both operands are replaced by their absolute values and the unsigned
    remainder is computed; it is then negated when the dividend z is
    negative (the sign mask in esi is taken from hi(z) only).
    Three cases are distinguished on the absolute values:
      - divisor has a non-zero high dword: estimate the quotient, subtract
        quotient*divisor from the dividend and add the divisor back when
        that subtraction borrowed;
      - hi(dividend) >= lo(divisor): two chained 32-bit divisions
        (a zero divisor is detected on this path);
      - otherwise: one 32-bit division.
    A zero divisor is forwarded to div_qword_throwdivbyzero.
    Every normal exit restores ebx/esi/edi and returns with ret 16. }
  function fpc_mod_int64(n,z : int64) : int64;assembler;nostackframe;[public,alias: 'FPC_MOD_INT64']; compilerproc;
  { n = [esp + 12], z = [esp + 4]. }
    asm
          { Three registers are saved, so argument offsets below carry +12. }
          push   %ebx
          push   %esi
          push   %edi
          { the following piece of code is taken from the     }
          { AMD Athlon Processor x86 Code Optimization manual }
          movl   12+16(%esp),%ecx        { ecx = hi(n) }
          movl   12+12(%esp),%ebx        { ebx = lo(n) }
          movl   12+8(%esp),%edx         { edx = hi(z) }
          movl   12+4(%esp),%eax         { eax = lo(z) }

          { esi = result sign mask: all ones when z is negative. }
          movl   %edx,%esi
          sarl   $31,%esi

          { edx:eax = abs(z) }
          movl   %edx,%edi
          sarl   $31,%edi
          xorl   %edi,%eax
          xorl   %edi,%edx
          subl   %edi,%eax
          sbbl   %edi,%edx

          { ecx:ebx = abs(n); the final sbbl leaves ZF set when hi = 0. }
          movl   %ecx,%edi
          sarl   $31,%edi
          xorl   %edi,%ebx
          xorl   %edi,%ecx
          subl   %edi,%ebx
          sbbl   %edi,%ecx
          jnz    .Lbig_divisor

          cmpl   %ebx,%edx
          jae    .Ltwo_divs
          divl   %ebx                    { edx = remainder }
          movl   %edx,%eax
          movl   %ecx,%edx               { ecx = 0 here, so hi(result) = 0 }
  .Lmake_sign:
          { Conditionally negate edx:eax using the mask in esi. }
          xorl   %esi,%eax
          xorl   %esi,%edx
          subl   %esi,%eax
          sbbl   %esi,%edx
          pop    %edi
          pop    %esi
          pop    %ebx
          ret    $16

  .Ltwo_divs:
          test   %ebx,%ebx               { Zero division ends up here with ebx = 0. }
          jz     .Ldivzero
          movl   %eax,%ecx               { keep lo(dividend) }
          movl   %edx,%eax
          xorl   %edx,%edx
          divl   %ebx                    { edx = remainder of the high dword }
          movl   %ecx,%eax
          divl   %ebx                    { edx = final remainder }
          movl   %edx,%eax
          xorl   %edx,%edx
          jmp    .Lmake_sign

  .Lbig_divisor:
          { Keep all four absolute-value dwords for the correction step. }
          movl   %eax,12+4(%esp)         { Reuse n~z stack space. }
          movl   %ebx,12+8(%esp)         { lo(divisor) }
          movl   %edx,12+12(%esp)        { hi(dividend) }
          movl   %ecx,12+16(%esp)        { hi(divisor) }
          movl   %ecx,%edi               { edi = hi(divisor) }

          { Shift dividend and divisor right by the same amount so that
            the divisor fits in ebx. }
          shrl   $1,%edx
          rcrl   $1,%eax
          rorl   $1,%edi
          rcrl   $1,%ebx
          bsrl   %ecx,%ecx               { cl = index of top set bit of hi(divisor) }
          shrdl  %cl,%edi,%ebx
          shrdl  %cl,%edx,%eax
          shrl   %cl,%edx
          roll   $1,%edi                 { undo the rorl: edi = hi(divisor) }
          divl   %ebx                    { eax = quotient estimate }

          { ecx:ebx = dividend - estimate * divisor (low 64 bits). }
          movl   12+4(%esp),%ebx         { ebx = lo(dividend) }
          imull  %eax,%edi
          mull   12+8(%esp)
          addl   %edi,%edx
          subl   %eax,%ebx
          movl   12+12(%esp),%ecx        { mov leaves the borrow in CF intact }
          sbbl   %edx,%ecx

          { eax = all ones when the subtraction borrowed, else 0; use it
            to add either the divisor or zero to the remainder. }
          sbbl   %eax,%eax
          movl   12+16(%esp),%edx
          andl   %eax,%edx
          andl   12+8(%esp),%eax
          addl   %ebx,%eax
          adcl   %ecx,%edx
          jmp    .Lmake_sign

  .Ldivzero:
          { Restore the saved registers, then hand over with the arguments
            still on the stack. }
          pop    %edi
          pop    %esi
          pop    %ebx
          jmp    div_qword_throwdivbyzero
    end;
  {$define FPC_SYSTEM_HAS_DIV_QWORD}
  { Unsigned 64-bit division for i386: computes z div n, result in edx:eax.

    Three cases:
      - hi(n) <> 0: estimate the quotient from shifted operands and
        decrement it when estimate*n exceeds z;
      - hi(z) >= lo(n): two chained 32-bit divisions (a zero divisor is
        detected on this path and forwarded to div_qword_throwdivbyzero);
      - otherwise: one 32-bit division.
    The two short paths return with ret 16 and touch only eax, ecx, edx. }
  function fpc_div_qword(n,z : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_DIV_QWORD']; compilerproc;
  { On entry: n = [esp + 12], z = [esp + 4]. }
    asm
          { the following piece of code is taken from the     }
          { AMD Athlon Processor x86 Code Optimization manual }
          movl   16(%esp),%ecx           { ecx = hi(n) }
          testl  %ecx,%ecx
          jnz    .Lqdiv_wide_divisor
          movl   12(%esp),%ecx           { ecx = lo(n) }
          movl   8(%esp),%edx            { edx = hi(z) }
          cmpl   %ecx,%edx
          jae    .Lqdiv_two_steps        { quotient may exceed 32 bits }
          movl   4(%esp),%eax            { eax = lo(z) }
          divl   %ecx
          xorl   %edx,%edx               { hi(result) = 0 }
          ret    $16

  .Lqdiv_two_steps:
          testl  %ecx,%ecx               { Zero division ends up here with ecx = 0. }
          jz     div_qword_throwdivbyzero { nothing pushed yet: jump directly }
          movl   %edx,%eax
          xorl   %edx,%edx
          divl   %ecx
          pushl  %eax                    { eax = future hi(result); remember }
          movl   4+4(%esp),%eax          { eax = lo(z); +4 for the push above }
          divl   %ecx
          popl   %edx
          ret    $16

  .Lqdiv_wide_divisor:
          { Three registers are saved, so argument offsets below carry +12. }
          pushl  %ebx
          pushl  %esi
          pushl  %edi
          movl   12+12(%esp),%ebx        { ebx = lo(n) }
          movl   12+8(%esp),%edx         { edx = hi(z) }
          movl   12+4(%esp),%eax         { eax = lo(z) }
          movl   %ecx,%edi               { edi = hi(n) }

          { Shift z and n right by the same amount so that n fits in ebx. }
          shrl   $1,%edx
          rcrl   $1,%eax
          rorl   $1,%edi
          rcrl   $1,%ebx
          bsrl   %ecx,%ecx               { cl = index of top set bit of hi(n) }
          shrdl  %cl,%edi,%ebx
          shrdl  %cl,%edx,%eax
          shrl   %cl,%edx
          roll   $1,%edi                 { undo the rorl: edi = hi(n) }
          divl   %ebx                    { eax = quotient estimate }

          movl   12+4(%esp),%ebx         { ebx = lo(z) }
          movl   %eax,%esi               // save quotient to esi
          imull  %eax,%edi
          mull   12+12(%esp)
          addl   %edi,%edx
          setcb  %cl                     // cl:edx:eax = 65 bits quotient*divisor
          movl   12+8(%esp),%edi         // edi:ebx = dividend
          subl   %eax,%ebx
          movb   $0,%al                  { mov leaves the borrow in CF intact }
          sbbl   %edx,%edi
          sbbb   %cl,%al                 { fold the 65th bit into the borrow }
          sbbl   $0,%esi                 { estimate - 1 when the product was too big }
          xorl   %edx,%edx               { hi(result) = 0 on this path }
          movl   %esi,%eax
          popl   %edi
          popl   %esi
          popl   %ebx
          { No explicit ret here: control reaches the end of the asm block.
            NOTE(review): presumably the compiler emits the final return that
            also removes the 16 argument bytes - confirm. }
    end;
  {$define FPC_SYSTEM_HAS_MOD_QWORD}
  { Unsigned 64-bit remainder for i386: computes z mod n, result in edx:eax.

    Three cases:
      - hi(n) <> 0: estimate the quotient from shifted operands, subtract
        estimate*n from z and add n back when that subtraction borrowed;
      - hi(z) >= lo(n): two chained 32-bit divisions (a zero divisor is
        detected on this path and forwarded to div_qword_throwdivbyzero);
      - otherwise: one 32-bit division.
    The two short paths return with ret 16 and touch only eax, ecx, edx. }
  function fpc_mod_qword(n,z : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MOD_QWORD']; compilerproc;
  { On entry: n = [esp + 12], z = [esp + 4]. }
    asm
          { the following piece of code is taken from the     }
          { AMD Athlon Processor x86 Code Optimization manual }
          movl   16(%esp),%ecx           { ecx = hi(n) }
          movl   8(%esp),%edx            { edx = hi(z) }
          testl  %ecx,%ecx
          jnz    .Lqmod_wide_divisor
          movl   12(%esp),%ecx           { ecx = lo(n) }
          movl   4(%esp),%eax            { eax = lo(z) }
          cmpl   %ecx,%edx
          jae    .Lqmod_two_steps
          divl   %ecx                    { edx = remainder }
          movl   %edx,%eax
          xorl   %edx,%edx               { hi(result) = 0 }
          ret    $16

  .Lqmod_two_steps:
          testl  %ecx,%ecx               { Zero division ends up here with ecx = 0. }
          jz     div_qword_throwdivbyzero { nothing pushed yet: jump directly }
          movl   %edx,%eax
          xorl   %edx,%edx
          divl   %ecx                    { edx = remainder of the high dword }
          movl   4(%esp),%eax            { eax = lo(z) }
          divl   %ecx                    { edx = final remainder }
          movl   %edx,%eax
          xorl   %edx,%edx
          ret    $16

  .Lqmod_wide_divisor:
          { Two registers are saved, so argument offsets below carry +8. }
          pushl  %ebx
          pushl  %edi
          movl   8+12(%esp),%ebx         { ebx = lo(n) }
          movl   8+4(%esp),%eax          { eax = lo(z) }
          movl   %ecx,%edi               { edi = hi(n) }

          { Shift z and n right by the same amount so that n fits in ebx. }
          shrl   $1,%edx
          rcrl   $1,%eax
          rorl   $1,%edi
          rcrl   $1,%ebx
          bsrl   %ecx,%ecx               { cl = index of top set bit of hi(n) }
          shrdl  %cl,%edi,%ebx
          shrdl  %cl,%edx,%eax
          shrl   %cl,%edx
          roll   $1,%edi                 { undo the rorl: edi = hi(n) }
          divl   %ebx                    { eax = quotient estimate }

          movl   8+4(%esp),%ebx          { lo(z) }
          imull  %eax,%edi
          mull   8+12(%esp)              { lo(n) }
          addl   %edi,%edx
          setcb  %cl                     // cl:edx:eax = 65 bits quotient*divisor
          movl   8+8(%esp),%edi          { hi(z) }
          subl   %eax,%ebx               // subtract (quotient*divisor) from dividend
          movb   $0,%al                  { mov leaves the borrow in CF intact }
          sbbl   %edx,%edi
          sbbb   %cl,%al                 // if carry is set now, the quotient was off by 1,
                                         // and we need to add divisor to result
          movl   8+12(%esp),%eax         { lo(n) }
          sbbl   %edx,%edx               { edx = all ones on borrow, else 0 }
          andl   %edx,%eax
          andl   8+16(%esp),%edx         { hi(n) }
          addl   %ebx,%eax
          adcl   %edi,%edx
          popl   %edi
          popl   %ebx
          { No explicit ret here: control reaches the end of the asm block.
            NOTE(review): presumably the compiler emits the final return that
            also removes the 16 argument bytes - confirm. }
    end;
  {$define FPC_SYSTEM_HAS_MUL_QWORD}
  { Unsigned 64-bit multiplication for i386 without overflow detection.

    Returns the low 64 bits of f1*f2; anything above bit 63 is discarded.
    When both high dwords are zero a single 32x32->64 multiply is enough.
    Otherwise the two cross products are added into the high dword of
    lo(f1)*lo(f2). The asm block declares eax, edx and ecx as modified. }
  function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
    begin
      { the following piece of code is taken from the
        AMD Athlon Processor x86 Code Optimization manual }
      asm
          movl   f1+4,%edx               { edx = hi(f1) }
          movl   f2+4,%ecx               { ecx = hi(f2) }
          orl    %ecx,%edx               { ZF set when both high dwords are 0 }
          movl   f2,%edx                 { edx = lo(f2); mov keeps ZF }
          movl   f1,%eax                 { eax = lo(f1) }
          jnz    .Lqwordmultwomul
          { if both upper dwords are =0 then it cannot overflow }
          mull   %edx
          jmp    .Lqwordmulready
      .Lqwordmultwomul:
          imul   f1+4,%edx               { edx = lo(f2) * hi(f1) }
          imul   %eax,%ecx               { ecx = hi(f2) * lo(f1) }
          addl   %edx,%ecx               { ecx = sum of the cross products }
          mull   f2                      { edx:eax = lo(f1) * lo(f2) }
          add    %ecx,%edx
      .Lqwordmulready:
          movl   %eax,__RESULT
          movl   %edx,__RESULT+4
      end [ 'eax','edx','ecx'];
    end;
  { Reports an overflow of a checked 64-bit multiplication.

    fpc_mul_qword_checkoverflow jumps here (it does not call) when the
    product does not fit in 64 bits. Neither parameter is read; the
    signature is the same as that of the multiplication helper, presumably
    so that the arguments already on the stack are cleaned up by this
    routine - confirm against the calling convention in use.

    HandleErrorFrame receives the error code and the current frame pointer.
    No function result is assigned. }
  function mul_qword_throwoverflow(f1,f2 : qword) : qword;
    const
      { Error code handed to HandleErrorFrame for a multiplication overflow. }
      MulOverflowErrorCode = 215;
    begin
      HandleErrorFrame(MulOverflowErrorCode,get_frame);
    end;
  { Unsigned 64-bit multiplication for i386 with overflow detection.

    Returns f1*f2 in edx:eax when the product fits in 64 bits and jumps to
    mul_qword_throwoverflow otherwise. Overflow is reported when
      - both high dwords are non-zero,
      - either cross product hi(f1)*lo(f2) or lo(f1)*hi(f2) exceeds 32 bits,
      - adding the two cross products carries, or
      - adding that sum to the high dword of lo(f1)*lo(f2) carries.
    No registers are saved, so all argument offsets are relative to the
    entry value of esp. }
  function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
  { On entry: f1 = [esp + 12], f2 = [esp + 4]. }
    asm
          { the following piece of code is taken from the
            AMD Athlon Processor x86 Code Optimization manual }
          movl   16(%esp),%edx           { edx = hi(f1) }
          movl   8(%esp),%ecx            { ecx = hi(f2) }
          orl    %ecx,%edx               { ZF set when both high dwords are 0 }
          movl   4(%esp),%edx            { edx = lo(f2); mov keeps ZF }
          movl   12(%esp),%eax           { eax = lo(f1) }
          jnz    .Lmulchk_wide
          { if both upper dwords are =0 then it cannot overflow }
          mull   %edx
          ret    $16

  .Lmulchk_overflow:
          { Common exit for every overflow case; arguments stay on the stack. }
          jmp    mul_qword_throwoverflow

  .Lmulchk_wide:
          { if both upper dwords are <>0 then it overflows always }
          testl  %ecx,%ecx
          jz     .Lmulchk_multiply
          cmpl   $0,16(%esp)
          jnz    .Lmulchk_overflow

  .Lmulchk_multiply:
          { overflow checked code }
          movl   16(%esp),%eax           { eax = hi(f1) }
          mull   4(%esp)                 { hi(f1) * lo(f2) }
          movl   %eax,%ecx               { mov keeps CF from the mull }
          jc     .Lmulchk_overflow
          movl   12(%esp),%eax           { eax = lo(f1) }
          mull   8(%esp)                 { lo(f1) * hi(f2) }
          jc     .Lmulchk_overflow
          addl   %eax,%ecx               { ecx = sum of the cross products }
          jc     .Lmulchk_overflow
          movl   4(%esp),%eax
          mull   12(%esp)                { edx:eax = lo(f2) * lo(f1) }
          addl   %ecx,%edx
          jc     .Lmulchk_overflow
          { No explicit ret here: control reaches the end of the asm block.
            NOTE(review): presumably the compiler emits the final return that
            also removes the 16 argument bytes - confirm. }
    end;