int64p.inc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 1999-2000 by the Free Pascal development team
  4. This file contains some helper routines for int64 and qword
  5. See the file COPYING.FPC, included in this distribution,
  6. for details about the copyright.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  10. **********************************************************************}
  11. {$Q- no overflow checking }
  12. {$R- no range checking }
  {$define FPC_SYSTEM_HAS_DIV_INT64}
  { Signed 64-bit division for i386: computes z div n, where z is the dividend
    and n is the divisor.  Both operands are replaced by their absolute values,
    divided as unsigned numbers, and the sign is applied to the quotient at the
    very end.  The quotient is left in edx:eax.  When n is zero,
    HandleErrorFrame is called with 200 in eax and the frame pointer in edx. }
  function fpc_div_int64(n,z : int64) : int64;assembler;[public,alias: 'FPC_DIV_INT64']; compilerproc;
    var
      saved_ebx,saved_edi,saved_esi : longint;
    asm
      { ebx, esi and edi serve as scratch registers below; keep their incoming values }
      movl %edi,saved_edi
      movl %esi,saved_esi
      movl %ebx,saved_ebx
      { the following piece of code is taken from the }
      { AMD Athlon Processor x86 Code Optimization manual }
      movl n,%ebx                   // ecx:ebx = divisor
      movl n+4,%ecx
      movl %ebx,%eax
      orl %ecx,%eax                 // result is zero only if both divisor halves are zero
      jnz .Ldivisor_ok
      movl $200,%eax                // division by zero: 200 in eax, frame pointer in edx
      movl %ebp,%edx
      call HandleErrorFrame
      jmp .Lrestore
    .Ldivisor_ok:
      movl z,%eax                   // edx:eax = dividend
      movl z+4,%edx
      movl %edx,%esi
      xorl %ecx,%esi
      sarl $31,%esi                 // esi = -1 if the operand signs differ, else 0
      movl %edx,%edi
      sarl $31,%edi                 // edi = sign mask of the dividend
      xorl %edi,%eax                // edx:eax = abs(dividend)
      xorl %edi,%edx
      subl %edi,%eax
      sbbl %edi,%edx
      movl %ecx,%edi
      sarl $31,%edi                 // edi = sign mask of the divisor
      xorl %edi,%ebx                // ecx:ebx = abs(divisor)
      xorl %edi,%ecx
      subl %edi,%ebx
      sbbl %edi,%ecx                // ZF is set when the high divisor half is zero
      jnz .Lwide_divisor
      cmpl %ebx,%edx
      jae .Llong_division           // high dividend half >= divisor: one divl would overflow
      divl %ebx                     // whole quotient fits in eax
      movl %ecx,%edx                // ecx is zero here, so the high quotient half is cleared
      jmp .Lapply_sign
    .Llong_division:
      { 32-bit divisor, two-step division: the high half is divided first and
        its remainder stays in edx for the division of the low half }
      movl %eax,%ecx                // park the low dividend half
      movl %edx,%eax
      xorl %edx,%edx
      divl %ebx                     // eax = high half of the quotient
      xchgl %ecx,%eax               // ecx = high quotient half, eax = low dividend half
      divl %ebx                     // eax = low half of the quotient
      movl %ecx,%edx
      jmp .Lapply_sign
    .Lwide_divisor:
      { the divisor needs more than 32 bits: estimate the quotient from
        scaled-down operands, then correct it by multiplying back }
      subl $12,%esp                 // three dword scratch slots
      movl %eax,(%esp)              // dividend, low half
      movl %ebx,4(%esp)             // divisor, low half
      movl %edx,8(%esp)             // dividend, high half
      movl %ecx,%edi                // edi = divisor, high half
      shrl $1,%edx                  // dividend shifted right by one
      rcrl $1,%eax
      rorl $1,%edi                  // divisor shifted right by one (edi itself is only rotated)
      rcrl $1,%ebx
      bsrl %ecx,%ecx                // ecx = index of the top set bit of the high divisor half
      shrdl %cl,%edi,%ebx           // scale the divisor down into ebx
      shrdl %cl,%edx,%eax           // scale the dividend by the same amount
      shrl %cl,%edx
      roll $1,%edi                  // undo the rotate: edi = high divisor half again
      divl %ebx                     // eax = quotient estimate
      movl (%esp),%ebx              // ebx = dividend, low half
      movl %eax,%ecx                // keep the estimate
      imull %eax,%edi               // estimate * high divisor half
      mull 4(%esp)                  // edx:eax = estimate * low divisor half
      addl %edi,%edx                // edx:eax = estimate * divisor
      subl %eax,%ebx                // dividend - estimate * divisor, low half
      movl %ecx,%eax                // eax = estimate
      movl 8(%esp),%ecx
      sbbl %edx,%ecx                // same subtraction, high half; borrow if the estimate was too big
      sbbl $0,%eax                  // take that borrow off the quotient
      xorl %edx,%edx                // high quotient half is zero on this path
      addl $12,%esp
    .Lapply_sign:
      xorl %esi,%eax                // negates edx:eax when esi = -1, no-op when esi = 0
      xorl %esi,%edx
      subl %esi,%eax
      sbbl %esi,%edx
    .Lrestore:
      movl saved_edi,%edi
      movl saved_esi,%esi
      movl saved_ebx,%ebx
    end;
  {$define FPC_SYSTEM_HAS_MOD_INT64}
  { Signed 64-bit remainder for i386: computes z mod n, where z is the dividend
    and n is the divisor.  The operands are replaced by their absolute values,
    the unsigned remainder is computed, and the result is negated when the
    dividend z was negative.  The remainder is left in edx:eax.  When n is
    zero, HandleErrorFrame is called with 200 in eax and the frame pointer in
    edx. }
  function fpc_mod_int64(n,z : int64) : int64;assembler;[public,alias: 'FPC_MOD_INT64']; compilerproc;
    var
      saved_ebx,saved_edi,saved_esi : longint;
    asm
      { ebx, esi and edi serve as scratch registers below; keep their incoming values }
      movl %edi,saved_edi
      movl %esi,saved_esi
      movl %ebx,saved_ebx
      { the following piece of code is taken from the }
      { AMD Athlon Processor x86 Code Optimization manual }
      movl n,%ebx                   // ecx:ebx = divisor
      movl n+4,%ecx
      movl %ebx,%eax
      orl %ecx,%eax                 // result is zero only if both divisor halves are zero
      jnz .Ldivisor_ok
      movl $200,%eax                // division by zero: 200 in eax, frame pointer in edx
      movl %ebp,%edx
      call HandleErrorFrame
      jmp .Lrestore
    .Ldivisor_ok:
      movl z,%eax                   // edx:eax = dividend
      movl z+4,%edx
      movl %edx,%esi
      sarl $31,%esi                 // esi = sign mask of the dividend = sign of the result
      movl %esi,%edi                // the same mask is used to take abs(dividend)
      xorl %edi,%eax                // edx:eax = abs(dividend)
      xorl %edi,%edx
      subl %edi,%eax
      sbbl %edi,%edx
      movl %ecx,%edi
      sarl $31,%edi                 // edi = sign mask of the divisor
      xorl %edi,%ebx                // ecx:ebx = abs(divisor)
      xorl %edi,%ecx
      subl %edi,%ebx
      sbbl %edi,%ecx                // ZF is set when the high divisor half is zero
      jnz .Lwide_divisor
      cmpl %ebx,%edx
      jae .Llong_division           // high dividend half >= divisor: one divl would overflow
      divl %ebx
      movl %edx,%eax                // remainder of the single division
      movl %ecx,%edx                // ecx is zero here, so the high result half is cleared
      jmp .Lapply_sign
    .Llong_division:
      { 32-bit divisor, two-step division: the remainder of the high half
        stays in edx and feeds the division of the low half }
      movl %eax,%ecx                // park the low dividend half
      movl %edx,%eax
      xorl %edx,%edx
      divl %ebx                     // edx = remainder of the high half
      movl %ecx,%eax
      divl %ebx                     // edx = final remainder
      movl %edx,%eax
      xorl %edx,%edx
      jmp .Lapply_sign
    .Lwide_divisor:
      { the divisor needs more than 32 bits: estimate the quotient from
        scaled-down operands, multiply back and fix up the remainder }
      subl $16,%esp                 // four dword scratch slots
      movl %eax,(%esp)              // dividend, low half
      movl %ebx,4(%esp)             // divisor, low half
      movl %edx,8(%esp)             // dividend, high half
      movl %ecx,12(%esp)            // divisor, high half
      movl %ecx,%edi                // edi = divisor, high half
      shrl $1,%edx                  // dividend shifted right by one
      rcrl $1,%eax
      rorl $1,%edi                  // divisor shifted right by one (edi itself is only rotated)
      rcrl $1,%ebx
      bsrl %ecx,%ecx                // ecx = index of the top set bit of the high divisor half
      shrdl %cl,%edi,%ebx           // scale the divisor down into ebx
      shrdl %cl,%edx,%eax           // scale the dividend by the same amount
      shrl %cl,%edx
      roll $1,%edi                  // undo the rotate: edi = high divisor half again
      divl %ebx                     // eax = quotient estimate
      movl (%esp),%ebx              // ebx = dividend, low half
      imull %eax,%edi               // estimate * high divisor half
      mull 4(%esp)                  // edx:eax = estimate * low divisor half
      addl %edi,%edx                // edx:eax = estimate * divisor
      subl %eax,%ebx                // ecx:ebx = dividend - estimate * divisor
      movl 8(%esp),%ecx
      sbbl %edx,%ecx
      sbbl %eax,%eax                // eax = -1 if that subtraction borrowed, else 0
      movl 12(%esp),%edx
      andl %eax,%edx                // edx:eax = divisor if it borrowed, else 0
      andl 4(%esp),%eax
      addl %ebx,%eax                // add the divisor back when the estimate was too big
      adcl %ecx,%edx
      addl $16,%esp
    .Lapply_sign:
      xorl %esi,%eax                // negates edx:eax when esi = -1, no-op when esi = 0
      xorl %esi,%edx
      subl %esi,%eax
      sbbl %esi,%edx
    .Lrestore:
      movl saved_edi,%edi
      movl saved_esi,%esi
      movl saved_ebx,%ebx
    end;
  {$define FPC_SYSTEM_HAS_DIV_QWORD}
  { Unsigned 64-bit division for i386: computes z div n, where z is the
    dividend and n is the divisor.  The quotient is left in edx:eax.  When n is
    zero, HandleErrorFrame is called with 200 in eax and the frame pointer in
    edx. }
  function fpc_div_qword(n,z : qword) : qword;assembler;[public,alias: 'FPC_DIV_QWORD']; compilerproc;
    var
      saved_ebx,saved_edi,saved_esi : longint;
    asm
      { ebx, esi and edi serve as scratch registers below; keep their incoming values }
      movl %edi,saved_edi
      movl %esi,saved_esi
      movl %ebx,saved_ebx
      { the following piece of code is taken from the }
      { AMD Athlon Processor x86 Code Optimization manual }
      movl n,%ebx                   // ecx:ebx = divisor
      movl n+4,%ecx
      movl %ebx,%eax
      orl %ecx,%eax                 // result is zero only if both divisor halves are zero
      jnz .Ldivisor_ok
      movl $200,%eax                // division by zero: 200 in eax, frame pointer in edx
      movl %ebp,%edx
      call HandleErrorFrame
      jmp .Lrestore
    .Ldivisor_ok:
      movl z,%eax                   // edx:eax = dividend
      movl z+4,%edx
      testl %ecx,%ecx
      jnz .Lwide_divisor            // divisor does not fit in 32 bits
      cmpl %ebx,%edx
      jae .Llong_division           // high dividend half >= divisor: one divl would overflow
      divl %ebx                     // whole quotient fits in eax
      movl %ecx,%edx                // ecx is zero here, so the high quotient half is cleared
      jmp .Lrestore
    .Llong_division:
      { 32-bit divisor, two-step division: the high half is divided first and
        its remainder stays in edx for the division of the low half }
      movl %eax,%ecx                // park the low dividend half
      movl %edx,%eax
      xorl %edx,%edx
      divl %ebx                     // eax = high half of the quotient
      xchgl %ecx,%eax               // ecx = high quotient half, eax = low dividend half
      divl %ebx                     // eax = low half of the quotient
      movl %ecx,%edx
      jmp .Lrestore
    .Lwide_divisor:
      { estimate the quotient from scaled-down operands, then correct it by
        multiplying back }
      movl %ecx,%edi                // edi = divisor, high half
      shrl $1,%edx                  // dividend shifted right by one
      rcrl $1,%eax
      rorl $1,%edi                  // divisor shifted right by one (edi itself is only rotated)
      rcrl $1,%ebx
      bsrl %ecx,%ecx                // ecx = index of the top set bit of the high divisor half
      shrdl %cl,%edi,%ebx           // scale the divisor down into ebx
      shrdl %cl,%edx,%eax           // scale the dividend by the same amount
      shrl %cl,%edx
      roll $1,%edi                  // undo the rotate: edi = high divisor half again
      divl %ebx                     // eax = quotient estimate
      movl %eax,%esi                // esi = quotient estimate
      movl z,%ebx                   // ebx = dividend, low half
      imull %eax,%edi               // estimate * high divisor half
      mull n                        // edx:eax = estimate * low divisor half
      addl %edi,%edx
      setcb %cl                     // cl:edx:eax = 65 bits quotient*divisor
      movl z+4,%edi                 // edi:ebx = dividend
      subl %eax,%ebx                // dividend - quotient*divisor, low half
      movb $0,%al
      sbbl %edx,%edi                // high half
      sbbb %cl,%al                  // 65th bit; borrow out means the estimate was too big
      sbbl $0,%esi                  // take that borrow off the quotient
      xorl %edx,%edx                // high quotient half is zero on this path
      movl %esi,%eax
    .Lrestore:
      movl saved_edi,%edi
      movl saved_esi,%esi
      movl saved_ebx,%ebx
    end;
  {$define FPC_SYSTEM_HAS_MOD_QWORD}
  { Unsigned 64-bit remainder for i386: computes z mod n, where z is the
    dividend and n is the divisor.  The remainder is left in edx:eax.  When n
    is zero, HandleErrorFrame is called with 200 in eax and the frame pointer
    in edx. }
  function fpc_mod_qword(n,z : qword) : qword;assembler;[public,alias: 'FPC_MOD_QWORD']; compilerproc;
    var
      saved_ebx,saved_edi : longint;
    asm
      { ebx and edi serve as scratch registers below; keep their incoming values }
      movl %edi,saved_edi
      movl %ebx,saved_ebx
      { the following piece of code is taken from the }
      { AMD Athlon Processor x86 Code Optimization manual }
      movl n,%ebx                   // ecx:ebx = divisor
      movl n+4,%ecx
      movl %ebx,%eax
      orl %ecx,%eax                 // result is zero only if both divisor halves are zero
      jnz .Ldivisor_ok
      movl $200,%eax                // division by zero: 200 in eax, frame pointer in edx
      movl %ebp,%edx
      call HandleErrorFrame
      jmp .Lrestore
    .Ldivisor_ok:
      movl z,%eax                   // edx:eax = dividend
      movl z+4,%edx
      testl %ecx,%ecx
      jnz .Lwide_divisor            // divisor does not fit in 32 bits
      cmpl %ebx,%edx
      jae .Llong_division           // high dividend half >= divisor: one divl would overflow
      divl %ebx
      movl %edx,%eax                // remainder of the single division
      movl %ecx,%edx                // ecx is zero here, so the high result half is cleared
      jmp .Lrestore
    .Llong_division:
      { 32-bit divisor, two-step division: the remainder of the high half
        stays in edx and feeds the division of the low half }
      movl %eax,%ecx                // park the low dividend half
      movl %edx,%eax
      xorl %edx,%edx
      divl %ebx                     // edx = remainder of the high half
      movl %ecx,%eax
      divl %ebx                     // edx = final remainder
      movl %edx,%eax
      xorl %edx,%edx
      jmp .Lrestore
    .Lwide_divisor:
      { estimate the quotient from scaled-down operands, multiply back and fix
        up the remainder }
      movl %ecx,%edi                // edi = divisor, high half
      shrl $1,%edx                  // dividend shifted right by one
      rcrl $1,%eax
      rorl $1,%edi                  // divisor shifted right by one (edi itself is only rotated)
      rcrl $1,%ebx
      bsrl %ecx,%ecx                // ecx = index of the top set bit of the high divisor half
      shrdl %cl,%edi,%ebx           // scale the divisor down into ebx
      shrdl %cl,%edx,%eax           // scale the dividend by the same amount
      shrl %cl,%edx
      roll $1,%edi                  // undo the rotate: edi = high divisor half again
      divl %ebx                     // eax = quotient estimate
      movl z,%ebx                   // ebx = dividend, low half
      imull %eax,%edi               // estimate * high divisor half
      mull n                        // edx:eax = estimate * low divisor half
      addl %edi,%edx
      setcb %cl                     // cl:edx:eax = 65 bits quotient*divisor
      movl z+4,%edi                 // edi:ebx = dividend
      subl %eax,%ebx                // subtract (quotient*divisor) from dividend
      movb $0,%al
      sbbl %edx,%edi
      sbbb %cl,%al                  // if carry is set now, the quotient was off by 1,
                                    // and we need to add divisor to result
      movl n,%eax                   // movl leaves the carry untouched
      sbbl %edx,%edx                // edx = -1 if carry was set, else 0
      andl %edx,%eax                // edx:eax = divisor if carry was set, else 0
      andl n+4,%edx
      addl %ebx,%eax                // remainder, plus the divisor when needed
      adcl %edi,%edx
    .Lrestore:
      movl saved_edi,%edi
      movl saved_ebx,%ebx
    end;
  347. {$ifndef VER3_0}
  {$define FPC_SYSTEM_HAS_MUL_QWORD}
  { Unsigned 64-bit multiplication for i386 without overflow detection: stores
    the low 64 bits of f1*f2 in the function result. }
  function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
    begin
      { the following piece of code is taken from the
        AMD Athlon Processor x86 Code Optimization manual }
      asm
        movl f1+4,%edx
        movl f2+4,%ecx              // ecx = high dword of f2
        orl %ecx,%edx               // ZF is set when both high dwords are zero
        movl f2,%edx                // the movl instructions leave the flags alone
        movl f1,%eax
        jz .Lsingle_mul
        { at least one upper dword is non-zero: add both cross products to the
          upper half of low(f1)*low(f2) }
        imull f1+4,%edx             // edx = low(f2) * high(f1)
        imull %eax,%ecx             // ecx = high(f2) * low(f1)
        addl %edx,%ecx
        mull f2                     // edx:eax = low(f1) * low(f2)
        addl %ecx,%edx
        jmp .Lstore
      .Lsingle_mul:
        { if both upper dwords are =0 then it cannot overflow }
        mull %edx
      .Lstore:
        movl %eax,__RESULT
        movl %edx,__RESULT+4
      end [ 'eax','edx','ecx'];
    end;
  { Unsigned 64-bit multiplication for i386 with overflow detection: stores
    f1*f2 in the function result, or calls HandleErrorFrame(215,get_frame)
    when the product does not fit in 64 bits. }
  function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
    var
      product_too_big : boolean;
    begin
      product_too_big:=false;
      { the following piece of code is taken from the
        AMD Athlon Processor x86 Code Optimization manual }
      asm
        movl f1+4,%edx
        movl f2+4,%ecx              // ecx = high dword of f2
        orl %ecx,%edx               // ZF is set when both high dwords are zero
        movl f2,%edx                // the movl instructions leave the flags alone
        movl f1,%eax
        jnz .Lneeds_check
        { if both upper dwords are =0 then it cannot overflow }
        mull %edx
        movl %eax,__RESULT
        movl %edx,__RESULT+4
        jmp .Ldone
      .Lneeds_check:
        { if both upper dwords are <>0 then it overflows always }
        testl %ecx,%ecx
        jz .Lpartial_products
        cmpl $0,f1+4
        jnz .Lflag_overflow
      .Lpartial_products:
        { overflow checked code }
        movl f1+4,%eax
        mull f2                     // high(f1) * low(f2); carry if it needs more than 32 bits
        movl %eax,%ecx
        jc .Lflag_overflow
        movl f1,%eax
        mull f2+4                   // low(f1) * high(f2); carry if it needs more than 32 bits
        jc .Lflag_overflow
        addl %eax,%ecx              // sum of both cross products
        jc .Lflag_overflow
        movl f2,%eax
        mull f1                     // edx:eax = low(f2) * low(f1)
        addl %ecx,%edx              // carry here means the product exceeds 64 bits
        movl %eax,__RESULT
        movl %edx,__RESULT+4
        jnc .Ldone
      .Lflag_overflow:
        movb $1,product_too_big
      .Ldone:
      end [ 'eax','edx','ecx'];
      if product_too_big then
        HandleErrorFrame(215,get_frame);
    end;
  424. {$endif VER3_0}