x86_64.inc 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2002 by Florian Klaempfl and Sergei Gorelkin
  4. Members of the Free Pascal development team
  5. Processor dependent implementation for the system unit for
  6. the x86-64 architecture
  7. See the file COPYING.FPC, included in this distribution,
  8. for details about the copyright.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12. **********************************************************************}
  13. {$asmmode GAS}
  14. {****************************************************************************
  15. Primitives
  16. ****************************************************************************}
  17. {$define FPC_SYSTEM_HAS_SPTR}
  18. Function Sptr : Pointer;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  19. asm
  20. movq %rsp,%rax
  21. end;
  22. {$IFNDEF INTERNAL_BACKTRACE}
  23. {$define FPC_SYSTEM_HAS_GET_FRAME}
  24. function get_frame:pointer;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  25. asm
  26. movq %rbp,%rax
  27. end;
  28. {$ENDIF not INTERNAL_BACKTRACE}
  29. {$define FPC_SYSTEM_HAS_GET_PC_ADDR}
  30. function get_pc_addr:pointer;assembler;nostackframe;
  31. asm
  32. movq (%rsp),%rax
  33. end;
  34. {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  35. function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;{$ifdef SYSTEMINLINE}inline;{$endif}
  36. begin
  37. get_caller_addr:=framebp;
  38. if assigned(framebp) then
  39. get_caller_addr:=PPointer(framebp)[1];
  40. end;
  41. {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  42. function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;{$ifdef SYSTEMINLINE}inline;{$endif}
  43. begin
  44. get_caller_frame:=framebp;
  45. if assigned(framebp) then
  46. get_caller_frame:=PPointer(framebp)^;
  47. end;
  48. // The following assembler procedures are disabled for FreeBSD due to
  49. // multiple issues with its old GNU assembler (Mantis #19188).
  50. // Even after fixing them, it can be enabled only for the trunk version,
  51. // otherwise bootstrapping won't be possible.
  52. // Modified to use oldbinutils as in cpu.pp source, to allow easier use for other targets.
  53. {$ifdef freebsd}
  54. {$ifndef overridebinutils}
  55. {$define oldbinutils}
  56. {$endif}
  57. {$endif freebsd}
  58. {$ifndef oldbinutils}
  59. {$ifndef FPC_SYSTEM_HAS_MOVE}
  60. {$define FPC_SYSTEM_HAS_MOVE}
  61. procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
  62. { Linux: rdi source, rsi dest, rdx count
  63. win64: rcx source, rdx dest, r8 count }
  64. asm
  65. {$ifndef win64}
  66. mov %rdx, %r8
  67. mov %rsi, %rdx
  68. mov %rdi, %rcx
  69. {$endif win64}
  70. mov %r8, %rax
  71. sub %rdx, %rcx { rcx = src - dest }
  72. jz .Lquit { exit if src=dest }
  73. jnb .L1 { src>dest => forward move }
  74. add %rcx, %rax { rcx is negative => r8+rcx > 0 if regions overlap }
  75. jb .Lback { if no overlap, still do forward move }
  76. .L1:
  77. cmp $8, %r8
  78. jl .Lless8f { signed compare, negative count not allowed }
  79. test $7, %dl
  80. je .Ldestaligned
  81. test $1, %dl { align dest by moving first 1+2+4 bytes }
  82. je .L2f
  83. mov (%rcx,%rdx,1),%al
  84. dec %r8
  85. mov %al, (%rdx)
  86. add $1, %rdx
  87. .L2f:
  88. test $2, %dl
  89. je .L4f
  90. mov (%rcx,%rdx,1),%ax
  91. sub $2, %r8
  92. mov %ax, (%rdx)
  93. add $2, %rdx
  94. .L4f:
  95. test $4, %dl
  96. je .Ldestaligned
  97. mov (%rcx,%rdx,1),%eax
  98. sub $4, %r8
  99. mov %eax, (%rdx)
  100. add $4, %rdx
  101. .Ldestaligned:
  102. mov %r8, %r9
  103. shr $5, %r9
  104. jne .Lmore32
  105. .Ltail:
  106. mov %r8, %r9
  107. shr $3, %r9
  108. je .Lless8f
  109. .balign 16
  110. .Lloop8f: { max. 8 iterations }
  111. mov (%rcx,%rdx,1),%rax
  112. mov %rax, (%rdx)
  113. add $8, %rdx
  114. dec %r9
  115. jne .Lloop8f
  116. and $7, %r8
  117. .Lless8f:
  118. test %r8, %r8
  119. jle .Lquit
  120. .balign 16
  121. .Lloop1f:
  122. mov (%rcx,%rdx,1),%al
  123. mov %al,(%rdx)
  124. inc %rdx
  125. dec %r8
  126. jne .Lloop1f
  127. .Lquit:
  128. retq
  129. .Lmore32:
  130. cmp $0x2000, %r9 { this limit must be processor-specific (1/2 L2 cache size) }
  131. jnae .Lloop32
  132. cmp $0x1000, %rcx { but don't bother bypassing cache if src and dest }
  133. jnb .Lntloopf { are close to each other}
  134. .balign 16
  135. .Lloop32:
  136. add $32,%rdx
  137. mov -32(%rcx,%rdx,1),%rax
  138. mov -24(%rcx,%rdx,1),%r10
  139. mov %rax,-32(%rdx)
  140. mov %r10,-24(%rdx)
  141. dec %r9
  142. mov -16(%rcx,%rdx,1),%rax
  143. mov -8(%rcx,%rdx,1),%r10
  144. mov %rax,-16(%rdx)
  145. mov %r10,-8(%rdx)
  146. jne .Lloop32
  147. and $0x1f, %r8
  148. jmpq .Ltail
  149. .Lntloopf:
  150. mov $32, %eax
  151. .balign 16
  152. .Lpref:
  153. prefetchnta (%rcx,%rdx,1)
  154. prefetchnta 0x40(%rcx,%rdx,1)
  155. add $0x80, %rdx
  156. dec %eax
  157. jne .Lpref
  158. sub $0x1000, %rdx
  159. mov $64, %eax
  160. .balign 16
  161. .Loop64:
  162. add $64, %rdx
  163. mov -64(%rcx,%rdx,1), %r9
  164. mov -56(%rcx,%rdx,1), %r10
  165. movnti %r9, -64(%rdx)
  166. movnti %r10, -56(%rdx)
  167. mov -48(%rcx,%rdx,1), %r9
  168. mov -40(%rcx,%rdx,1), %r10
  169. movnti %r9, -48(%rdx)
  170. movnti %r10, -40(%rdx)
  171. dec %eax
  172. mov -32(%rcx,%rdx,1), %r9
  173. mov -24(%rcx,%rdx,1), %r10
  174. movnti %r9, -32(%rdx)
  175. movnti %r10, -24(%rdx)
  176. mov -16(%rcx,%rdx,1), %r9
  177. mov -8(%rcx,%rdx,1), %r10
  178. movnti %r9, -16(%rdx)
  179. movnti %r10, -8(%rdx)
  180. jne .Loop64
  181. sub $0x1000, %r8
  182. cmp $0x1000, %r8
  183. jae .Lntloopf
  184. mfence
  185. jmpq .Ldestaligned { go handle remaining bytes }
  186. { backwards move }
  187. .Lback:
  188. add %r8, %rdx { points to the end of dest }
  189. cmp $8, %r8
  190. jl .Lless8b { signed compare, negative count not allowed }
  191. test $7, %dl
  192. je .Ldestalignedb
  193. test $1, %dl
  194. je .L2b
  195. dec %rdx
  196. mov (%rcx,%rdx,1), %al
  197. dec %r8
  198. mov %al, (%rdx)
  199. .L2b:
  200. test $2, %dl
  201. je .L4b
  202. sub $2, %rdx
  203. mov (%rcx,%rdx,1), %ax
  204. sub $2, %r8
  205. mov %ax, (%rdx)
  206. .L4b:
  207. test $4, %dl
  208. je .Ldestalignedb
  209. sub $4, %rdx
  210. mov (%rcx,%rdx,1), %eax
  211. sub $4, %r8
  212. mov %eax, (%rdx)
  213. .Ldestalignedb:
  214. mov %r8, %r9
  215. shr $5, %r9
  216. jne .Lmore32b
  217. .Ltailb:
  218. mov %r8, %r9
  219. shr $3, %r9
  220. je .Lless8b
  221. .Lloop8b:
  222. sub $8, %rdx
  223. mov (%rcx,%rdx,1), %rax
  224. dec %r9
  225. mov %rax, (%rdx)
  226. jne .Lloop8b
  227. and $7, %r8
  228. .Lless8b:
  229. test %r8, %r8
  230. jle .Lquit2
  231. .balign 16
  232. .Lsmallb:
  233. dec %rdx
  234. mov (%rcx,%rdx,1), %al
  235. dec %r8
  236. mov %al,(%rdx)
  237. jnz .Lsmallb
  238. .Lquit2:
  239. retq
  240. .Lmore32b:
  241. cmp $0x2000, %r9
  242. jnae .Lloop32b
  243. cmp $0xfffffffffffff000,%rcx
  244. jb .Lntloopb
  245. .balign 16
  246. .Lloop32b:
  247. sub $32, %rdx
  248. mov 24(%rcx,%rdx,1), %rax
  249. mov 16(%rcx,%rdx,1), %r10
  250. mov %rax, 24(%rdx)
  251. mov %r10, 16(%rdx)
  252. dec %r9
  253. mov 8(%rcx,%rdx,1),%rax
  254. mov (%rcx,%rdx,1), %r10
  255. mov %rax, 8(%rdx)
  256. mov %r10, (%rdx)
  257. jne .Lloop32b
  258. and $0x1f, %r8
  259. jmpq .Ltailb
  260. .Lntloopb:
  261. mov $32, %eax
  262. .balign 16
  263. .Lprefb:
  264. sub $0x80, %rdx
  265. prefetchnta (%rcx,%rdx,1)
  266. prefetchnta 0x40(%rcx,%rdx,1)
  267. dec %eax
  268. jnz .Lprefb
  269. add $0x1000, %rdx
  270. mov $0x40, %eax
  271. .balign 16
  272. .Lloop64b:
  273. sub $64, %rdx
  274. mov 56(%rcx,%rdx,1), %r9
  275. mov 48(%rcx,%rdx,1), %r10
  276. movnti %r9, 56(%rdx)
  277. movnti %r10, 48(%rdx)
  278. mov 40(%rcx,%rdx,1), %r9
  279. mov 32(%rcx,%rdx,1), %r10
  280. movnti %r9, 40(%rdx)
  281. movnti %r10, 32(%rdx)
  282. dec %eax
  283. mov 24(%rcx,%rdx,1), %r9
  284. mov 16(%rcx,%rdx,1), %r10
  285. movnti %r9, 24(%rdx)
  286. movnti %r10, 16(%rdx)
  287. mov 8(%rcx,%rdx,1), %r9
  288. mov (%rcx,%rdx,1), %r10
  289. movnti %r9, 8(%rdx)
  290. movnti %r10, (%rdx)
  291. jne .Lloop64b
  292. sub $0x1000, %r8
  293. cmp $0x1000, %r8
  294. jae .Lntloopb
  295. mfence
  296. jmpq .Ldestalignedb
  297. end;
  298. {$endif FPC_SYSTEM_HAS_MOVE}
  299. {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
  300. {$define FPC_SYSTEM_HAS_FILLCHAR}
  301. Procedure FillChar(var x;count:SizeInt;value:byte);assembler;nostackframe;
  302. asm
  303. { win64: rcx dest, rdx count, r8b value
  304. linux: rdi dest, rsi count, rdx value }
  305. {$ifndef win64}
  306. mov %rdx, %r8
  307. mov %rsi, %rdx
  308. mov %rdi, %rcx
  309. {$endif win64}
  310. cmp $8, %rdx
  311. jl .Ltiny
  312. // TODO: movz?q and movs?q are not accepted by FPC asmreader, it needs fixing.
  313. // `movzbl' instead is accepted and generates correct code with internal assembler,
  314. // but breaks targets using external GAS (Mantis #19188).
  315. // So use a different instruction for now.
  316. { expand byte value }
  317. andq $0xff, %r8
  318. {
  319. movzbq %r8b, %r8
  320. }
  321. mov $0x0101010101010101,%r9
  322. imul %r9, %r8
  323. test $7, %cl
  324. je .Laligned
  325. { align dest to 8 bytes }
  326. test $1, %cl
  327. je .L2
  328. movb %r8b, (%rcx)
  329. add $1, %rcx
  330. sub $1, %rdx
  331. .L2:
  332. test $2, %cl
  333. je .L4
  334. movw %r8w, (%rcx)
  335. add $2, %rcx
  336. sub $2, %rdx
  337. .L4:
  338. test $4, %cl
  339. je .Laligned
  340. movl %r8d, (%rcx)
  341. add $4, %rcx
  342. sub $4, %rdx
  343. .Laligned:
  344. mov %rdx, %rax
  345. and $0x3f, %rdx
  346. shr $6, %rax
  347. jne .Lmore64
  348. .Lless64:
  349. mov %rdx, %rax
  350. and $7, %rdx
  351. shr $3, %rax
  352. je .Ltiny
  353. .balign 16
  354. .Lloop8: { max. 8 iterations }
  355. mov %r8, (%rcx)
  356. add $8, %rcx
  357. dec %rax
  358. jne .Lloop8
  359. .Ltiny:
  360. test %rdx, %rdx
  361. jle .Lquit
  362. .Lloop1:
  363. movb %r8b, (%rcx)
  364. inc %rcx
  365. dec %rdx
  366. jnz .Lloop1
  367. .Lquit:
  368. retq
  369. .Lmore64:
  370. cmp $0x2000,%rax
  371. jae .Lloop64nti
  372. .balign 16
  373. .Lloop64:
  374. add $64, %rcx
  375. mov %r8, -64(%rcx)
  376. mov %r8, -56(%rcx)
  377. mov %r8, -48(%rcx)
  378. mov %r8, -40(%rcx)
  379. dec %rax
  380. mov %r8, -32(%rcx)
  381. mov %r8, -24(%rcx)
  382. mov %r8, -16(%rcx)
  383. mov %r8, -8(%rcx)
  384. jne .Lloop64
  385. jmp .Lless64
  386. .balign 16
  387. .Lloop64nti:
  388. add $64, %rcx
  389. movnti %r8, -64(%rcx)
  390. movnti %r8, -56(%rcx)
  391. movnti %r8, -48(%rcx)
  392. movnti %r8, -40(%rcx)
  393. dec %rax
  394. movnti %r8, -32(%rcx)
  395. movnti %r8, -24(%rcx)
  396. movnti %r8, -16(%rcx)
  397. movnti %r8, -8(%rcx)
  398. jnz .Lloop64nti
  399. mfence
  400. jmp .Lless64
  401. end;
  402. {$endif FPC_SYSTEM_HAS_FILLCHAR}
  403. {$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
  404. {$define FPC_SYSTEM_HAS_INDEXBYTE}
  405. function IndexByte(Const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe;
  406. { win64: rcx buf, rdx len, r8b word
  407. linux: rdi buf, rsi len, rdx word }
  408. asm
  409. {$ifdef win64}
  410. movd %r8d, %xmm1
  411. {$else}
  412. movd %edx, %xmm1
  413. movq %rdi, %rcx
  414. movq %rsi, %rdx
  415. {$endif}
  416. mov %rcx, %r8
  417. punpcklbw %xmm1, %xmm1
  418. and $-0x10, %rcx { highest aligned address before buf }
  419. test %rdx, %rdx
  420. punpcklbw %xmm1, %xmm1
  421. jz .Lnotfound { exit if len=0 }
  422. add $16, %rcx { first aligned address after buf }
  423. pshufd $0, %xmm1, %xmm1
  424. movdqa -16(%rcx), %xmm0 { Fetch first 16 bytes (up to 15 bytes before target) }
  425. sub %r8, %rcx { rcx=number of valid bytes, r8=original ptr }
  426. pcmpeqb %xmm1, %xmm0 { compare with pattern and get bitmask }
  427. pmovmskb %xmm0, %eax
  428. shl %cl, %eax { shift valid bits into high word }
  429. and $0xffff0000, %eax { clear low word containing invalid bits }
  430. shr %cl, %eax { shift back }
  431. jmp .Lcontinue
  432. .balign 16
  433. .Lloop:
  434. movdqa (%r8,%rcx), %xmm0 { r8 and rcx may have any values, }
  435. add $16, %rcx { but their sum is evenly divisible by 16. }
  436. pcmpeqb %xmm1, %xmm0
  437. pmovmskb %xmm0, %eax
  438. .Lcontinue:
  439. test %eax, %eax
  440. jnz .Lmatch
  441. cmp %rcx, %rdx
  442. ja .Lloop
  443. .Lnotfound:
  444. or $-1, %rax
  445. retq
  446. .Lmatch:
  447. bsf %eax, %eax
  448. lea -16(%rcx,%rax), %rax
  449. cmp %rax, %rdx { check against the buffer length }
  450. jbe .Lnotfound
  451. end;
  452. {$endif FPC_SYSTEM_HAS_INDEXBYTE}
  453. {$ifndef FPC_SYSTEM_HAS_INDEXWORD}
  454. {$define FPC_SYSTEM_HAS_INDEXWORD}
  455. function IndexWord(Const buf;len:SizeInt;b:word):SizeInt; assembler; nostackframe;
  456. { win64: rcx buf, rdx len, r8b word
  457. linux: rdi buf, rsi len, rdx word }
  458. asm
  459. {$ifdef win64}
  460. movd %r8d, %xmm1
  461. {$else}
  462. movd %edx, %xmm1
  463. movq %rdi, %rcx
  464. movq %rsi, %rdx
  465. {$endif}
  466. mov %rcx, %r8
  467. punpcklwd %xmm1, %xmm1
  468. and $-0x10, %rcx
  469. test %rdx, %rdx
  470. pshufd $0, %xmm1, %xmm1
  471. jz .Lnotfound { exit if len=0 }
  472. add $16, %rcx
  473. movdqa -16(%rcx), %xmm0 { Fetch first 16 bytes (up to 14 bytes before target) }
  474. sub %r8, %rcx { rcx=number of valid bytes }
  475. test $1, %r8b { if buffer isn't aligned to word boundary, }
  476. jnz .Lunaligned { use a different algorithm }
  477. pcmpeqw %xmm1, %xmm0
  478. pmovmskb %xmm0, %eax
  479. shl %cl, %eax
  480. and $0xffff0000, %eax
  481. shr %cl, %eax
  482. shr $1, %ecx { bytes->words }
  483. jmp .Lcontinue
  484. .balign 16
  485. .Lloop:
  486. movdqa (%r8,%rcx,2), %xmm0
  487. add $8, %rcx
  488. pcmpeqw %xmm1, %xmm0
  489. pmovmskb %xmm0, %eax
  490. .Lcontinue:
  491. test %eax, %eax
  492. jnz .Lmatch
  493. cmp %rcx, %rdx
  494. ja .Lloop
  495. .Lnotfound:
  496. or $-1, %rax
  497. retq
  498. .Lmatch:
  499. bsf %eax, %eax
  500. shr $1, %eax { in words }
  501. lea -8(%rcx,%rax), %rax
  502. cmp %rax, %rdx
  503. jbe .Lnotfound { if match is after the specified length, ignore it }
  504. retq
  505. .Lunaligned:
  506. movdqa %xmm1, %xmm2 { (mis)align the pattern (in this particular case: }
  507. psllw $8, %xmm1 { swap bytes of each word of pattern) }
  508. psrlw $8, %xmm2
  509. por %xmm2, %xmm1
  510. pcmpeqb %xmm1, %xmm0
  511. pmovmskb %xmm0, %eax
  512. shl %cl, %eax
  513. and $0xffff0000, %eax
  514. shr %cl, %eax
  515. add %rdx, %rdx { length words -> bytes }
  516. xor %r10d, %r10d { nothing to merge yet }
  517. jmp .Lcontinue_u
  518. .balign 16
  519. .Lloop_u:
  520. movdqa (%r8,%rcx), %xmm0
  521. add $16, %rcx
  522. pcmpeqb %xmm1, %xmm0 { compare by bytes }
  523. shr $16, %r10d { bit 16 shifts into 0 }
  524. pmovmskb %xmm0, %eax
  525. .Lcontinue_u:
  526. shl $1, %eax { 15:0 -> 16:1 }
  527. or %r10d, %eax { merge bit 0 from previous round }
  528. mov %eax, %r10d
  529. shr $1, %eax { now AND together adjacent pairs of bits }
  530. and %r10d, %eax
  531. and $0x5555, %eax { also reset odd bits }
  532. jnz .Lmatch_u
  533. cmpq %rcx, %rdx
  534. ja .Lloop_u
  535. .Lnotfound_u:
  536. or $-1, %rax
  537. retq
  538. .Lmatch_u:
  539. bsf %eax, %eax
  540. lea -16(%rcx,%rax), %rax
  541. cmp %rax, %rdx
  542. jbe .Lnotfound_u { if match is after the specified length, ignore it }
  543. sar $1, %rax { in words }
  544. end;
  545. {$endif FPC_SYSTEM_HAS_INDEXWORD}
  546. {$endif not oldbinutils}
  547. {$ifndef FPC_SYSTEM_HAS_COMPAREBYTE}
  548. {$define FPC_SYSTEM_HAS_COMPAREBYTE}
  549. function CompareByte(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  550. { win64: rcx buf, rdx buf, r8 len
  551. linux: rdi buf, rsi buf, rdx len }
  552. asm
  553. {$ifndef win64}
  554. mov %rdx, %r8
  555. mov %rsi, %rdx
  556. mov %rdi, %rcx
  557. {$endif win64}
  558. negq %r8
  559. jz .LCmpbyteZero
  560. subq %r8, %rcx
  561. subq %r8, %rdx
  562. .balign 16
  563. .LCmpbyteLoop:
  564. {$ifdef oldbinutils}
  565. // for the reason why this alternate coding of movzbl is given here
  566. // see the comments in FillChar above
  567. .byte 0x42,0x0F,0xB6,0x04,0x01
  568. {$else}
  569. movzbl (%rcx,%r8), %eax
  570. {$endif} cmpb (%rdx,%r8), %al
  571. jne .LCmpbyteExitFast
  572. incq %r8
  573. jne .LCmpbyteLoop
  574. .LCmpbyteZero:
  575. xorl %eax, %eax
  576. retq
  577. .LCmpbyteExitFast:
  578. {$ifdef oldbinutils}
  579. .byte 0x42,0x0F,0xB6,0x0C,0x02
  580. {$else}
  581. movzbl (%rdx,%r8), %ecx { Compare last position }
  582. {$endif}
  583. subq %rcx, %rax
  584. end;
  585. {$endif FPC_SYSTEM_HAS_COMPAREBYTE}
  586. {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  587. { does a thread save inc/dec }
  588. function declocked(var l : longint) : boolean;assembler; nostackframe;
  589. asm
  590. { this check should be done because a lock takes a lot }
  591. { of time! }
  592. {$ifdef FPC_PIC}
  593. movq IsMultithread@GOTPCREL(%rip),%rax
  594. cmpl $0,(%rax)
  595. {$else FPC_PIC}
  596. cmpl $0,IsMultithread(%rip)
  597. {$endif FPC_PIC}
  598. {$ifndef win64}
  599. mov %rdi, %rcx
  600. {$endif win64}
  601. jz .Ldeclockednolock
  602. lock
  603. decl (%rcx)
  604. jmp .Ldeclockedend
  605. .Ldeclockednolock:
  606. decl (%rcx)
  607. .Ldeclockedend:
  608. setzb %al
  609. end;
  610. {$define FPC_SYSTEM_HAS_DECLOCKED_INT64}
  611. function declocked(var l : int64) : boolean;assembler; nostackframe;
  612. asm
  613. { this check should be done because a lock takes a lot }
  614. { of time! }
  615. {$ifdef FPC_PIC}
  616. movq IsMultithread@GOTPCREL(%rip),%rax
  617. cmpl $0,(%rax)
  618. {$else FPC_PIC}
  619. cmpl $0,IsMultithread(%rip)
  620. {$endif FPC_PIC}
  621. {$ifndef win64}
  622. mov %rdi, %rcx
  623. {$endif win64}
  624. jz .Ldeclockednolock
  625. lock
  626. decq (%rcx)
  627. jmp .Ldeclockedend
  628. .Ldeclockednolock:
  629. decq (%rcx)
  630. .Ldeclockedend:
  631. setzb %al
  632. end;
  633. {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  634. procedure inclocked(var l : longint);assembler; nostackframe;
  635. asm
  636. { this check should be done because a lock takes a lot }
  637. { of time! }
  638. {$ifdef FPC_PIC}
  639. movq IsMultithread@GOTPCREL(%rip),%rax
  640. cmpl $0,(%rax)
  641. {$else FPC_PIC}
  642. cmpl $0,IsMultithread(%rip)
  643. {$endif FPC_PIC}
  644. {$ifndef win64}
  645. mov %rdi, %rcx
  646. {$endif win64}
  647. jz .Linclockednolock
  648. lock
  649. incl (%rcx)
  650. jmp .Linclockedend
  651. .Linclockednolock:
  652. incl (%rcx)
  653. .Linclockedend:
  654. end;
  655. {$define FPC_SYSTEM_HAS_INCLOCKED_INT64}
  656. procedure inclocked(var l : int64);assembler; nostackframe;
  657. asm
  658. { this check should be done because a lock takes a lot }
  659. { of time! }
  660. {$ifdef FPC_PIC}
  661. movq IsMultithread@GOTPCREL(%rip),%rax
  662. cmpl $0,(%rax)
  663. {$else FPC_PIC}
  664. cmpl $0,IsMultithread(%rip)
  665. {$endif FPC_PIC}
  666. {$ifndef win64}
  667. mov %rdi, %rcx
  668. {$endif win64}
  669. jz .Linclockednolock
  670. lock
  671. incq (%rcx)
  672. jmp .Linclockedend
  673. .Linclockednolock:
  674. incq (%rcx)
  675. .Linclockedend:
  676. end;
  677. function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
  678. asm
  679. {$ifdef win64}
  680. movq %rcx,%rax
  681. {$else win64}
  682. movq %rdi,%rax
  683. {$endif win64}
  684. movl $-1,%edx
  685. xchgq %rdx,%rax
  686. lock
  687. xaddl %eax, (%rdx)
  688. decl %eax
  689. end;
  690. function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
  691. asm
  692. {$ifdef win64}
  693. movq %rcx,%rax
  694. {$else win64}
  695. movq %rdi,%rax
  696. {$endif win64}
  697. movl $1,%edx
  698. xchgq %rdx,%rax
  699. lock
  700. xaddl %eax, (%rdx)
  701. incl %eax
  702. end;
  703. function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
  704. asm
  705. {$ifdef win64}
  706. xchgl (%rcx),%edx
  707. movl %edx,%eax
  708. {$else win64}
  709. xchgl (%rdi),%esi
  710. movl %esi,%eax
  711. {$endif win64}
  712. end;
  713. function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
  714. asm
  715. {$ifdef win64}
  716. xchgq %rcx,%rdx
  717. lock
  718. xaddl %ecx, (%rdx)
  719. movl %ecx,%eax
  720. {$else win64}
  721. xchgq %rdi,%rsi
  722. lock
  723. xaddl %edi, (%rsi)
  724. movl %edi,%eax
  725. {$endif win64}
  726. end;
  727. function InterLockedCompareExchange(var Target: longint; NewValue, Comperand : longint): longint; assembler; nostackframe;
  728. asm
  729. {$ifdef win64}
  730. movl %r8d,%eax
  731. lock
  732. cmpxchgl %edx,(%rcx)
  733. {$else win64}
  734. movl %edx,%eax
  735. lock
  736. cmpxchgl %esi,(%rdi)
  737. {$endif win64}
  738. end;
  739. function InterLockedDecrement64 (var Target: int64) : int64; assembler; nostackframe;
  740. asm
  741. {$ifdef win64}
  742. movq %rcx,%rax
  743. {$else win64}
  744. movq %rdi,%rax
  745. {$endif win64}
  746. movq $-1,%rdx
  747. xchgq %rdx,%rax
  748. lock
  749. xaddq %rax, (%rdx)
  750. decq %rax
  751. end;
  752. function InterLockedIncrement64 (var Target: int64) : int64; assembler; nostackframe;
  753. asm
  754. {$ifdef win64}
  755. movq %rcx,%rax
  756. {$else win64}
  757. movq %rdi,%rax
  758. {$endif win64}
  759. movq $1,%rdx
  760. xchgq %rdx,%rax
  761. lock
  762. xaddq %rax, (%rdx)
  763. incq %rax
  764. end;
  765. function InterLockedExchange64 (var Target: int64;Source : int64) : int64; assembler; nostackframe;
  766. asm
  767. {$ifdef win64}
  768. xchgq (%rcx),%rdx
  769. movq %rdx,%rax
  770. {$else win64}
  771. xchgq (%rdi),%rsi
  772. movq %rsi,%rax
  773. {$endif win64}
  774. end;
  775. function InterLockedExchangeAdd64 (var Target: int64;Source : int64) : int64; assembler; nostackframe;
  776. asm
  777. {$ifdef win64}
  778. xchgq %rcx,%rdx
  779. lock
  780. xaddq %rcx, (%rdx)
  781. movq %rcx,%rax
  782. {$else win64}
  783. xchgq %rdi,%rsi
  784. lock
  785. xaddq %rdi, (%rsi)
  786. movq %rdi,%rax
  787. {$endif win64}
  788. end;
  789. function InterLockedCompareExchange64(var Target: int64; NewValue, Comperand : int64): int64; assembler; nostackframe;
  790. asm
  791. {$ifdef win64}
  792. movq %r8,%rax
  793. lock
  794. cmpxchgq %rdx,(%rcx)
  795. {$else win64}
  796. movq %rdx,%rax
  797. lock
  798. cmpxchgq %rsi,(%rdi)
  799. {$endif win64}
  800. end;
  801. {****************************************************************************
  802. FPU
  803. ****************************************************************************}
  804. const
  805. { Internal constants for use in system unit }
  806. FPU_Invalid = 1;
  807. FPU_Denormal = 2;
  808. FPU_DivisionByZero = 4;
  809. FPU_Overflow = 8;
  810. FPU_Underflow = $10;
  811. FPU_StackUnderflow = $20;
  812. FPU_StackOverflow = $40;
  813. FPU_ExceptionMask = $ff;
  814. MM_Invalid = 1;
  815. MM_Denormal = 2;
  816. MM_DivisionByZero = 4;
  817. MM_Overflow = 8;
  818. MM_Underflow = $10;
  819. MM_Precicion = $20;
  820. MM_ExceptionMask = $3f;
  821. MM_MaskInvalidOp = %0000000010000000;
  822. MM_MaskDenorm = %0000000100000000;
  823. MM_MaskDivZero = %0000001000000000;
  824. MM_MaskOverflow = %0000010000000000;
  825. MM_MaskUnderflow = %0000100000000000;
  826. MM_MaskPrecision = %0001000000000000;
  827. procedure fpc_cpuinit;
  828. begin
  829. { don't let libraries influence the FPU cw set by the host program }
  830. if IsLibrary then
  831. begin
  832. Default8087CW:=Get8087CW;
  833. DefaultMXCSR:=GetMXCSR;
  834. end;
  835. SysResetFPU;
  836. end;
  837. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  838. Procedure SysInitFPU;
  839. begin
  840. end;
  841. {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  842. Procedure SysResetFPU;
  843. var
  844. { these locals are so we don't have to hack pic code in the assembler }
  845. localmxcsr: dword;
  846. localfpucw: word;
  847. begin
  848. localfpucw:=Default8087CW;
  849. localmxcsr:=DefaultMXCSR;
  850. asm
  851. fninit
  852. fwait
  853. fldcw localfpucw
  854. ldmxcsr localmxcsr
  855. end;
  856. end;
  857. {$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
  858. {$define FPC_SYSTEM_HAS_MEM_BARRIER}
  859. procedure ReadBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  860. asm
  861. lfence
  862. end;
  863. procedure ReadDependencyBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  864. asm
  865. { reads imply barrier on earlier reads depended on }
  866. end;
  867. procedure ReadWriteBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  868. asm
  869. mfence
  870. end;
  871. procedure WriteBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  872. asm
  873. sfence
  874. end;
  875. {$endif}
  876. {****************************************************************************
  877. Math Routines
  878. ****************************************************************************}
  879. {$define FPC_SYSTEM_HAS_SWAPENDIAN}
  880. { SwapEndian(<16 Bit>) being inlined is faster than using assembler }
  881. function SwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  882. begin
  883. { the extra Word type cast is necessary because the "AValue shr 8" }
  884. { is turned into "longint(AValue) shr 8", so if AValue < 0 then }
  885. { the sign bits from the upper 16 bits are shifted in rather than }
  886. { zeroes. }
  887. Result := SmallInt(((Word(AValue) shr 8) or (Word(AValue) shl 8)) and $ffff);
  888. end;
  889. function SwapEndian(const AValue: Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
  890. begin
  891. Result := ((AValue shr 8) or (AValue shl 8)) and $ffff;
  892. end;
  893. function SwapEndian(const AValue: LongInt): LongInt; assembler; nostackframe;
  894. asm
  895. {$ifdef win64}
  896. movl %ecx, %eax
  897. {$else win64}
  898. movl %edi, %eax
  899. {$endif win64}
  900. bswap %eax
  901. end;
  902. function SwapEndian(const AValue: DWord): DWord; assembler; nostackframe;
  903. asm
  904. {$ifdef win64}
  905. movl %ecx, %eax
  906. {$else win64}
  907. movl %edi, %eax
  908. {$endif win64}
  909. bswap %eax
  910. end;
  911. function SwapEndian(const AValue: Int64): Int64; assembler; nostackframe;
  912. asm
  913. {$ifdef win64}
  914. movq %rcx, %rax
  915. {$else win64}
  916. movq %rdi, %rax
  917. {$endif win64}
  918. bswap %rax
  919. end;
  920. function SwapEndian(const AValue: QWord): QWord; assembler; nostackframe;
  921. asm
  922. {$ifdef win64}
  923. movq %rcx, %rax
  924. {$else win64}
  925. movq %rdi, %rax
  926. {$endif win64}
  927. bswap %rax
  928. end;
  929. {$ifndef win64}
  930. {$define FPC_SYSTEM_HAS_U128_DIV_U64_TO_U64}
  931. function u128_div_u64_to_u64( const xh, xl: qword; const y: qword; out quotient, remainder: qword ): boolean;nostackframe;assembler;
  932. {
  933. SysV:
  934. xh: RDI
  935. xl: RSI
  936. y: RDX
  937. quotient: RCX
  938. remainder: R8
  939. }
  940. label
  941. dodiv;
  942. asm
  943. cmpq %rdi,%rdx
  944. ja dodiv
  945. xorl %eax,%eax
  946. ret
  947. dodiv:
  948. movq %rdx,%r9
  949. movq %rsi,%rax
  950. movq %rdi,%rdx
  951. divq %r9
  952. movq %rax,(%rcx)
  953. movq %rdx,(%r8)
  954. movl $1,%eax
  955. end;
  956. {$endif win64}