{ x86_64.inc — processor dependent implementation for the x86-64 system unit }
{
    This file is part of the Free Pascal run time library.
    Copyright (c) 2002 by Florian Klaempfl.
    Member of the Free Pascal development team

    Parts of this code are derived from the x86-64 linux port
    Copyright 2002 Andi Kleen

    Processor dependent implementation for the system unit for
    the x86-64 architecture

    See the file COPYING.FPC, included in this distribution,
    for details about the copyright.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

 **********************************************************************}

{$asmmode GAS}

{****************************************************************************
                               Primitives
****************************************************************************}
  19. {$define FPC_SYSTEM_HAS_SPTR}
  20. Function Sptr : Pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
  21. asm
  22. movq %rsp,%rax
  23. end ['RAX'];
  24. {$IFNDEF INTERNAL_BACKTRACE}
  25. {$define FPC_SYSTEM_HAS_GET_FRAME}
  26. function get_frame:pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
  27. asm
  28. movq %rbp,%rax
  29. end ['RAX'];
  30. {$ENDIF not INTERNAL_BACKTRACE}
  31. {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  32. function get_caller_addr(framebp:pointer):pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
  33. asm
  34. {$ifdef win64}
  35. orq %rcx,%rcx
  36. jz .Lg_a_null
  37. movq 8(%rcx),%rax
  38. {$else win64}
  39. { %rdi = framebp }
  40. orq %rdi,%rdi
  41. jz .Lg_a_null
  42. movq 8(%rdi),%rax
  43. {$endif win64}
  44. .Lg_a_null:
  45. end ['RAX'];
  46. {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  47. function get_caller_frame(framebp:pointer):pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
  48. asm
  49. {$ifdef win64}
  50. orq %rcx,%rcx
  51. jz .Lg_a_null
  52. movq (%rcx),%rax
  53. {$else win64}
  54. { %rdi = framebp }
  55. orq %rdi,%rdi
  56. jz .Lg_a_null
  57. movq (%rdi),%rax
  58. {$endif win64}
  59. .Lg_a_null:
  60. end ['RAX'];
  61. {$ifndef FPC_SYSTEM_HAS_MOVE}
  62. {$define FPC_SYSTEM_HAS_MOVE}
  63. procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
  64. { Linux: rdi source, rsi dest, rdx count
  65. win64: rcx source, rdx dest, r8 count }
  66. asm
  67. {$ifndef win64}
  68. mov %rdx, %r8
  69. mov %rsi, %rdx
  70. mov %rdi, %rcx
  71. {$endif win64}
  72. mov %r8, %rax
  73. sub %rdx, %rcx { rcx = src - dest }
  74. jz .Lquit { exit if src=dest }
  75. jnb .L1 { src>dest => forward move }
  76. add %rcx, %rax { rcx is negative => r8+rcx > 0 if regions overlap }
  77. jb .Lback { if no overlap, still do forward move }
  78. .L1:
  79. cmp $8, %r8
  80. jl .Lless8f { signed compare, negative count not allowed }
  81. test $7, %dl
  82. je .Ldestaligned
  83. test $1, %dl { align dest by moving first 1+2+4 bytes }
  84. je .L2f
  85. mov (%rcx,%rdx,1),%al
  86. dec %r8
  87. mov %al, (%rdx)
  88. add $1, %rdx
  89. .L2f:
  90. test $2, %dl
  91. je .L4f
  92. mov (%rcx,%rdx,1),%ax
  93. sub $2, %r8
  94. mov %ax, (%rdx)
  95. add $2, %rdx
  96. .L4f:
  97. test $4, %dl
  98. je .Ldestaligned
  99. mov (%rcx,%rdx,1),%eax
  100. sub $4, %r8
  101. mov %eax, (%rdx)
  102. add $4, %rdx
  103. .Ldestaligned:
  104. mov %r8, %r9
  105. shr $5, %r9
  106. jne .Lmore32
  107. .Ltail:
  108. mov %r8, %r9
  109. shr $3, %r9
  110. je .Lless8f
  111. .balign 16
  112. .Lloop8f: { max. 8 iterations }
  113. mov (%rcx,%rdx,1),%rax
  114. mov %rax, (%rdx)
  115. add $8, %rdx
  116. dec %r9
  117. jne .Lloop8f
  118. and $7, %r8
  119. .Lless8f:
  120. test %r8, %r8
  121. jle .Lquit
  122. .balign 16
  123. .Lloop1f:
  124. mov (%rcx,%rdx,1),%al
  125. mov %al,(%rdx)
  126. inc %rdx
  127. dec %r8
  128. jne .Lloop1f
  129. .Lquit:
  130. retq
  131. .Lmore32:
  132. cmp $0x2000, %r9 { this limit must be processor-specific (1/2 L2 cache size) }
  133. jnae .Lloop32
  134. cmp $0x1000, %rcx { but don't bother bypassing cache if src and dest }
  135. jnb .Lntloopf { are close to each other}
  136. .balign 16
  137. .Lloop32:
  138. add $32,%rdx
  139. mov -32(%rcx,%rdx,1),%rax
  140. mov -24(%rcx,%rdx,1),%r10
  141. mov %rax,-32(%rdx)
  142. mov %r10,-24(%rdx)
  143. dec %r9
  144. mov -16(%rcx,%rdx,1),%rax
  145. mov -8(%rcx,%rdx,1),%r10
  146. mov %rax,-16(%rdx)
  147. mov %r10,-8(%rdx)
  148. jne .Lloop32
  149. and $0x1f, %r8
  150. jmpq .Ltail
  151. .Lntloopf:
  152. mov $32, %eax
  153. .balign 16
  154. .Lpref:
  155. prefetchnta (%rcx,%rdx,1)
  156. prefetchnta 0x40(%rcx,%rdx,1)
  157. add $0x80, %rdx
  158. dec %eax
  159. jne .Lpref
  160. sub $0x1000, %rdx
  161. mov $64, %eax
  162. .balign 16
  163. .Loop64:
  164. add $64, %rdx
  165. mov -64(%rcx,%rdx,1), %r9
  166. mov -56(%rcx,%rdx,1), %r10
  167. movnti %r9, -64(%rdx)
  168. movnti %r10, -56(%rdx)
  169. mov -48(%rcx,%rdx,1), %r9
  170. mov -40(%rcx,%rdx,1), %r10
  171. movnti %r9, -48(%rdx)
  172. movnti %r10, -40(%rdx)
  173. dec %eax
  174. mov -32(%rcx,%rdx,1), %r9
  175. mov -24(%rcx,%rdx,1), %r10
  176. movnti %r9, -32(%rdx)
  177. movnti %r10, -24(%rdx)
  178. mov -16(%rcx,%rdx,1), %r9
  179. mov -8(%rcx,%rdx,1), %r10
  180. movnti %r9, -16(%rdx)
  181. movnti %r10, -8(%rdx)
  182. jne .Loop64
  183. sub $0x1000, %r8
  184. cmp $0x1000, %r8
  185. jae .Lntloopf
  186. mfence
  187. jmpq .Ldestaligned { go handle remaining bytes }
  188. { backwards move }
  189. .Lback:
  190. add %r8, %rdx { points to the end of dest }
  191. cmp $8, %r8
  192. jl .Lless8b { signed compare, negative count not allowed }
  193. test $7, %dl
  194. je .Ldestalignedb
  195. test $1, %dl
  196. je .L2b
  197. dec %rdx
  198. mov (%rcx,%rdx,1), %al
  199. dec %r8
  200. mov %al, (%rdx)
  201. .L2b:
  202. test $2, %dl
  203. je .L4b
  204. sub $2, %rdx
  205. mov (%rcx,%rdx,1), %ax
  206. sub $2, %r8
  207. mov %ax, (%rdx)
  208. .L4b:
  209. test $4, %dl
  210. je .Ldestalignedb
  211. sub $4, %rdx
  212. mov (%rcx,%rdx,1), %eax
  213. sub $4, %r8
  214. mov %eax, (%rdx)
  215. .Ldestalignedb:
  216. mov %r8, %r9
  217. shr $5, %r9
  218. jne .Lmore32b
  219. .Ltailb:
  220. mov %r8, %r9
  221. shr $3, %r9
  222. je .Lless8b
  223. .Lloop8b:
  224. sub $8, %rdx
  225. mov (%rcx,%rdx,1), %rax
  226. dec %r9
  227. mov %rax, (%rdx)
  228. jne .Lloop8b
  229. and $7, %r8
  230. .Lless8b:
  231. test %r8, %r8
  232. jle .Lquit2
  233. .balign 16
  234. .Lsmallb:
  235. dec %rdx
  236. mov (%rcx,%rdx,1), %al
  237. dec %r8
  238. mov %al,(%rdx)
  239. jnz .Lsmallb
  240. .Lquit2:
  241. retq
  242. .Lmore32b:
  243. cmp $0x2000, %r9
  244. jnae .Lloop32b
  245. cmp $0xfffffffffffff000,%rcx
  246. jb .Lntloopb
  247. .balign 16
  248. .Lloop32b:
  249. sub $32, %rdx
  250. mov 24(%rcx,%rdx,1), %rax
  251. mov 16(%rcx,%rdx,1), %r10
  252. mov %rax, 24(%rdx)
  253. mov %r10, 16(%rdx)
  254. dec %r9
  255. mov 8(%rcx,%rdx,1),%rax
  256. mov (%rcx,%rdx,1), %r10
  257. mov %rax, 8(%rdx)
  258. mov %r10, (%rdx)
  259. jne .Lloop32b
  260. and $0x1f, %r8
  261. jmpq .Ltailb
  262. .Lntloopb:
  263. mov $32, %eax
  264. .balign 16
  265. .Lprefb:
  266. sub $0x80, %rdx
  267. prefetchnta (%rcx,%rdx,1)
  268. prefetchnta 0x40(%rcx,%rdx,1)
  269. dec %eax
  270. jnz .Lprefb
  271. add $0x1000, %rdx
  272. mov $0x40, %eax
  273. .balign 16
  274. .Lloop64b:
  275. sub $64, %rdx
  276. mov 56(%rcx,%rdx,1), %r9
  277. mov 48(%rcx,%rdx,1), %r10
  278. movnti %r9, 56(%rdx)
  279. movnti %r10, 48(%rdx)
  280. mov 40(%rcx,%rdx,1), %r9
  281. mov 32(%rcx,%rdx,1), %r10
  282. movnti %r9, 40(%rdx)
  283. movnti %r10, 32(%rdx)
  284. dec %eax
  285. mov 24(%rcx,%rdx,1), %r9
  286. mov 16(%rcx,%rdx,1), %r10
  287. movnti %r9, 24(%rdx)
  288. movnti %r10, 16(%rdx)
  289. mov 8(%rcx,%rdx,1), %r9
  290. mov (%rcx,%rdx,1), %r10
  291. movnti %r9, 8(%rdx)
  292. movnti %r10, (%rdx)
  293. jne .Lloop64b
  294. sub $0x1000, %r8
  295. cmp $0x1000, %r8
  296. jae .Lntloopb
  297. mfence
  298. jmpq .Ldestalignedb
  299. end;
  300. {$endif FPC_SYSTEM_HAS_MOVE}
  301. {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
  302. {$define FPC_SYSTEM_HAS_FILLCHAR}
  303. Procedure FillChar(var x;count:SizeInt;value:byte);assembler;nostackframe;
  304. asm
  305. { win64: rcx dest, rdx count, r8b value
  306. linux: rdi dest, rsi count, rdx value }
  307. {$ifndef win64}
  308. mov %rdx, %r8
  309. mov %rsi, %rdx
  310. mov %rdi, %rcx
  311. {$endif win64}
  312. cmp $8, %rdx
  313. jl .Ltiny
  314. { expand byte value }
  315. movzbl %r8b, %r8
  316. mov $0x0101010101010101,%r9
  317. imul %r9, %r8
  318. test $7, %cl
  319. je .Laligned
  320. { align dest to 8 bytes }
  321. test $1, %cl
  322. je .L2
  323. movb %r8b, (%rcx)
  324. add $1, %rcx
  325. sub $1, %rdx
  326. .L2:
  327. test $2, %cl
  328. je .L4
  329. movw %r8w, (%rcx)
  330. add $2, %rcx
  331. sub $2, %rdx
  332. .L4:
  333. test $4, %cl
  334. je .Laligned
  335. movl %r8d, (%rcx)
  336. add $4, %rcx
  337. sub $4, %rdx
  338. .Laligned:
  339. mov %rdx, %rax
  340. and $0x3f, %rdx
  341. shr $6, %rax
  342. jne .Lmore64
  343. .Lless64:
  344. mov %rdx, %rax
  345. and $7, %rdx
  346. shr $3, %rax
  347. je .Ltiny
  348. .balign 16
  349. .Lloop8: { max. 8 iterations }
  350. mov %r8, (%rcx)
  351. add $8, %rcx
  352. dec %rax
  353. jne .Lloop8
  354. .Ltiny:
  355. test %rdx, %rdx
  356. jle .Lquit
  357. .Lloop1:
  358. movb %r8b, (%rcx)
  359. inc %rcx
  360. dec %rdx
  361. jnz .Lloop1
  362. .Lquit:
  363. retq
  364. .Lmore64:
  365. cmp $0x2000,%rax
  366. jae .Lloop64nti
  367. .balign 16
  368. .Lloop64:
  369. add $64, %rcx
  370. mov %r8, -64(%rcx)
  371. mov %r8, -56(%rcx)
  372. mov %r8, -48(%rcx)
  373. mov %r8, -40(%rcx)
  374. dec %rax
  375. mov %r8, -32(%rcx)
  376. mov %r8, -24(%rcx)
  377. mov %r8, -16(%rcx)
  378. mov %r8, -8(%rcx)
  379. jne .Lloop64
  380. jmp .Lless64
  381. .balign 16
  382. .Lloop64nti:
  383. add $64, %rcx
  384. movnti %r8, -64(%rcx)
  385. movnti %r8, -56(%rcx)
  386. movnti %r8, -48(%rcx)
  387. movnti %r8, -40(%rcx)
  388. dec %rax
  389. movnti %r8, -32(%rcx)
  390. movnti %r8, -24(%rcx)
  391. movnti %r8, -16(%rcx)
  392. movnti %r8, -8(%rcx)
  393. jnz .Lloop64nti
  394. mfence
  395. jmp .Lless64
  396. end;
  397. {$endif FPC_SYSTEM_HAS_FILLCHAR}
  398. {$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
  399. { based on libc/sysdeps/x86_64/memchr.S }
  400. {$define FPC_SYSTEM_HAS_INDEXBYTE}
  401. function IndexByte(Const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe;
  402. { win64: rcx buf, rdx len, r8b word
  403. linux: rdi buf, rsi len, rdx word }
  404. asm
  405. {$ifdef win64}
  406. movd %r8d, %xmm1
  407. {$else}
  408. movd %edx, %xmm1
  409. movq %rdi, %rcx
  410. movq %rsi, %rdx
  411. {$endif}
  412. mov %rcx, %rax { duplicate buf }
  413. punpcklbw %xmm1, %xmm1
  414. and $0xfffffffffffffff0, %rax
  415. test %rdx, %rdx
  416. punpcklbw %xmm1, %xmm1
  417. jz .L3 { exit if len=0 }
  418. orl $0xffffffff, %r8d
  419. movdqa (%rax), %xmm0 { Fetch first 16 bytes (up to 15 bytes before target) }
  420. pshufd $0, %xmm1, %xmm1
  421. sub %rax, %rcx { rcx=misalignment }
  422. pcmpeqb %xmm1, %xmm0
  423. add %rcx, %rdx { add misalignment to length }
  424. cmovb %r8, %rdx { if it overflows (happens when length=-1), set back to -1, }
  425. { otherwise loop will terminate too early }
  426. mov %rcx, %r9 { and save it, will subtract back in the end }
  427. shl %cl, %r8d
  428. pmovmskb %xmm0, %ecx
  429. andl %r8d, %ecx { mask away matches before buffer start }
  430. movl $16, %r8d
  431. jnz .L1 { got a match within buffer -> we're done (almost) }
  432. cmpq %r8, %rdx
  433. jbe .L3
  434. .balign 16
  435. .L2:
  436. movdqa (%rax,%r8), %xmm0
  437. lea 16(%r8), %r8
  438. pcmpeqb %xmm1, %xmm0
  439. pmovmskb %xmm0, %ecx
  440. test %ecx, %ecx
  441. jnz .L1
  442. cmp %r8, %rdx
  443. ja .L2
  444. .L3:
  445. or $-1, %rax
  446. jmp .Ldone
  447. .L1:
  448. bsfl %ecx, %ecx { compute position of the first match }
  449. lea -16(%rcx,%r8), %rax
  450. cmp %rax, %rdx
  451. jbe .L3 { if it is after the specified length, ignore it }
  452. sub %r9, %rax
  453. .Ldone:
  454. end;
  455. {$endif FPC_SYSTEM_HAS_INDEXBYTE}
  456. {$ifndef FPC_SYSTEM_HAS_INDEXWORD}
  457. {$define FPC_SYSTEM_HAS_INDEXWORD}
  458. function IndexWord(Const buf;len:SizeInt;b:word):SizeInt; assembler; nostackframe;
  459. { win64: rcx buf, rdx len, r8b word
  460. linux: rdi buf, rsi len, rdx word }
  461. asm
  462. {$ifdef win64}
  463. movd %r8d, %xmm1
  464. {$else}
  465. movd %edx, %xmm1
  466. movq %rdi, %rcx
  467. movq %rsi, %rdx
  468. {$endif}
  469. mov %rcx, %rax { duplicate buf }
  470. punpcklwd %xmm1, %xmm1
  471. and $0xfffffffffffffff0, %rax
  472. test %rdx, %rdx
  473. pshufd $0, %xmm1, %xmm1
  474. jz .L3 { exit if len=0 }
  475. orl $0xffffffff, %r8d
  476. test $1, %cl { if buffer isn't aligned to word boundary, }
  477. jnz .Lunaligned { fallback to slower unaligned loop }
  478. movdqa (%rax), %xmm0 { Fetch first 16 bytes (up to 14 bytes before target) }
  479. sub %rax, %rcx { rcx=misalignment }
  480. pcmpeqw %xmm1, %xmm0
  481. mov %rcx, %r9
  482. shr $1, %r9 { save misalignment in words }
  483. add %r9, %rdx { add misalignment to length }
  484. cmovb %r8, %rdx { if it overflows (happens when length=-1), set back to -1, }
  485. { otherwise loop will terminate too early }
  486. shl %cl, %r8d
  487. pmovmskb %xmm0, %ecx
  488. andl %r8d, %ecx { mask away matches before buffer start }
  489. movl $8, %r8d
  490. jnz .L1 { got a match within buffer -> we're done (almost) }
  491. cmpq %r8, %rdx
  492. jbe .L3
  493. .balign 16
  494. .L2:
  495. movdqa (%rax,%r8,2), %xmm0
  496. lea 8(%r8), %r8
  497. pcmpeqw %xmm1, %xmm0
  498. pmovmskb %xmm0, %ecx
  499. test %ecx, %ecx
  500. jnz .L1
  501. cmp %r8, %rdx
  502. ja .L2
  503. .L3:
  504. or $-1, %rax
  505. jmp .Ldone
  506. .L1:
  507. bsfl %ecx, %ecx { compute position of the first match }
  508. shr $1, %ecx { in words }
  509. lea -8(%rcx,%r8), %rax
  510. cmp %rax, %rdx
  511. jbe .L3 { if it is after the specified length, ignore it }
  512. sub %r9, %rax
  513. .Ldone:
  514. retq
  515. { TODO: aligned processing is still possible, but for now
  516. use the simplest form }
  517. .Lunaligned:
  518. xor %r9, %r9
  519. xor %r8, %r8
  520. mov %rcx, %rax
  521. .balign 16
  522. .L2u:
  523. movdqu (%rax,%r8,2), %xmm0
  524. lea 8(%r8), %r8
  525. pcmpeqw %xmm1, %xmm0
  526. pmovmskb %xmm0, %ecx
  527. test %ecx, %ecx
  528. jnz .L1
  529. cmp %r8, %rdx
  530. ja .L2u
  531. or $-1, %rax
  532. end;
  533. {$endif FPC_SYSTEM_HAS_INDEXWORD}
  534. {$asmmode att}
  535. {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  536. { does a thread save inc/dec }
  537. function declocked(var l : longint) : boolean;assembler;
  538. asm
  539. {$ifdef win64}
  540. {
  541. l: %rcx
  542. }
  543. { this check should be done because a lock takes a lot }
  544. { of time! }
  545. cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
  546. jz .Ldeclockednolock
  547. lock
  548. decl (%rcx)
  549. jmp .Ldeclockedend
  550. .Ldeclockednolock:
  551. decl (%rcx)
  552. .Ldeclockedend:
  553. setzb %al
  554. {$else win64}
  555. {
  556. l: %rdi
  557. }
  558. { this check should be done because a lock takes a lot }
  559. { of time! }
  560. {$ifdef FPC_PIC}
  561. movq IsMultithread@GOTPCREL(%rip),%rax
  562. cmpb $0,(%rax)
  563. {$else FPC_PIC}
  564. cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
  565. {$endif FPC_PIC}
  566. jz .Ldeclockednolock
  567. lock
  568. decl (%rdi)
  569. jmp .Ldeclockedend
  570. .Ldeclockednolock:
  571. decl (%rdi)
  572. .Ldeclockedend:
  573. setzb %al
  574. {$endif win64}
  575. end;
  576. {$define FPC_SYSTEM_HAS_DECLOCKED_INT64}
  577. function declocked(var l : int64) : boolean;assembler;
  578. asm
  579. {$ifdef win64}
  580. {
  581. l: %rcx
  582. }
  583. { this check should be done because a lock takes a lot }
  584. { of time! }
  585. cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
  586. jz .Ldeclockednolock
  587. lock
  588. decq (%rcx)
  589. jmp .Ldeclockedend
  590. .Ldeclockednolock:
  591. decq (%rcx)
  592. .Ldeclockedend:
  593. setzb %al
  594. {$else win64}
  595. {
  596. l: %rdi
  597. }
  598. { this check should be done because a lock takes a lot }
  599. { of time! }
  600. {$ifdef FPC_PIC}
  601. movq IsMultithread@GOTPCREL(%rip),%rax
  602. cmpb $0,(%rax)
  603. {$else FPC_PIC}
  604. cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
  605. {$endif FPC_PIC}
  606. jz .Ldeclockednolock
  607. lock
  608. decq (%rdi)
  609. jmp .Ldeclockedend
  610. .Ldeclockednolock:
  611. decq (%rdi)
  612. .Ldeclockedend:
  613. setzb %al
  614. {$endif win64}
  615. end;
  616. {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  617. procedure inclocked(var l : longint);assembler;
  618. asm
  619. {$ifdef win64}
  620. {
  621. l: %rcx
  622. }
  623. { this check should be done because a lock takes a lot }
  624. { of time! }
  625. cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
  626. jz .Linclockednolock
  627. lock
  628. incl (%rcx)
  629. jmp .Linclockedend
  630. .Linclockednolock:
  631. incl (%rcx)
  632. .Linclockedend:
  633. {$else win64}
  634. {
  635. l: %rdi
  636. }
  637. { this check should be done because a lock takes a lot }
  638. { of time! }
  639. {$ifdef FPC_PIC}
  640. movq IsMultithread@GOTPCREL(%rip),%rax
  641. cmpb $0,(%rax)
  642. {$else FPC_PIC}
  643. cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
  644. {$endif FPC_PIC}
  645. jz .Linclockednolock
  646. lock
  647. incl (%rdi)
  648. jmp .Linclockedend
  649. .Linclockednolock:
  650. incl (%rdi)
  651. .Linclockedend:
  652. {$endif win64}
  653. end;
  654. {$define FPC_SYSTEM_HAS_INCLOCKED_INT64}
  655. procedure inclocked(var l : int64);assembler;
  656. asm
  657. {$ifdef win64}
  658. {
  659. l: %rcx
  660. }
  661. { this check should be done because a lock takes a lot }
  662. { of time! }
  663. cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
  664. jz .Linclockednolock
  665. lock
  666. incq (%rcx)
  667. jmp .Linclockedend
  668. .Linclockednolock:
  669. incq (%rcx)
  670. .Linclockedend:
  671. {$else win64}
  672. {
  673. l: %rdi
  674. }
  675. { this check should be done because a lock takes a lot }
  676. { of time! }
  677. {$ifdef FPC_PIC}
  678. movq IsMultithread@GOTPCREL(%rip),%rax
  679. cmpb $0,(%rax)
  680. {$else FPC_PIC}
  681. cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
  682. {$endif FPC_PIC}
  683. jz .Linclockednolock
  684. lock
  685. incq (%rdi)
  686. jmp .Linclockedend
  687. .Linclockednolock:
  688. incq (%rdi)
  689. .Linclockedend:
  690. {$endif win64}
  691. end;
  692. function InterLockedDecrement (var Target: longint) : longint; assembler;
  693. asm
  694. {$ifdef win64}
  695. movq %rcx,%rax
  696. {$else win64}
  697. movq %rdi,%rax
  698. {$endif win64}
  699. movl $-1,%edx
  700. xchgq %rdx,%rax
  701. lock
  702. xaddl %eax, (%rdx)
  703. decl %eax
  704. end;
  705. function InterLockedIncrement (var Target: longint) : longint; assembler;
  706. asm
  707. {$ifdef win64}
  708. movq %rcx,%rax
  709. {$else win64}
  710. movq %rdi,%rax
  711. {$endif win64}
  712. movl $1,%edx
  713. xchgq %rdx,%rax
  714. lock
  715. xaddl %eax, (%rdx)
  716. incl %eax
  717. end;
  718. function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler;
  719. asm
  720. {$ifdef win64}
  721. xchgl (%rcx),%edx
  722. movl %edx,%eax
  723. {$else win64}
  724. xchgl (%rdi),%esi
  725. movl %esi,%eax
  726. {$endif win64}
  727. end;
  728. function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler;
  729. asm
  730. {$ifdef win64}
  731. xchgq %rcx,%rdx
  732. lock
  733. xaddl %ecx, (%rdx)
  734. movl %ecx,%eax
  735. {$else win64}
  736. xchgq %rdi,%rsi
  737. lock
  738. xaddl %edi, (%rsi)
  739. movl %edi,%eax
  740. {$endif win64}
  741. end;
  742. function InterLockedCompareExchange(var Target: longint; NewValue, Comperand : longint): longint; assembler;
  743. asm
  744. {$ifdef win64}
  745. movl %r8d,%eax
  746. lock
  747. cmpxchgl %edx,(%rcx)
  748. {$else win64}
  749. movl %edx,%eax
  750. lock
  751. cmpxchgl %esi,(%rdi)
  752. {$endif win64}
  753. end;
  754. function InterLockedDecrement64 (var Target: int64) : int64; assembler;
  755. asm
  756. {$ifdef win64}
  757. movq %rcx,%rax
  758. {$else win64}
  759. movq %rdi,%rax
  760. {$endif win64}
  761. movq $-1,%rdx
  762. xchgq %rdx,%rax
  763. lock
  764. xaddq %rax, (%rdx)
  765. decq %rax
  766. end;
  767. function InterLockedIncrement64 (var Target: int64) : int64; assembler;
  768. asm
  769. {$ifdef win64}
  770. movq %rcx,%rax
  771. {$else win64}
  772. movq %rdi,%rax
  773. {$endif win64}
  774. movq $1,%rdx
  775. xchgq %rdx,%rax
  776. lock
  777. xaddq %rax, (%rdx)
  778. incq %rax
  779. end;
  780. function InterLockedExchange64 (var Target: int64;Source : int64) : int64; assembler;
  781. asm
  782. {$ifdef win64}
  783. xchgq (%rcx),%rdx
  784. movq %rdx,%rax
  785. {$else win64}
  786. xchgq (%rdi),%rsi
  787. movq %rsi,%rax
  788. {$endif win64}
  789. end;
  790. function InterLockedExchangeAdd64 (var Target: int64;Source : int64) : int64; assembler;
  791. asm
  792. {$ifdef win64}
  793. xchgq %rcx,%rdx
  794. lock
  795. xaddq %rcx, (%rdx)
  796. movq %rcx,%rax
  797. {$else win64}
  798. xchgq %rdi,%rsi
  799. lock
  800. xaddq %rdi, (%rsi)
  801. movq %rdi,%rax
  802. {$endif win64}
  803. end;
  804. function InterLockedCompareExchange64(var Target: int64; NewValue, Comperand : int64): int64; assembler;
  805. asm
  806. {$ifdef win64}
  807. movq %r8,%rax
  808. lock
  809. cmpxchgq %rdx,(%rcx)
  810. {$else win64}
  811. movq %rdx,%rax
  812. lock
  813. cmpxchgq %rsi,(%rdi)
  814. {$endif win64}
  815. end;
  816. {****************************************************************************
  817. FPU
  818. ****************************************************************************}
  819. const
  820. { Internal constants for use in system unit }
  821. FPU_Invalid = 1;
  822. FPU_Denormal = 2;
  823. FPU_DivisionByZero = 4;
  824. FPU_Overflow = 8;
  825. FPU_Underflow = $10;
  826. FPU_StackUnderflow = $20;
  827. FPU_StackOverflow = $40;
  828. FPU_ExceptionMask = $ff;
  829. fpucw : word = $1300 or FPU_StackUnderflow or FPU_Underflow or FPU_Denormal;
  830. MM_MaskInvalidOp = %0000000010000000;
  831. MM_MaskDenorm = %0000000100000000;
  832. MM_MaskDivZero = %0000001000000000;
  833. MM_MaskOverflow = %0000010000000000;
  834. MM_MaskUnderflow = %0000100000000000;
  835. MM_MaskPrecision = %0001000000000000;
  836. mxcsr : dword = MM_MaskUnderflow or MM_MaskPrecision or MM_MaskDenorm;
  837. procedure fpc_cpuinit;
  838. begin
  839. { don't let libraries influence the FPU cw set by the host program }
  840. if IsLibrary then
  841. begin
  842. Default8087CW:=Get8087CW;
  843. mxcsr:=GetSSECSR;
  844. end;
  845. SysResetFPU;
  846. if not(IsLibrary) then
  847. SysInitFPU;
  848. end;
  849. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  850. Procedure SysInitFPU;
  851. var
  852. { these locals are so we don't have to hack pic code in the assembler }
  853. localmxcsr: dword;
  854. localfpucw: word;
  855. begin
  856. localmxcsr:=mxcsr;
  857. localfpucw:=fpucw;
  858. asm
  859. fldcw localfpucw
  860. { set sse exceptions }
  861. ldmxcsr localmxcsr
  862. end ['RAX'];
  863. { x86-64 might use softfloat code }
  864. softfloat_exception_mask:=float_flag_underflow or float_flag_inexact or float_flag_denormal;
  865. end;
  866. {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  867. Procedure SysResetFPU;
  868. var
  869. { these locals are so we don't have to hack pic code in the assembler }
  870. localmxcsr: dword;
  871. localfpucw: word;
  872. begin
  873. localfpucw:=Default8087CW;
  874. localmxcsr:=mxcsr;
  875. asm
  876. fninit
  877. fwait
  878. fldcw localfpucw
  879. ldmxcsr localmxcsr
  880. end;
  881. { x86-64 might use softfloat code }
  882. softfloat_exception_flags:=0;
  883. end;
  884. {$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
  885. {$define FPC_SYSTEM_HAS_MEM_BARRIER}
  886. procedure ReadBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  887. asm
  888. lfence
  889. end;
  890. procedure ReadDependencyBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  891. asm
  892. { reads imply barrier on earlier reads depended on }
  893. end;
  894. procedure ReadWriteBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  895. asm
  896. mfence
  897. end;
  898. procedure WriteBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
  899. asm
  900. sfence
  901. end;
  902. {$endif}
  903. {****************************************************************************
  904. Math Routines
  905. ****************************************************************************}
  906. {$define FPC_SYSTEM_HAS_SWAPENDIAN}
  907. { SwapEndian(<16 Bit>) being inlined is faster than using assembler }
  908. function SwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  909. begin
  910. { the extra Word type cast is necessary because the "AValue shr 8" }
  911. { is turned into "longint(AValue) shr 8", so if AValue < 0 then }
  912. { the sign bits from the upper 16 bits are shifted in rather than }
  913. { zeroes. }
  914. Result := SmallInt((Word(AValue) shr 8) or (Word(AValue) shl 8));
  915. end;
  916. function SwapEndian(const AValue: Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
  917. begin
  918. Result := Word((AValue shr 8) or (AValue shl 8));
  919. end;
  920. function SwapEndian(const AValue: LongInt): LongInt; assembler;
  921. asm
  922. {$ifdef win64}
  923. movl %ecx, %eax
  924. {$else win64}
  925. movl %edi, %eax
  926. {$endif win64}
  927. bswap %eax
  928. end;
  929. function SwapEndian(const AValue: DWord): DWord; assembler;
  930. asm
  931. {$ifdef win64}
  932. movl %ecx, %eax
  933. {$else win64}
  934. movl %edi, %eax
  935. {$endif win64}
  936. bswap %eax
  937. end;
  938. function SwapEndian(const AValue: Int64): Int64; assembler;
  939. asm
  940. {$ifdef win64}
  941. movq %rcx, %rax
  942. {$else win64}
  943. movq %rdi, %rax
  944. {$endif win64}
  945. bswap %rax
  946. end;
  947. function SwapEndian(const AValue: QWord): QWord; assembler;
  948. asm
  949. {$ifdef win64}
  950. movq %rcx, %rax
  951. {$else win64}
  952. movq %rdi, %rax
  953. {$endif win64}
  954. bswap %rax
  955. end;