x86_64.inc 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2002 by Florian Klaempfl and Sergei Gorelkin
  4. Members of the Free Pascal development team
  5. Processor dependent implementation for the system unit for
  6. the x86-64 architecture
  7. See the file COPYING.FPC, included in this distribution,
  8. for details about the copyright.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  12. **********************************************************************}
  13. {$asmmode GAS}
  14. {****************************************************************************
  15. Primitives
  16. ****************************************************************************}
  17. {$define FPC_SYSTEM_HAS_SPTR}
  { Returns the value held in the stack pointer register (RSP) at the point
    where the routine executes. }
  function Sptr: Pointer; assembler; nostackframe; {$ifdef SYSTEMINLINE}inline;{$endif}
  asm
          movq    %rsp, %rax              { result := rsp }
  end;
  22. {$IFNDEF INTERNAL_BACKTRACE}
  23. {$define FPC_SYSTEM_HAS_GET_FRAME}
  { Returns the current value of the frame pointer register (RBP). The routine
    sets up no frame of its own, so RBP is read exactly as the caller left it. }
  function get_frame: pointer; assembler; nostackframe; {$ifdef SYSTEMINLINE}inline;{$endif}
  asm
          movq    %rbp, %rax              { result := rbp }
  end;
  28. {$ENDIF not INTERNAL_BACKTRACE}
  29. {$define FPC_SYSTEM_HAS_GET_PC_ADDR}
  { Returns the quadword stored at the top of the stack. As the routine has no
    stack frame, on entry that slot holds the return address pushed by the
    CALL that invoked it, i.e. a code address inside the caller. }
  function get_pc_addr: pointer; assembler; nostackframe;
  asm
          movq    (%rsp), %rax            { result := return address }
  end;
  34. {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  { Given a frame pointer, returns the pointer-sized value stored one slot
    above it (PPointer(framebp)[1]). A nil frame pointer yields nil.
    The addr parameter is not used by this implementation. }
  function get_caller_addr(framebp:pointer;addr:pointer=nil):pointer;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    if assigned(framebp) then
      get_caller_addr:=PPointer(framebp)[1]
    else
      get_caller_addr:=nil;
  end;
  41. {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  { Given a frame pointer, returns the pointer-sized value stored at that
    address (PPointer(framebp)^). A nil frame pointer yields nil.
    The addr parameter is not used by this implementation. }
  function get_caller_frame(framebp:pointer;addr:pointer=nil):pointer;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    if assigned(framebp) then
      get_caller_frame:=PPointer(framebp)^
    else
      get_caller_frame:=nil;
  end;
  48. // The following assembler procedures are disabled for FreeBSD due to
  49. // multiple issues with its old GNU assembler (Mantis #19188).
  50. // Even after fixing them, it can be enabled only for the trunk version,
  51. // otherwise bootstrapping won't be possible.
  52. // Modified to use oldbinutils as in cpu.pp source, to allow easier use for other targets.
  53. {$ifdef freebsd}
  54. {$ifndef overridebinutils}
  55. {$define oldbinutils}
  56. {$endif}
  57. {$endif freebsd}
  58. {$ifndef oldbinutils}
  59. {$ifndef FPC_SYSTEM_HAS_MOVE}
  60. {$define FPC_SYSTEM_HAS_MOVE}
  { Move: copies count bytes from source to dest; exported under the alias
    FPC_MOVE. The regions may overlap: the copy runs forward unless source
    lies below dest and the two regions overlap, in which case it runs
    backward starting from the end. A count <= 0 copies nothing.
    Register roles once the arguments are normalized:
      rcx = source - dest, so (%rcx,%rdx) is the source byte for (%rdx)
      rdx = destination cursor
      r8  = bytes still to copy
      rax, r9, r10 = scratch }
  61. procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
  62. { Linux: rdi source, rsi dest, rdx count
  63. win64: rcx source, rdx dest, r8 count }
  64. asm
  { Bring non-win64 arguments into the win64 register assignment. }
  65. {$ifndef win64}
  66. mov %rdx, %r8
  67. mov %rsi, %rdx
  68. mov %rdi, %rcx
  69. {$endif win64}
  70. mov %r8, %rax
  71. sub %rdx, %rcx { rcx = src - dest }
  72. jz .Lquit { exit if src=dest }
  73. jnb .L1 { src>dest => forward move }
  74. add %rcx, %rax { rcx is negative => r8+rcx > 0 if regions overlap }
  75. jb .Lback { if no overlap, still do forward move }
  { ---- forward copy ---- }
  76. .L1:
  77. cmp $8, %r8
  78. jl .Lless8f { signed compare, negative count not allowed }
  79. test $7, %dl
  80. je .Ldestaligned
  81. test $1, %dl { align dest by moving first 1+2+4 bytes }
  82. je .L2f
  83. mov (%rcx,%rdx,1),%al
  84. dec %r8
  85. mov %al, (%rdx)
  86. add $1, %rdx
  87. .L2f:
  88. test $2, %dl
  89. je .L4f
  90. mov (%rcx,%rdx,1),%ax
  91. sub $2, %r8
  92. mov %ax, (%rdx)
  93. add $2, %rdx
  94. .L4f:
  95. test $4, %dl
  96. je .Ldestaligned
  97. mov (%rcx,%rdx,1),%eax
  98. sub $4, %r8
  99. mov %eax, (%rdx)
  100. add $4, %rdx
  { dest (rdx) is 8-byte aligned from here on; r9 = number of 32-byte blocks }
  101. .Ldestaligned:
  102. mov %r8, %r9
  103. shr $5, %r9
  104. jne .Lmore32
  { fewer than 32 bytes left: copy whole quadwords, then single bytes }
  105. .Ltail:
  106. mov %r8, %r9
  107. shr $3, %r9
  108. je .Lless8f
  109. .balign 16
  110. .Lloop8f: { max. 8 iterations }
  111. mov (%rcx,%rdx,1),%rax
  112. mov %rax, (%rdx)
  113. add $8, %rdx
  114. dec %r9
  115. jne .Lloop8f
  116. and $7, %r8
  117. .Lless8f:
  118. test %r8, %r8
  119. jle .Lquit
  120. .balign 16
  121. .Lloop1f:
  122. mov (%rcx,%rdx,1),%al
  123. mov %al,(%rdx)
  124. inc %rdx
  125. dec %r8
  126. jne .Lloop1f
  127. .Lquit:
  128. retq
  { r9 = number of 32-byte blocks. With 0x2000 or more blocks (>= 256 KiB)
    and an unsigned src-dest offset of at least 0x1000 the non-temporal
    path is taken; otherwise the plain 32-bytes-per-iteration loop runs. }
  129. .Lmore32:
  130. cmp $0x2000, %r9 { this limit must be processor-specific (1/2 L2 cache size) }
  131. jnae .Lloop32
  132. cmp $0x1000, %rcx { but don't bother bypassing cache if src and dest }
  133. jnb .Lntloopf { are close to each other}
  134. .balign 16
  135. .Lloop32:
  136. add $32,%rdx
  137. mov -32(%rcx,%rdx,1),%rax
  138. mov -24(%rcx,%rdx,1),%r10
  139. mov %rax,-32(%rdx)
  140. mov %r10,-24(%rdx)
  141. dec %r9
  142. mov -16(%rcx,%rdx,1),%rax
  143. mov -8(%rcx,%rdx,1),%r10
  144. mov %rax,-16(%rdx)
  145. mov %r10,-8(%rdx)
  146. jne .Lloop32
  147. and $0x1f, %r8
  148. jmpq .Ltail
  { Non-temporal forward copy, one 0x1000-byte chunk per outer iteration:
    first prefetch the source chunk (32 x 0x80 bytes), rewind rdx, then copy
    it with 64 iterations of 64 bytes using movnti stores. }
  149. .Lntloopf:
  150. mov $32, %eax
  151. .balign 16
  152. .Lpref:
  153. prefetchnta (%rcx,%rdx,1)
  154. prefetchnta 0x40(%rcx,%rdx,1)
  155. add $0x80, %rdx
  156. dec %eax
  157. jne .Lpref
  158. sub $0x1000, %rdx
  159. mov $64, %eax
  160. .balign 16
  161. .Loop64:
  162. add $64, %rdx
  163. mov -64(%rcx,%rdx,1), %r9
  164. mov -56(%rcx,%rdx,1), %r10
  165. movnti %r9, -64(%rdx)
  166. movnti %r10, -56(%rdx)
  167. mov -48(%rcx,%rdx,1), %r9
  168. mov -40(%rcx,%rdx,1), %r10
  169. movnti %r9, -48(%rdx)
  170. movnti %r10, -40(%rdx)
  171. dec %eax
  172. mov -32(%rcx,%rdx,1), %r9
  173. mov -24(%rcx,%rdx,1), %r10
  174. movnti %r9, -32(%rdx)
  175. movnti %r10, -24(%rdx)
  176. mov -16(%rcx,%rdx,1), %r9
  177. mov -8(%rcx,%rdx,1), %r10
  178. movnti %r9, -16(%rdx)
  179. movnti %r10, -8(%rdx)
  180. jne .Loop64
  { one chunk done; repeat while at least another 0x1000 bytes remain }
  181. sub $0x1000, %r8
  182. cmp $0x1000, %r8
  183. jae .Lntloopf
  184. mfence
  185. jmpq .Ldestaligned { go handle remaining bytes }
  186. { backwards move }
  { Mirror image of the forward path: rdx starts one past the last
    destination byte and is decremented before each access. }
  187. .Lback:
  188. add %r8, %rdx { points to the end of dest }
  189. cmp $8, %r8
  190. jl .Lless8b { signed compare, negative count not allowed }
  191. test $7, %dl
  192. je .Ldestalignedb
  193. test $1, %dl
  194. je .L2b
  195. dec %rdx
  196. mov (%rcx,%rdx,1), %al
  197. dec %r8
  198. mov %al, (%rdx)
  199. .L2b:
  200. test $2, %dl
  201. je .L4b
  202. sub $2, %rdx
  203. mov (%rcx,%rdx,1), %ax
  204. sub $2, %r8
  205. mov %ax, (%rdx)
  206. .L4b:
  207. test $4, %dl
  208. je .Ldestalignedb
  209. sub $4, %rdx
  210. mov (%rcx,%rdx,1), %eax
  211. sub $4, %r8
  212. mov %eax, (%rdx)
  { end-of-dest cursor (rdx) is 8-byte aligned; r9 = number of 32-byte blocks }
  213. .Ldestalignedb:
  214. mov %r8, %r9
  215. shr $5, %r9
  216. jne .Lmore32b
  217. .Ltailb:
  218. mov %r8, %r9
  219. shr $3, %r9
  220. je .Lless8b
  221. .Lloop8b:
  222. sub $8, %rdx
  223. mov (%rcx,%rdx,1), %rax
  224. dec %r9
  225. mov %rax, (%rdx)
  226. jne .Lloop8b
  227. and $7, %r8
  228. .Lless8b:
  229. test %r8, %r8
  230. jle .Lquit2
  231. .balign 16
  232. .Lsmallb:
  233. dec %rdx
  234. mov (%rcx,%rdx,1), %al
  235. dec %r8
  236. mov %al,(%rdx)
  237. jnz .Lsmallb
  238. .Lquit2:
  239. retq
  { Same size threshold as the forward path. rcx (src - dest) is negative
    here, so the distance test is an unsigned compare against -0x1000. }
  240. .Lmore32b:
  241. cmp $0x2000, %r9
  242. jnae .Lloop32b
  243. cmp $0xfffffffffffff000,%rcx
  244. jb .Lntloopb
  245. .balign 16
  246. .Lloop32b:
  247. sub $32, %rdx
  248. mov 24(%rcx,%rdx,1), %rax
  249. mov 16(%rcx,%rdx,1), %r10
  250. mov %rax, 24(%rdx)
  251. mov %r10, 16(%rdx)
  252. dec %r9
  253. mov 8(%rcx,%rdx,1),%rax
  254. mov (%rcx,%rdx,1), %r10
  255. mov %rax, 8(%rdx)
  256. mov %r10, (%rdx)
  257. jne .Lloop32b
  258. and $0x1f, %r8
  259. jmpq .Ltailb
  { Non-temporal backward copy of one 0x1000-byte chunk: prefetch downwards,
    restore rdx, then copy 64 x 64 bytes from high to low addresses. }
  260. .Lntloopb:
  261. mov $32, %eax
  262. .balign 16
  263. .Lprefb:
  264. sub $0x80, %rdx
  265. prefetchnta (%rcx,%rdx,1)
  266. prefetchnta 0x40(%rcx,%rdx,1)
  267. dec %eax
  268. jnz .Lprefb
  269. add $0x1000, %rdx
  270. mov $0x40, %eax
  271. .balign 16
  272. .Lloop64b:
  273. sub $64, %rdx
  274. mov 56(%rcx,%rdx,1), %r9
  275. mov 48(%rcx,%rdx,1), %r10
  276. movnti %r9, 56(%rdx)
  277. movnti %r10, 48(%rdx)
  278. mov 40(%rcx,%rdx,1), %r9
  279. mov 32(%rcx,%rdx,1), %r10
  280. movnti %r9, 40(%rdx)
  281. movnti %r10, 32(%rdx)
  282. dec %eax
  283. mov 24(%rcx,%rdx,1), %r9
  284. mov 16(%rcx,%rdx,1), %r10
  285. movnti %r9, 24(%rdx)
  286. movnti %r10, 16(%rdx)
  287. mov 8(%rcx,%rdx,1), %r9
  288. mov (%rcx,%rdx,1), %r10
  289. movnti %r9, 8(%rdx)
  290. movnti %r10, (%rdx)
  291. jne .Lloop64b
  292. sub $0x1000, %r8
  293. cmp $0x1000, %r8
  294. jae .Lntloopb
  295. mfence
  296. jmpq .Ldestalignedb
  297. end;
  298. {$endif FPC_SYSTEM_HAS_MOVE}
  299. {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
  300. {$define FPC_SYSTEM_HAS_FILLCHAR}
  { FillChar: stores the byte value into count consecutive bytes starting at
    x. A count <= 0 stores nothing (the count tests are signed).
    Register roles once the arguments are normalized:
      rcx = destination cursor
      rdx = bytes still to fill
      r8  = fill value; for count >= 8 it is replicated into all eight bytes
      rax, r9 = scratch }
  301. Procedure FillChar(var x;count:SizeInt;value:byte);assembler;nostackframe;
  302. asm
  303. { win64: rcx dest, rdx count, r8b value
  304. linux: rdi dest, rsi count, rdx value }
  305. {$ifndef win64}
  306. mov %rdx, %r8
  307. mov %rsi, %rdx
  308. mov %rdi, %rcx
  309. {$endif win64}
  { fewer than 8 bytes: go straight to the byte loop, which only uses r8b }
  310. cmp $8, %rdx
  311. jl .Ltiny
  312. // TODO: movz?q and movs?q are not accepted by FPC asmreader, it needs fixing.
  313. // `movzbl' instead is accepted and generates correct code with internal assembler,
  314. // but breaks targets using external GAS (Mantis #19188).
  315. // So use a different instruction for now.
  316. { expand byte value }
  317. andq $0xff, %r8
  318. {
  319. movzbq %r8b, %r8
  320. }
  { multiplying the zero-extended byte by 0x0101..01 copies it into every byte of r8 }
  321. mov $0x0101010101010101,%r9
  322. imul %r9, %r8
  323. test $7, %cl
  324. je .Laligned
  325. { align dest to 8 bytes }
  326. test $1, %cl
  327. je .L2
  328. movb %r8b, (%rcx)
  329. add $1, %rcx
  330. sub $1, %rdx
  331. .L2:
  332. test $2, %cl
  333. je .L4
  334. movw %r8w, (%rcx)
  335. add $2, %rcx
  336. sub $2, %rdx
  337. .L4:
  338. test $4, %cl
  339. je .Laligned
  340. movl %r8d, (%rcx)
  341. add $4, %rcx
  342. sub $4, %rdx
  { rcx is 8-byte aligned: rax = number of 64-byte blocks, rdx = remainder (0..63) }
  343. .Laligned:
  344. mov %rdx, %rax
  345. and $0x3f, %rdx
  346. shr $6, %rax
  347. jne .Lmore64
  { fewer than 64 bytes left: rax = whole quadwords, rdx = trailing bytes (0..7) }
  348. .Lless64:
  349. mov %rdx, %rax
  350. and $7, %rdx
  351. shr $3, %rax
  352. je .Ltiny
  353. .balign 16
  354. .Lloop8: { max. 8 iterations }
  355. mov %r8, (%rcx)
  356. add $8, %rcx
  357. dec %rax
  358. jne .Lloop8
  { byte-at-a-time tail; also the whole job when count < 8 }
  359. .Ltiny:
  360. test %rdx, %rdx
  361. jle .Lquit
  362. .Lloop1:
  363. movb %r8b, (%rcx)
  364. inc %rcx
  365. dec %rdx
  366. jnz .Lloop1
  367. .Lquit:
  368. retq
  { rax = number of 64-byte blocks; 0x2000 or more of them (>= 512 KiB) are
    written with non-temporal stores, fewer with ordinary stores }
  369. .Lmore64:
  370. cmp $0x2000,%rax
  371. jae .Lloop64nti
  372. .balign 16
  373. .Lloop64:
  374. add $64, %rcx
  375. mov %r8, -64(%rcx)
  376. mov %r8, -56(%rcx)
  377. mov %r8, -48(%rcx)
  378. mov %r8, -40(%rcx)
  379. dec %rax
  380. mov %r8, -32(%rcx)
  381. mov %r8, -24(%rcx)
  382. mov %r8, -16(%rcx)
  383. mov %r8, -8(%rcx)
  384. jne .Lloop64
  385. jmp .Lless64
  386. .balign 16
  387. .Lloop64nti:
  388. add $64, %rcx
  389. movnti %r8, -64(%rcx)
  390. movnti %r8, -56(%rcx)
  391. movnti %r8, -48(%rcx)
  392. movnti %r8, -40(%rcx)
  393. dec %rax
  394. movnti %r8, -32(%rcx)
  395. movnti %r8, -24(%rcx)
  396. movnti %r8, -16(%rcx)
  397. movnti %r8, -8(%rcx)
  398. jnz .Lloop64nti
  { fence after the movnti stores, before finishing with ordinary stores }
  399. mfence
  400. jmp .Lless64
  401. end;
  402. {$endif FPC_SYSTEM_HAS_FILLCHAR}
  403. {$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
  404. {$define FPC_SYSTEM_HAS_INDEXBYTE}
  { IndexByte: returns the zero-based index of the first byte equal to b among
    the len bytes at buf, or -1 when there is none or len = 0. len is compared
    unsigned, so a negative len acts as a very large length.
    The buffer is examined in 16-byte aligned blocks with movdqa, so bytes
    before buf and after buf+len inside those aligned blocks are read; matches
    outside the buffer are masked off or rejected by the length check.
    Register roles: r8 = buf, rdx = len, xmm1 = b replicated to all 16 bytes,
    rcx = number of buffer bytes examined so far, eax = match bitmask. }
  405. function IndexByte(Const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe;
  406. { win64: rcx buf, rdx len, r8b b
  407. linux: rdi buf, rsi len, dl b }
  408. asm
  409. {$ifdef win64}
  410. movd %r8d, %xmm1
  411. {$else}
  412. movd %edx, %xmm1
  413. movq %rdi, %rcx
  414. movq %rsi, %rdx
  415. {$endif}
  { the two punpcklbw plus pshufd broadcast the low byte of xmm1 to all lanes;
    they are interleaved with the integer instructions below }
  416. mov %rcx, %r8
  417. punpcklbw %xmm1, %xmm1
  418. and $-0x10, %rcx { highest aligned address before buf }
  419. test %rdx, %rdx
  420. punpcklbw %xmm1, %xmm1
  421. jz .Lnotfound { exit if len=0 }
  422. add $16, %rcx { first aligned address after buf }
  423. pshufd $0, %xmm1, %xmm1
  424. movdqa -16(%rcx), %xmm0 { Fetch first 16 bytes (up to 15 bytes before target) }
  425. sub %r8, %rcx { rcx=number of valid bytes, r8=original ptr }
  426. pcmpeqb %xmm1, %xmm0 { compare with pattern and get bitmask }
  427. pmovmskb %xmm0, %eax
  { drop the mask bits that belong to bytes located before buf }
  428. shl %cl, %eax { shift valid bits into high word }
  429. and $0xffff0000, %eax { clear low word containing invalid bits }
  430. shr %cl, %eax { shift back }
  431. jmp .Lcontinue
  432. .balign 16
  433. .Lloop:
  434. movdqa (%r8,%rcx), %xmm0 { r8 and rcx may have any values, }
  435. add $16, %rcx { but their sum is evenly divisible by 16. }
  436. pcmpeqb %xmm1, %xmm0
  437. pmovmskb %xmm0, %eax
  { eax = matches in the block just examined; keep going while len > rcx (unsigned) }
  438. .Lcontinue:
  439. test %eax, %eax
  440. jnz .Lmatch
  441. cmp %rcx, %rdx
  442. ja .Lloop
  443. .Lnotfound:
  444. or $-1, %rax
  445. retq
  { index = rcx - 16 + position of the lowest set bit in the mask }
  446. .Lmatch:
  447. bsf %eax, %eax
  448. lea -16(%rcx,%rax), %rax
  449. cmp %rax, %rdx { check against the buffer length }
  450. jbe .Lnotfound
  { falls through to the end of the routine with the index in rax }
  451. end;
  452. {$endif FPC_SYSTEM_HAS_INDEXBYTE}
  453. {$ifndef FPC_SYSTEM_HAS_INDEXWORD}
  454. {$define FPC_SYSTEM_HAS_INDEXWORD}
  { IndexWord: returns the zero-based index, counted in words, of the first
    16-bit word equal to b among the len words at buf, or -1 when there is
    none or len = 0. len is compared unsigned.
    The buffer is read in 16-byte aligned blocks with movdqa, so memory before
    buf and after its end inside those blocks is read as well.
    Two code paths:
      - buf at an even address: word-wise compare (pcmpeqw); rcx and rdx count words.
      - buf at an odd address: byte-wise compare against the byte-swapped
        pattern; rcx and rdx count bytes, and one match bit is carried from
        each 16-byte block to the next in r10d. }
  455. function IndexWord(Const buf;len:SizeInt;b:word):SizeInt; assembler; nostackframe;
  456. { win64: rcx buf, rdx len, r8w b
  457. linux: rdi buf, rsi len, dx b }
  458. asm
  459. {$ifdef win64}
  460. movd %r8d, %xmm1
  461. {$else}
  462. movd %edx, %xmm1
  463. movq %rdi, %rcx
  464. movq %rsi, %rdx
  465. {$endif}
  { punpcklwd plus pshufd broadcast the low word of xmm1 to all eight lanes }
  466. mov %rcx, %r8
  467. punpcklwd %xmm1, %xmm1
  468. and $-0x10, %rcx
  469. test %rdx, %rdx
  470. pshufd $0, %xmm1, %xmm1
  471. jz .Lnotfound { exit if len=0 }
  472. add $16, %rcx
  473. movdqa -16(%rcx), %xmm0 { Fetch first 16 bytes (up to 14 bytes before target) }
  474. sub %r8, %rcx { rcx=number of valid bytes }
  475. test $1, %r8b { if buffer isn't aligned to word boundary, }
  476. jnz .Lunaligned { use a different algorithm }
  477. pcmpeqw %xmm1, %xmm0
  478. pmovmskb %xmm0, %eax
  { drop the mask bits that belong to bytes located before buf }
  479. shl %cl, %eax
  480. and $0xffff0000, %eax
  481. shr %cl, %eax
  482. shr $1, %ecx { bytes->words }
  483. jmp .Lcontinue
  484. .balign 16
  { aligned path: rcx = words examined so far, 8 words per iteration }
  485. .Lloop:
  486. movdqa (%r8,%rcx,2), %xmm0
  487. add $8, %rcx
  488. pcmpeqw %xmm1, %xmm0
  489. pmovmskb %xmm0, %eax
  490. .Lcontinue:
  491. test %eax, %eax
  492. jnz .Lmatch
  493. cmp %rcx, %rdx
  494. ja .Lloop
  495. .Lnotfound:
  496. or $-1, %rax
  497. retq
  { the byte mask has two bits per word, hence the halving of the bit position }
  498. .Lmatch:
  499. bsf %eax, %eax
  500. shr $1, %eax { in words }
  501. lea -8(%rcx,%rax), %rax
  502. cmp %rax, %rdx
  503. jbe .Lnotfound { if match is after the specified length, ignore it }
  504. retq
  505. .Lunaligned:
  506. movdqa %xmm1, %xmm2 { (mis)align the pattern (in this particular case: }
  507. psllw $8, %xmm1 { swap bytes of each word of pattern) }
  508. psrlw $8, %xmm2
  509. por %xmm2, %xmm1
  510. pcmpeqb %xmm1, %xmm0
  511. pmovmskb %xmm0, %eax
  512. shl %cl, %eax
  513. and $0xffff0000, %eax
  514. shr %cl, %eax
  515. add %rdx, %rdx { length words -> bytes }
  516. xor %r10d, %r10d { nothing to merge yet }
  517. jmp .Lcontinue_u
  518. .balign 16
  { unaligned path: rcx = bytes examined so far, 16 bytes per iteration }
  519. .Lloop_u:
  520. movdqa (%r8,%rcx), %xmm0
  521. add $16, %rcx
  522. pcmpeqb %xmm1, %xmm0 { compare by bytes }
  523. shr $16, %r10d { bit 16 shifts into 0 }
  524. pmovmskb %xmm0, %eax
  { a word matches when both of its bytes match, i.e. two adjacent mask bits are set }
  525. .Lcontinue_u:
  526. shl $1, %eax { 15:0 -> 16:1 }
  527. or %r10d, %eax { merge bit 0 from previous round }
  528. mov %eax, %r10d
  529. shr $1, %eax { now AND together adjacent pairs of bits }
  530. and %r10d, %eax
  531. and $0x5555, %eax { also reset odd bits }
  532. jnz .Lmatch_u
  533. cmpq %rcx, %rdx
  534. ja .Lloop_u
  535. .Lnotfound_u:
  536. or $-1, %rax
  537. retq
  538. .Lmatch_u:
  539. bsf %eax, %eax
  540. lea -16(%rcx,%rax), %rax
  541. cmp %rax, %rdx
  542. jbe .Lnotfound_u { if match is after the specified length, ignore it }
  543. sar $1, %rax { in words }
  { falls through to the end of the routine with the word index in rax }
  544. end;
  545. {$endif FPC_SYSTEM_HAS_INDEXWORD}
  546. {$endif not oldbinutils}
  547. {$ifndef FPC_SYSTEM_HAS_COMPAREBYTE}
  548. {$define FPC_SYSTEM_HAS_COMPAREBYTE}
  { CompareByte: compares len bytes of buf1 and buf2 from the lowest address
    upwards. Returns 0 when len = 0 or all bytes are equal; otherwise returns
    the difference (byte of buf1) - (byte of buf2), both zero-extended, at the
    first position where the buffers differ.
    Register roles after normalization: rcx = buf1 cursor, rdx = buf2 cursor,
    r8 = bytes left to compare, r9b = current byte of buf1. }
  function CompareByte(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe;
  { win64: rcx buf, rdx buf, r8 len
    linux: rdi buf, rsi buf, rdx len }
  asm
  {$ifndef win64}
          mov     %rdx, %r8
          mov     %rsi, %rdx
          mov     %rdi, %rcx
  {$endif win64}
          testq   %r8, %r8
          je      .LCmpbyteSame
          .balign 8
  .LCmpbyteNext:
          movb    (%rcx), %r9b
          cmpb    (%rdx), %r9b
          jne     .LCmpbyteDiffer
          incq    %rcx
          incq    %rdx
          decq    %r8
          jne     .LCmpbyteNext
  .LCmpbyteSame:
          xorl    %eax, %eax              { nothing differed: result 0 }
          ret
  .LCmpbyteDiffer:
          movzbq  (%rdx), %r8             { differing byte of buf2 }
          movzbq  %r9b, %rax              { differing byte of buf1 }
          subq    %r8, %rax
          ret
  end;
  578. {$endif FPC_SYSTEM_HAS_COMPAREBYTE}
  579. {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  580. { does a thread safe inc/dec }
  { declocked (longint): decrements l by one and returns True when the result
    is zero. The LOCK prefix is applied only when the global IsMultithread
    flag is non-zero, which avoids its cost in single-threaded programs. }
  function declocked(var l : longint) : boolean; assembler; nostackframe;
  asm
  {$ifndef win64}
          mov     %rdi, %rcx              { rcx := @l, as on win64 }
  {$endif win64}
  {$ifdef FPC_PIC}
          movq    IsMultithread@GOTPCREL(%rip), %rax
          cmpl    $0, (%rax)
  {$else FPC_PIC}
          cmpl    $0, IsMultithread(%rip)
  {$endif FPC_PIC}
          jz      .Ldeclockedplain
          lock
          decl    (%rcx)
          setzb   %al                     { ZF comes from the decrement }
          ret
  .Ldeclockedplain:
          decl    (%rcx)
          setzb   %al
  end;
  603. {$define FPC_SYSTEM_HAS_DECLOCKED_INT64}
  { declocked (int64): decrements l by one and returns True when the result
    is zero. The LOCK prefix is applied only when the global IsMultithread
    flag is non-zero, which avoids its cost in single-threaded programs. }
  function declocked(var l : int64) : boolean; assembler; nostackframe;
  asm
  {$ifndef win64}
          mov     %rdi, %rcx              { rcx := @l, as on win64 }
  {$endif win64}
  {$ifdef FPC_PIC}
          movq    IsMultithread@GOTPCREL(%rip), %rax
          cmpl    $0, (%rax)
  {$else FPC_PIC}
          cmpl    $0, IsMultithread(%rip)
  {$endif FPC_PIC}
          jz      .Ldeclockedplain
          lock
          decq    (%rcx)
          setzb   %al                     { ZF comes from the decrement }
          ret
  .Ldeclockedplain:
          decq    (%rcx)
          setzb   %al
  end;
  626. {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  { inclocked (longint): increments l by one. The LOCK prefix is applied only
    when the global IsMultithread flag is non-zero, which avoids its cost in
    single-threaded programs. }
  procedure inclocked(var l : longint); assembler; nostackframe;
  asm
  {$ifndef win64}
          mov     %rdi, %rcx              { rcx := @l, as on win64 }
  {$endif win64}
  {$ifdef FPC_PIC}
          movq    IsMultithread@GOTPCREL(%rip), %rax
          cmpl    $0, (%rax)
  {$else FPC_PIC}
          cmpl    $0, IsMultithread(%rip)
  {$endif FPC_PIC}
          jz      .Linclockedplain
          lock
          incl    (%rcx)
          ret
  .Linclockedplain:
          incl    (%rcx)
  end;
  648. {$define FPC_SYSTEM_HAS_INCLOCKED_INT64}
  { inclocked (int64): increments l by one. The LOCK prefix is applied only
    when the global IsMultithread flag is non-zero, which avoids its cost in
    single-threaded programs. }
  procedure inclocked(var l : int64); assembler; nostackframe;
  asm
  {$ifndef win64}
          mov     %rdi, %rcx              { rcx := @l, as on win64 }
  {$endif win64}
  {$ifdef FPC_PIC}
          movq    IsMultithread@GOTPCREL(%rip), %rax
          cmpl    $0, (%rax)
  {$else FPC_PIC}
          cmpl    $0, IsMultithread(%rip)
  {$endif FPC_PIC}
          jz      .Linclockedplain
          lock
          incq    (%rcx)
          ret
  .Linclockedplain:
          incq    (%rcx)
  end;
  { InterLockedDecrement: subtracts one from Target with a locked XADD and
    returns the resulting (decremented) value. }
  function InterLockedDecrement (var Target: longint) : longint; assembler; nostackframe;
  asm
          movl    $-1, %eax               { addend }
  {$ifdef win64}
          lock
          xaddl   %eax, (%rcx)            { eax := old value, Target := old - 1 }
  {$else win64}
          lock
          xaddl   %eax, (%rdi)            { eax := old value, Target := old - 1 }
  {$endif win64}
          decl    %eax                    { old - 1 = new value }
  end;
  { InterLockedIncrement: adds one to Target with a locked XADD and returns
    the resulting (incremented) value. }
  function InterLockedIncrement (var Target: longint) : longint; assembler; nostackframe;
  asm
          movl    $1, %eax                { addend }
  {$ifdef win64}
          lock
          xaddl   %eax, (%rcx)            { eax := old value, Target := old + 1 }
  {$else win64}
          lock
          xaddl   %eax, (%rdi)            { eax := old value, Target := old + 1 }
  {$endif win64}
          incl    %eax                    { old + 1 = new value }
  end;
  { InterLockedExchange: stores Source into Target and returns the value
    Target held before. XCHG with a memory operand is used, so no explicit
    LOCK prefix appears. }
  function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler; nostackframe;
  asm
  {$ifdef win64}
          movl    %edx, %eax              { eax := Source }
          xchgl   %eax, (%rcx)            { swap with Target; eax := old value }
  {$else win64}
          movl    %esi, %eax              { eax := Source }
          xchgl   %eax, (%rdi)            { swap with Target; eax := old value }
  {$endif win64}
  end;
  { InterLockedExchangeAdd: adds Source to Target with a locked XADD and
    returns the value Target held before the addition. }
  function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler; nostackframe;
  asm
  {$ifdef win64}
          movl    %edx, %eax              { eax := Source }
          lock
          xaddl   %eax, (%rcx)            { eax := old value, Target += Source }
  {$else win64}
          movl    %esi, %eax              { eax := Source }
          lock
          xaddl   %eax, (%rdi)            { eax := old value, Target += Source }
  {$endif win64}
  end;
  { InterLockedCompareExchange: if Target equals Comperand, stores NewValue
    into Target (locked CMPXCHG). Returns the value Target held before the
    operation in either case. }
  function InterLockedCompareExchange(var Target: longint; NewValue, Comperand : longint): longint; assembler; nostackframe;
  asm
  {$ifdef win64}
          movl    %r8d, %eax              { eax := Comperand }
          lock
          cmpxchgl %edx, (%rcx)
  {$else win64}
          movl    %edx, %eax              { eax := Comperand }
          lock
          cmpxchgl %esi, (%rdi)
  {$endif win64}
  end;
  { InterLockedDecrement64: subtracts one from Target with a locked XADD and
    returns the resulting (decremented) value. }
  function InterLockedDecrement64 (var Target: int64) : int64; assembler; nostackframe;
  asm
          movq    $-1, %rax               { addend }
  {$ifdef win64}
          lock
          xaddq   %rax, (%rcx)            { rax := old value, Target := old - 1 }
  {$else win64}
          lock
          xaddq   %rax, (%rdi)            { rax := old value, Target := old - 1 }
  {$endif win64}
          decq    %rax                    { old - 1 = new value }
  end;
  { InterLockedIncrement64: adds one to Target with a locked XADD and returns
    the resulting (incremented) value. }
  function InterLockedIncrement64 (var Target: int64) : int64; assembler; nostackframe;
  asm
          movq    $1, %rax                { addend }
  {$ifdef win64}
          lock
          xaddq   %rax, (%rcx)            { rax := old value, Target := old + 1 }
  {$else win64}
          lock
          xaddq   %rax, (%rdi)            { rax := old value, Target := old + 1 }
  {$endif win64}
          incq    %rax                    { old + 1 = new value }
  end;
  { InterLockedExchange64: stores Source into Target and returns the value
    Target held before. XCHG with a memory operand is used, so no explicit
    LOCK prefix appears. }
  function InterLockedExchange64 (var Target: int64;Source : int64) : int64; assembler; nostackframe;
  asm
  {$ifdef win64}
          movq    %rdx, %rax              { rax := Source }
          xchgq   %rax, (%rcx)            { swap with Target; rax := old value }
  {$else win64}
          movq    %rsi, %rax              { rax := Source }
          xchgq   %rax, (%rdi)            { swap with Target; rax := old value }
  {$endif win64}
  end;
  { InterLockedExchangeAdd64: adds Source to Target with a locked XADD and
    returns the value Target held before the addition. }
  function InterLockedExchangeAdd64 (var Target: int64;Source : int64) : int64; assembler; nostackframe;
  asm
  {$ifdef win64}
          movq    %rdx, %rax              { rax := Source }
          lock
          xaddq   %rax, (%rcx)            { rax := old value, Target += Source }
  {$else win64}
          movq    %rsi, %rax              { rax := Source }
          lock
          xaddq   %rax, (%rdi)            { rax := old value, Target += Source }
  {$endif win64}
  end;
  { InterLockedCompareExchange64: if Target equals Comperand, stores NewValue
    into Target (locked CMPXCHG). Returns the value Target held before the
    operation in either case. }
  function InterLockedCompareExchange64(var Target: int64; NewValue, Comperand : int64): int64; assembler; nostackframe;
  asm
  {$ifdef win64}
          movq    %r8, %rax               { rax := Comperand }
          lock
          cmpxchgq %rdx, (%rcx)
  {$else win64}
          movq    %rdx, %rax              { rax := Comperand }
          lock
          cmpxchgq %rsi, (%rdi)
  {$endif win64}
  end;
  794. {****************************************************************************
  795. FPU
  796. ****************************************************************************}
  const
    { Internal constants for use in system unit }
    { FPU_*: one flag bit per exception kind; FPU_ExceptionMask covers the low byte }
    FPU_Invalid        = $01;
    FPU_Denormal       = $02;
    FPU_DivisionByZero = $04;
    FPU_Overflow       = $08;
    FPU_Underflow      = $10;
    FPU_StackUnderflow = $20;
    FPU_StackOverflow  = $40;
    FPU_ExceptionMask  = $ff;

    { MM_Mask*: single-bit masks at bit positions 7..12
      (NOTE(review): this matches the exception-mask bits of MXCSR - confirm) }
    MM_MaskInvalidOp   = $0080;         { bit 7 }
    MM_MaskDenorm      = $0100;         { bit 8 }
    MM_MaskDivZero     = $0200;         { bit 9 }
    MM_MaskOverflow    = $0400;         { bit 10 }
    MM_MaskUnderflow   = $0800;         { bit 11 }
    MM_MaskPrecision   = $1000;         { bit 12 }
  { fpc_cpuinit: when running as a library, adopts the control words currently
    in effect (as set by the host program) as the defaults, then calls
    SysResetFPU. }
  procedure fpc_cpuinit;
  begin
    if IsLibrary then
      begin
        { don't let libraries influence the FPU cw set by the host program }
        DefaultMXCSR:=GetMXCSR;
        Default8087CW:=Get8087CW;
      end;
    SysResetFPU;
  end;
  823. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  { SysInitFPU: intentionally empty in this file; the body performs no work. }
  procedure SysInitFPU;
  begin
    { nothing to do }
  end;
  827. {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  { SysResetFPU: reinitializes the x87 unit (fninit) and then loads the x87
    control word from Default8087CW and MXCSR from DefaultMXCSR. }
  procedure SysResetFPU;
  var
    { stack copies of the globals, so the asm block needs no PIC-specific
      addressing }
    cwCopy: word;
    mxcsrCopy: dword;
  begin
    mxcsrCopy:=DefaultMXCSR;
    cwCopy:=Default8087CW;
    asm
            fninit
            fwait
            fldcw   cwCopy
            ldmxcsr mxcsrCopy
    end;
  end;
  843. {$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
  844. {$define FPC_SYSTEM_HAS_MEM_BARRIER}
  { ReadBarrier: executes a single LFENCE instruction. }
  procedure ReadBarrier; assembler; nostackframe; {$ifdef SYSTEMINLINE}inline;{$endif}
  asm
          lfence
  end;
  { ReadDependencyBarrier: emits no instruction on this target. }
  procedure ReadDependencyBarrier; assembler; nostackframe; {$ifdef SYSTEMINLINE}inline;{$endif}
  asm
          { reads imply barrier on earlier reads depended on }
  end;
  { ReadWriteBarrier: executes a single MFENCE instruction. }
  procedure ReadWriteBarrier; assembler; nostackframe; {$ifdef SYSTEMINLINE}inline;{$endif}
  asm
          mfence
  end;
  { WriteBarrier: executes a single SFENCE instruction. }
  procedure WriteBarrier; assembler; nostackframe; {$ifdef SYSTEMINLINE}inline;{$endif}
  asm
          sfence
  end;
  861. {$endif}
  862. {****************************************************************************
  863. Math Routines
  864. ****************************************************************************}
  865. {$define FPC_SYSTEM_HAS_SWAPENDIAN}
  866. { SwapEndian(<16 Bit>) being inlined is faster than using assembler }
  { SwapEndian (SmallInt): returns AValue with its two bytes exchanged. }
  function SwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif}
  var
    bits: Word;
  begin
    { Work on the unsigned bit pattern: shifting the signed value right would
      first widen it and drag sign bits into the result for AValue < 0. }
    bits := Word(AValue);
    SwapEndian := SmallInt((bits shl 8) or (bits shr 8));
  end;
  { SwapEndian (Word): returns AValue with its two bytes exchanged. }
  function SwapEndian(const AValue: Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
  begin
    { the outer Word cast discards the bits shifted above bit 15 }
    SwapEndian := Word((AValue shl 8) or (AValue shr 8));
  end;
  { SwapEndian (LongInt): returns AValue with the order of its four bytes
    reversed (BSWAP on the 32-bit register). }
  function SwapEndian(const AValue: LongInt): LongInt; assembler; nostackframe;
  asm
  {$ifdef win64}
          movl    %ecx, %eax              { eax := AValue }
  {$else win64}
          movl    %edi, %eax              { eax := AValue }
  {$endif win64}
          bswap   %eax
  end;
  { SwapEndian (DWord): returns AValue with the order of its four bytes
    reversed (BSWAP on the 32-bit register). }
  function SwapEndian(const AValue: DWord): DWord; assembler; nostackframe;
  asm
  {$ifdef win64}
          movl    %ecx, %eax              { eax := AValue }
  {$else win64}
          movl    %edi, %eax              { eax := AValue }
  {$endif win64}
          bswap   %eax
  end;
  { SwapEndian (Int64): returns AValue with the order of its eight bytes
    reversed (BSWAP on the 64-bit register). }
  function SwapEndian(const AValue: Int64): Int64; assembler; nostackframe;
  asm
  {$ifdef win64}
          movq    %rcx, %rax              { rax := AValue }
  {$else win64}
          movq    %rdi, %rax              { rax := AValue }
  {$endif win64}
          bswap   %rax
  end;
  { SwapEndian (QWord): returns AValue with the order of its eight bytes
    reversed (BSWAP on the 64-bit register). }
  function SwapEndian(const AValue: QWord): QWord; assembler; nostackframe;
  asm
  {$ifdef win64}
          movq    %rcx, %rax              { rax := AValue }
  {$else win64}
          movq    %rdi, %rax              { rax := AValue }
  {$endif win64}
          bswap   %rax
  end;