x86_64.inc 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2002 by Florian Klaempfl.
  4. Member of the Free Pascal development team
  5. Parts of this code are derived from the x86-64 linux port
  6. Copyright 2002 Andi Kleen
  7. Processor dependent implementation for the system unit for
  8. the x86-64 architecture
  9. See the file COPYING.FPC, included in this distribution,
  10. for details about the copyright.
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. **********************************************************************}
  15. {$asmmode GAS}
  16. {****************************************************************************
  17. Primitives
  18. ****************************************************************************}
  19. procedure fpc_cpuinit;
  20. begin
  21. SysResetFPU;
  22. if not(IsLibrary) then
  23. SysInitFPU;
  24. end;
{$define FPC_SYSTEM_HAS_SPTR}
{ Returns the current value of the stack pointer register (%rsp). }
Function Sptr : Pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
asm
        { result := %rsp }
        movq    %rsp,%rax
end ['RAX'];
{$IFNDEF INTERNAL_BACKTRACE}
{$define FPC_SYSTEM_HAS_GET_FRAME}
{ Returns the current value of the frame pointer register (%rbp).
  Only compiled when INTERNAL_BACKTRACE is not defined. }
function get_frame:pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
asm
        { result := %rbp }
        movq    %rbp,%rax
end ['RAX'];
{$ENDIF not INTERNAL_BACKTRACE}
  37. {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  38. function get_caller_addr(framebp:pointer):pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
  39. asm
  40. {$ifdef win64}
  41. orq %rcx,%rcx
  42. jz .Lg_a_null
  43. movq 8(%rcx),%rax
  44. {$else win64}
  45. { %rdi = framebp }
  46. orq %rdi,%rdi
  47. jz .Lg_a_null
  48. movq 8(%rdi),%rax
  49. {$endif win64}
  50. .Lg_a_null:
  51. end ['RAX'];
  52. {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  53. function get_caller_frame(framebp:pointer):pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
  54. asm
  55. {$ifdef win64}
  56. orq %rcx,%rcx
  57. jz .Lg_a_null
  58. movq (%rcx),%rax
  59. {$else win64}
  60. { %rdi = framebp }
  61. orq %rdi,%rdi
  62. jz .Lg_a_null
  63. movq (%rdi),%rax
  64. {$endif win64}
  65. .Lg_a_null:
  66. end ['RAX'];
  67. (*
  68. {$define FPC_SYSTEM_HAS_MOVE}
  69. procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;
  70. asm
  71. { rdi destination
  72. rsi source
  73. rdx count
  74. }
  75. pushq %rbx
  76. prefetcht0 (%rsi) // for more hopefully the hw prefetch will kick in
  77. movq %rdi,%rax
  78. movl %edi,%ecx
  79. andl $7,%ecx
  80. jnz .Lbad_alignment
  81. .Lafter_bad_alignment:
  82. movq %rdx,%rcx
  83. movl $64,%ebx
  84. shrq $6,%rcx
  85. jz .Lhandle_tail
  86. .Lloop_64:
  87. { no prefetch because we assume the hw prefetcher does it already
  88. and we have no specific temporal hint to give. XXX or give a nta
  89. hint for the source? }
  90. movq (%rsi),%r11
  91. movq 8(%rsi),%r8
  92. movq 2*8(%rsi),%r9
  93. movq 3*8(%rsi),%r10
  94. movnti %r11,(%rdi)
  95. movnti %r8,1*8(%rdi)
  96. movnti %r9,2*8(%rdi)
  97. movnti %r10,3*8(%rdi)
  98. movq 4*8(%rsi),%r11
  99. movq 5*8(%rsi),%r8
  100. movq 6*8(%rsi),%r9
  101. movq 7*8(%rsi),%r10
  102. movnti %r11,4*8(%rdi)
  103. movnti %r8,5*8(%rdi)
  104. movnti %r9,6*8(%rdi)
  105. movnti %r10,7*8(%rdi)
  106. addq %rbx,%rsi
  107. addq %rbx,%rdi
  108. loop .Lloop_64
  109. .Lhandle_tail:
  110. movl %edx,%ecx
  111. andl $63,%ecx
  112. shrl $3,%ecx
  113. jz .Lhandle_7
  114. movl $8,%ebx
  115. .Lloop_8:
  116. movq (%rsi),%r8
  117. movnti %r8,(%rdi)
  118. addq %rbx,%rdi
  119. addq %rbx,%rsi
  120. loop .Lloop_8
  121. .Lhandle_7:
  122. movl %edx,%ecx
  123. andl $7,%ecx
  124. jz .Lende
  125. .Lloop_1:
  126. movb (%rsi),%r8b
  127. movb %r8b,(%rdi)
  128. incq %rdi
  129. incq %rsi
  130. loop .Lloop_1
  131. jmp .Lende
  132. { align destination }
  133. { This is simpleminded. For bigger blocks it may make sense to align
  134. src and dst to their aligned subset and handle the rest separately }
  135. .Lbad_alignment:
  136. movl $8,%r9d
  137. subl %ecx,%r9d
  138. movl %r9d,%ecx
  139. subq %r9,%rdx
  140. js .Lsmall_alignment
  141. jz .Lsmall_alignment
  142. .Lalign_1:
  143. movb (%rsi),%r8b
  144. movb %r8b,(%rdi)
  145. incq %rdi
  146. incq %rsi
  147. loop .Lalign_1
  148. jmp .Lafter_bad_alignment
  149. .Lsmall_alignment:
  150. addq %r9,%rdx
  151. jmp .Lhandle_7
  152. .Lende:
  153. sfence
  154. popq %rbx
  155. end;
  156. *)
  157. (*
  158. {$define FPC_SYSTEM_HAS_FILLCHAR}
  159. Procedure FillChar(var x;count:longint;value:byte);assembler;
  160. asm
  161. { rdi destination
  162. rsi value (char)
  163. rdx count (bytes)
  164. }
  165. movq %rdi,%r10
  166. movq %rdx,%r11
  167. { expand byte value }
  168. movzbl %sil,%ecx
  169. movabs $0x0101010101010101,%rax
  170. mul %rcx { with rax, clobbers rdx }
  171. { align dst }
  172. movl %edi,%r9d
  173. andl $7,%r9d
  174. jnz .Lbad_alignment
  175. .Lafter_bad_alignment:
  176. movq %r11,%rcx
  177. movl $64,%r8d
  178. shrq $6,%rcx
  179. jz .Lhandle_tail
  180. .Lloop_64:
  181. movnti %rax,(%rdi)
  182. movnti %rax,8(%rdi)
  183. movnti %rax,16(%rdi)
  184. movnti %rax,24(%rdi)
  185. movnti %rax,32(%rdi)
  186. movnti %rax,40(%rdi)
  187. movnti %rax,48(%rdi)
  188. movnti %rax,56(%rdi)
  189. addq %r8,%rdi
  190. loop .Lloop_64
  191. { Handle tail in loops. The loops should be faster than hard
  192. to predict jump tables. }
  193. .Lhandle_tail:
  194. movl %r11d,%ecx
  195. andl $56,%ecx
  196. jz .Lhandle_7
  197. shrl $3,%ecx
  198. .Lloop_8:
  199. movnti %rax,(%rdi)
  200. addq $8,%rdi
  201. loop .Lloop_8
  202. .Lhandle_7:
  203. movl %r11d,%ecx
  204. andl $7,%ecx
  205. jz .Lende
  206. .Lloop_1:
  207. movb %al,(%rdi)
  208. addq $1,%rdi
  209. loop .Lloop_1
  210. jmp .Lende
  211. .Lbad_alignment:
  212. cmpq $7,%r11
  213. jbe .Lhandle_7
  214. movnti %rax,(%rdi) (* unaligned store *)
  215. movq $8,%r8
  216. subq %r9,%r8
  217. addq %r8,%rdi
  218. subq %r8,%r11
  219. jmp .Lafter_bad_alignment
  220. .Lende:
  221. movq %r10,%rax
  222. end;
  223. *)
  224. {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  225. { does a thread save inc/dec }
  226. function declocked(var l : longint) : boolean;assembler;
  227. asm
  228. {$ifdef win64}
  229. {
  230. l: %rcx
  231. }
  232. { this check should be done because a lock takes a lot }
  233. { of time! }
  234. cmpb $0,IsMultithread
  235. jz .Ldeclockednolock
  236. lock
  237. decl (%rcx)
  238. jmp .Ldeclockedend
  239. .Ldeclockednolock:
  240. decl (%rcx)
  241. .Ldeclockedend:
  242. setzb %al
  243. {$else win64}
  244. {
  245. l: %rdi
  246. }
  247. { this check should be done because a lock takes a lot }
  248. { of time! }
  249. {$ifdef FPC_PIC}
  250. movq IsMultithread@GOTPCREL(%rip),%rax
  251. cmpb $0,(%rax)
  252. {$else FPC_PIC}
  253. cmpb $0,IsMultithread
  254. {$endif FPC_PIC}
  255. jz .Ldeclockednolock
  256. lock
  257. decl (%rdi)
  258. jmp .Ldeclockedend
  259. .Ldeclockednolock:
  260. decl (%rdi)
  261. .Ldeclockedend:
  262. setzb %al
  263. {$endif win64}
  264. end;
  265. {$define FPC_SYSTEM_HAS_DECLOCKED_INT64}
  266. function declocked(var l : int64) : boolean;assembler;
  267. asm
  268. {$ifdef win64}
  269. {
  270. l: %rcx
  271. }
  272. { this check should be done because a lock takes a lot }
  273. { of time! }
  274. cmpb $0,IsMultithread
  275. jz .Ldeclockednolock
  276. lock
  277. decq (%rcx)
  278. jmp .Ldeclockedend
  279. .Ldeclockednolock:
  280. decq (%rcx)
  281. .Ldeclockedend:
  282. setzb %al
  283. {$else win64}
  284. {
  285. l: %rdi
  286. }
  287. { this check should be done because a lock takes a lot }
  288. { of time! }
  289. {$ifdef FPC_PIC}
  290. movq IsMultithread@GOTPCREL(%rip),%rax
  291. cmpb $0,(%rax)
  292. {$else FPC_PIC}
  293. cmpb $0,IsMultithread
  294. {$endif FPC_PIC}
  295. jz .Ldeclockednolock
  296. lock
  297. decq (%rdi)
  298. jmp .Ldeclockedend
  299. .Ldeclockednolock:
  300. decq (%rdi)
  301. .Ldeclockedend:
  302. setzb %al
  303. {$endif win64}
  304. end;
  305. {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  306. procedure inclocked(var l : longint);assembler;
  307. asm
  308. {$ifdef win64}
  309. {
  310. l: %rcx
  311. }
  312. { this check should be done because a lock takes a lot }
  313. { of time! }
  314. cmpb $0,IsMultithread
  315. jz .Linclockednolock
  316. lock
  317. incl (%rcx)
  318. jmp .Linclockedend
  319. .Linclockednolock:
  320. incl (%rcx)
  321. .Linclockedend:
  322. {$else win64}
  323. {
  324. l: %rdi
  325. }
  326. { this check should be done because a lock takes a lot }
  327. { of time! }
  328. {$ifdef FPC_PIC}
  329. movq IsMultithread@GOTPCREL(%rip),%rax
  330. cmpb $0,(%rax)
  331. {$else FPC_PIC}
  332. cmpb $0,IsMultithread
  333. {$endif FPC_PIC}
  334. jz .Linclockednolock
  335. lock
  336. incl (%rdi)
  337. jmp .Linclockedend
  338. .Linclockednolock:
  339. incl (%rdi)
  340. .Linclockedend:
  341. {$endif win64}
  342. end;
  343. {$define FPC_SYSTEM_HAS_INCLOCKED_INT64}
  344. procedure inclocked(var l : int64);assembler;
  345. asm
  346. {$ifdef win64}
  347. {
  348. l: %rcx
  349. }
  350. { this check should be done because a lock takes a lot }
  351. { of time! }
  352. cmpb $0,IsMultithread
  353. jz .Linclockednolock
  354. lock
  355. incq (%rcx)
  356. jmp .Linclockedend
  357. .Linclockednolock:
  358. incq (%rcx)
  359. .Linclockedend:
  360. {$else win64}
  361. {
  362. l: %rdi
  363. }
  364. { this check should be done because a lock takes a lot }
  365. { of time! }
  366. {$ifdef FPC_PIC}
  367. movq IsMultithread@GOTPCREL(%rip),%rax
  368. cmpb $0,(%rax)
  369. {$else FPC_PIC}
  370. cmpb $0,IsMultithread
  371. {$endif FPC_PIC}
  372. jz .Linclockednolock
  373. lock
  374. incq (%rdi)
  375. jmp .Linclockedend
  376. .Linclockednolock:
  377. incq (%rdi)
  378. .Linclockedend:
  379. {$endif win64}
  380. end;
  381. function InterLockedDecrement (var Target: longint) : longint; assembler;
  382. asm
  383. {$ifdef win64}
  384. movq %rcx,%rax
  385. {$else win64}
  386. movq %rdi,%rax
  387. {$endif win64}
  388. movl $-1,%edx
  389. xchgq %rdx,%rax
  390. lock
  391. xaddl %eax, (%rdx)
  392. decl %eax
  393. end;
  394. function InterLockedIncrement (var Target: longint) : longint; assembler;
  395. asm
  396. {$ifdef win64}
  397. movq %rcx,%rax
  398. {$else win64}
  399. movq %rdi,%rax
  400. {$endif win64}
  401. movl $1,%edx
  402. xchgq %rdx,%rax
  403. lock
  404. xaddl %eax, (%rdx)
  405. incl %eax
  406. end;
  407. function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler;
  408. asm
  409. {$ifdef win64}
  410. xchgl (%rcx),%edx
  411. movl %edx,%eax
  412. {$else win64}
  413. xchgl (%rdi),%esi
  414. movl %esi,%eax
  415. {$endif win64}
  416. end;
  417. function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler;
  418. asm
  419. {$ifdef win64}
  420. xchgq %rcx,%rdx
  421. lock
  422. xaddl %ecx, (%rdx)
  423. movl %ecx,%eax
  424. {$else win64}
  425. xchgq %rdi,%rsi
  426. lock
  427. xaddl %edi, (%rsi)
  428. movl %edi,%eax
  429. {$endif win64}
  430. end;
  431. function InterLockedCompareExchange(var Target: longint; NewValue, Comperand : longint): longint; assembler;
  432. asm
  433. {$ifdef win64}
  434. movl %r8d,%eax
  435. lock
  436. cmpxchgl %edx,(%rcx)
  437. {$else win64}
  438. movl %edx,%eax
  439. lock
  440. cmpxchgl %esi,(%rdi)
  441. {$endif win64}
  442. end;
  443. function InterLockedDecrement64 (var Target: int64) : int64; assembler;
  444. asm
  445. {$ifdef win64}
  446. movq %rcx,%rax
  447. {$else win64}
  448. movq %rdi,%rax
  449. {$endif win64}
  450. movq $-1,%rdx
  451. xchgq %rdx,%rax
  452. lock
  453. xaddq %rax, (%rdx)
  454. decq %rax
  455. end;
  456. function InterLockedIncrement64 (var Target: int64) : int64; assembler;
  457. asm
  458. {$ifdef win64}
  459. movq %rcx,%rax
  460. {$else win64}
  461. movq %rdi,%rax
  462. {$endif win64}
  463. movq $1,%rdx
  464. xchgq %rdx,%rax
  465. lock
  466. xaddq %rax, (%rdx)
  467. incq %rax
  468. end;
  469. function InterLockedExchange64 (var Target: int64;Source : int64) : int64; assembler;
  470. asm
  471. {$ifdef win64}
  472. xchgq (%rcx),%rdx
  473. movq %rdx,%rax
  474. {$else win64}
  475. xchgq (%rdi),%rsi
  476. movq %rsi,%rax
  477. {$endif win64}
  478. end;
  479. function InterLockedExchangeAdd64 (var Target: int64;Source : int64) : int64; assembler;
  480. asm
  481. {$ifdef win64}
  482. xchgq %rcx,%rdx
  483. lock
  484. xaddq %rcx, (%rdx)
  485. movq %rcx,%rax
  486. {$else win64}
  487. xchgq %rdi,%rsi
  488. lock
  489. xaddq %rdi, (%rsi)
  490. movq %rdi,%rax
  491. {$endif win64}
  492. end;
  493. function InterLockedCompareExchange64(var Target: int64; NewValue, Comperand : int64): int64; assembler;
  494. asm
  495. {$ifdef win64}
  496. movq %r8,%rax
  497. lock
  498. cmpxchgq %rdx,(%rcx)
  499. {$else win64}
  500. movq %rdx,%rax
  501. lock
  502. cmpxchgq %rsi,(%rdi)
  503. {$endif win64}
  504. end;
  505. {****************************************************************************
  506. FPU
  507. ****************************************************************************}
  508. const
  509. { Internal constants for use in system unit }
  510. FPU_Invalid = 1;
  511. FPU_Denormal = 2;
  512. FPU_DivisionByZero = 4;
  513. FPU_Overflow = 8;
  514. FPU_Underflow = $10;
  515. FPU_StackUnderflow = $20;
  516. FPU_StackOverflow = $40;
  517. FPU_ExceptionMask = $ff;
  518. fpucw : word = $1300 or FPU_StackUnderflow or FPU_Underflow or FPU_Denormal;
  519. MM_MaskInvalidOp = %0000000010000000;
  520. MM_MaskDenorm = %0000000100000000;
  521. MM_MaskDivZero = %0000001000000000;
  522. MM_MaskOverflow = %0000010000000000;
  523. MM_MaskUnderflow = %0000100000000000;
  524. MM_MaskPrecision = %0001000000000000;
  525. mxcsr : dword = MM_MaskUnderflow or MM_MaskPrecision or MM_MaskDenorm;
  526. {$define FPC_SYSTEM_HAS_SYSINITFPU}
  527. Procedure SysInitFPU;
  528. begin
  529. asm
  530. {$ifdef FPC_PIC}
  531. movq fpucw@GOTPCREL(%rip),%rax
  532. fldcw (%rax)
  533. { set sse exceptions }
  534. movq mxcsr@GOTPCREL(%rip),%rax
  535. ldmxcsr (%rax)
  536. {$else FPC_PIC}
  537. fldcw fpucw
  538. { set sse exceptions }
  539. ldmxcsr mxcsr
  540. {$endif FPC_PIC}
  541. end ['RAX'];
  542. { x86-64 might use softfloat code }
  543. softfloat_exception_mask:=float_flag_underflow or float_flag_inexact or float_flag_denormal;
  544. end;
{$define FPC_SYSTEM_HAS_SYSRESETFPU}
{ Clears pending x87 exception flags and resets the softfloat exception
  flags. The control words are not touched here; SysInitFPU loads those. }
Procedure SysResetFPU;
begin
  asm
        { fnclex clears the x87 exception flags without first checking for
          pending unmasked exceptions }
        fnclex
        fwait
  end;
  { x86-64 might use softfloat code }
  softfloat_exception_flags:=0;
end;
{$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
{$define FPC_SYSTEM_HAS_MEM_BARRIER}

{ Memory barrier primitives. They are only compiled when no other definition
  has already set FPC_SYSTEM_HAS_MEM_BARRIER. }

{ Read barrier: issues lfence. }
procedure ReadBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
asm
        lfence
end;

{ Read dependency barrier: intentionally emits no instruction. }
procedure ReadDependencyBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
asm
        { reads imply barrier on earlier reads depended on }
end;

{ Full read/write barrier: issues mfence. }
procedure ReadWriteBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
asm
        mfence
end;

{ Write barrier: intentionally emits no instruction.
  NOTE(review): presumably relies on ordinary stores not being reordered with
  each other; non-temporal stores such as movnti would not be covered by an
  empty barrier - confirm if such stores are ever used. }
procedure WriteBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
asm
        { no write reordering on intel CPUs (yet) }
end;

{$endif}