x86_64.inc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619
  1. {
  2. This file is part of the Free Pascal run time library.
  3. Copyright (c) 2002 by Florian Klaempfl.
  4. Member of the Free Pascal development team
  5. Parts of this code are derived from the x86-64 linux port
  6. Copyright 2002 Andi Kleen
  7. Processor dependent implementation for the system unit for
  8. the x86-64 architecture
  9. See the file COPYING.FPC, included in this distribution,
  10. for details about the copyright.
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  14. **********************************************************************}
  15. {$asmmode GAS}
  16. {****************************************************************************
  17. Primitives
  18. ****************************************************************************}
  { Processor specific start-up hook of the system unit.
    All it does is hand over to SysResetFPU. }
  procedure fpc_cpuinit;
    begin
      SysResetFPU;
    end;
  {$define FPC_SYSTEM_HAS_SPTR}
  { Returns the value held by the stack pointer register (%rsp) at the moment
    the body of this routine executes. }
  Function Sptr : Pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
  asm
          movq    %rsp,%rax
  end ['RAX'];
  {$IFNDEF INTERNAL_BACKTRACE}
  {$define FPC_SYSTEM_HAS_GET_FRAME}
  { Returns the current contents of the frame pointer register (%rbp).
    Only compiled when INTERNAL_BACKTRACE is not defined. }
  function get_frame:pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
  asm
          movq    %rbp,%rax
  end ['RAX'];
  {$ENDIF not INTERNAL_BACKTRACE}
  {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
  { Returns the pointer stored 8 bytes above framebp, i.e. the slot directly
    after the one framebp points at.

    framebp : address of a frame; may be nil.
    Result  : the pointer read from framebp+8, or nil when framebp is nil.

    The argument is copied into %rax before it is tested, so a nil framebp
    yields a well defined nil result. Previously %rax was never written on
    that path and the caller received whatever the register happened to hold. }
  function get_caller_addr(framebp:pointer):pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
  asm
  {$ifdef win64}
          { %rcx = framebp }
          movq    %rcx,%rax
  {$else win64}
          { %rdi = framebp }
          movq    %rdi,%rax
  {$endif win64}
          { %rax = framebp from here on, which is already the nil result }
          testq   %rax,%rax
          jz      .Lg_a_null
          movq    8(%rax),%rax
  .Lg_a_null:
  end ['RAX'];
  {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
  { Returns the pointer stored at the address framebp points to.

    framebp : address of a frame; may be nil.
    Result  : the pointer read from framebp, or nil when framebp is nil.

    The argument is copied into %rax before it is tested, so a nil framebp
    yields a well defined nil result. Previously %rax was never written on
    that path and the caller received whatever the register happened to hold. }
  function get_caller_frame(framebp:pointer):pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
  asm
  {$ifdef win64}
          { %rcx = framebp }
          movq    %rcx,%rax
  {$else win64}
          { %rdi = framebp }
          movq    %rdi,%rax
  {$endif win64}
          { %rax = framebp from here on, which is already the nil result }
          testq   %rax,%rax
          jz      .Lg_a_null
          movq    (%rax),%rax
  .Lg_a_null:
  end ['RAX'];
  65. (*
  66. {$define FPC_SYSTEM_HAS_MOVE}
  67. procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;
  68. asm
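  { Register roles as used by the code below (memcpy argument order):
      rdi destination
      rsi source
      rdx count
    NOTE(review): the Pascal declaration above lists source before dest, so
    with the System V AMD64 register order source would arrive in rdi and
    dest in rsi, the reverse of what this code assumes. Confirm and fix the
    register usage before re-enabling this routine.
  }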
  73. pushq %rbx
  74. prefetcht0 (%rsi) // for more hopefully the hw prefetch will kick in
  75. movq %rdi,%rax
  76. movl %edi,%ecx
  77. andl $7,%ecx
  78. jnz .Lbad_alignment
  79. .Lafter_bad_alignment:
  80. movq %rdx,%rcx
  81. movl $64,%ebx
  82. shrq $6,%rcx
  83. jz .Lhandle_tail
  84. .Lloop_64:
  85. { no prefetch because we assume the hw prefetcher does it already
  86. and we have no specific temporal hint to give. XXX or give a nta
  87. hint for the source? }
  88. movq (%rsi),%r11
  89. movq 8(%rsi),%r8
  90. movq 2*8(%rsi),%r9
  91. movq 3*8(%rsi),%r10
  92. movnti %r11,(%rdi)
  93. movnti %r8,1*8(%rdi)
  94. movnti %r9,2*8(%rdi)
  95. movnti %r10,3*8(%rdi)
  96. movq 4*8(%rsi),%r11
  97. movq 5*8(%rsi),%r8
  98. movq 6*8(%rsi),%r9
  99. movq 7*8(%rsi),%r10
  100. movnti %r11,4*8(%rdi)
  101. movnti %r8,5*8(%rdi)
  102. movnti %r9,6*8(%rdi)
  103. movnti %r10,7*8(%rdi)
  104. addq %rbx,%rsi
  105. addq %rbx,%rdi
  106. loop .Lloop_64
  107. .Lhandle_tail:
  108. movl %edx,%ecx
  109. andl $63,%ecx
  110. shrl $3,%ecx
  111. jz .Lhandle_7
  112. movl $8,%ebx
  113. .Lloop_8:
  114. movq (%rsi),%r8
  115. movnti %r8,(%rdi)
  116. addq %rbx,%rdi
  117. addq %rbx,%rsi
  118. loop .Lloop_8
  119. .Lhandle_7:
  120. movl %edx,%ecx
  121. andl $7,%ecx
  122. jz .Lende
  123. .Lloop_1:
  124. movb (%rsi),%r8b
  125. movb %r8b,(%rdi)
  126. incq %rdi
  127. incq %rsi
  128. loop .Lloop_1
  129. jmp .Lende
  130. { align destination }
  131. { This is simpleminded. For bigger blocks it may make sense to align
  132. src and dst to their aligned subset and handle the rest separately }
  133. .Lbad_alignment:
  134. movl $8,%r9d
  135. subl %ecx,%r9d
  136. movl %r9d,%ecx
  137. subq %r9,%rdx
  138. js .Lsmall_alignment
  139. jz .Lsmall_alignment
  140. .Lalign_1:
  141. movb (%rsi),%r8b
  142. movb %r8b,(%rdi)
  143. incq %rdi
  144. incq %rsi
  145. loop .Lalign_1
  146. jmp .Lafter_bad_alignment
  147. .Lsmall_alignment:
  148. addq %r9,%rdx
  149. jmp .Lhandle_7
  150. .Lende:
  151. sfence
  152. popq %rbx
  153. end;
  154. *)
  155. (*
  156. {$define FPC_SYSTEM_HAS_FILLCHAR}
  157. Procedure FillChar(var x;count:longint;value:byte);assembler;
  158. asm
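  { Register roles as used by the code below (memset argument order):
      rdi destination
      rsi value (char)
      rdx count (bytes)
    NOTE(review): the Pascal declaration above lists count before value, so
    with the System V AMD64 register order count would arrive in rsi and
    value in rdx, the reverse of what this code assumes. Confirm and fix the
    register usage before re-enabling this routine.
  }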
  163. movq %rdi,%r10
  164. movq %rdx,%r11
  165. { expand byte value }
  166. movzbl %sil,%ecx
  167. movabs $0x0101010101010101,%rax
  168. mul %rcx { with rax, clobbers rdx }
  169. { align dst }
  170. movl %edi,%r9d
  171. andl $7,%r9d
  172. jnz .Lbad_alignment
  173. .Lafter_bad_alignment:
  174. movq %r11,%rcx
  175. movl $64,%r8d
  176. shrq $6,%rcx
  177. jz .Lhandle_tail
  178. .Lloop_64:
  179. movnti %rax,(%rdi)
  180. movnti %rax,8(%rdi)
  181. movnti %rax,16(%rdi)
  182. movnti %rax,24(%rdi)
  183. movnti %rax,32(%rdi)
  184. movnti %rax,40(%rdi)
  185. movnti %rax,48(%rdi)
  186. movnti %rax,56(%rdi)
  187. addq %r8,%rdi
  188. loop .Lloop_64
  189. { Handle tail in loops. The loops should be faster than hard
  190. to predict jump tables. }
  191. .Lhandle_tail:
  192. movl %r11d,%ecx
  193. andl $56,%ecx
  194. jz .Lhandle_7
  195. shrl $3,%ecx
  196. .Lloop_8:
  197. movnti %rax,(%rdi)
  198. addq $8,%rdi
  199. loop .Lloop_8
  200. .Lhandle_7:
  201. movl %r11d,%ecx
  202. andl $7,%ecx
  203. jz .Lende
  204. .Lloop_1:
  205. movb %al,(%rdi)
  206. addq $1,%rdi
  207. loop .Lloop_1
  208. jmp .Lende
  209. .Lbad_alignment:
  210. cmpq $7,%r11
  211. jbe .Lhandle_7
  212. movnti %rax,(%rdi) (* unaligned store *)
  213. movq $8,%r8
  214. subq %r9,%r8
  215. addq %r8,%rdi
  216. subq %r8,%r11
  217. jmp .Lafter_bad_alignment
  218. .Lende:
  219. movq %r10,%rax
  220. end;
  221. *)
  {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
  { Thread safe decrement of a longint.

    l      : variable to decrement; its address arrives in %rcx on win64 and
             in %rdi otherwise.
    Result : true when l became zero through this decrement.

    The lock prefix is only used when IsMultithread is non-zero, because a
    locked operation takes a lot of time. }
  function declocked(var l : longint) : boolean;assembler;
  asm
          { step 1: is the program multi-threaded? }
  {$ifdef win64}
          cmpb    $0,IsMultithread
  {$else win64}
  {$ifdef FPC_PIC}
          movq    IsMultithread@GOTPCREL(%rip),%rax
          cmpb    $0,(%rax)
  {$else FPC_PIC}
          cmpb    $0,IsMultithread
  {$endif FPC_PIC}
  {$endif win64}
          jz      .Ldeclocked_plain
          { step 2a: locked decrement }
          lock
  {$ifdef win64}
          decl    (%rcx)
  {$else win64}
          decl    (%rdi)
  {$endif win64}
          jmp     .Ldeclocked_done
  .Ldeclocked_plain:
          { step 2b: plain decrement }
  {$ifdef win64}
          decl    (%rcx)
  {$else win64}
          decl    (%rdi)
  {$endif win64}
  .Ldeclocked_done:
          { step 3: turn the zero flag left by the decrement into the result }
          setzb   %al
  end;
  {$define FPC_SYSTEM_HAS_DECLOCKED_INT64}
  { Thread safe decrement of an int64.

    l      : variable to decrement; its address arrives in %rcx on win64 and
             in %rdi otherwise.
    Result : true when l became zero through this decrement.

    The lock prefix is only used when IsMultithread is non-zero, because a
    locked operation takes a lot of time. }
  function declocked(var l : int64) : boolean;assembler;
  asm
          { step 1: is the program multi-threaded? }
  {$ifdef win64}
          cmpb    $0,IsMultithread
  {$else win64}
  {$ifdef FPC_PIC}
          movq    IsMultithread@GOTPCREL(%rip),%rax
          cmpb    $0,(%rax)
  {$else FPC_PIC}
          cmpb    $0,IsMultithread
  {$endif FPC_PIC}
  {$endif win64}
          jz      .Ldeclocked_plain
          { step 2a: locked decrement }
          lock
  {$ifdef win64}
          decq    (%rcx)
  {$else win64}
          decq    (%rdi)
  {$endif win64}
          jmp     .Ldeclocked_done
  .Ldeclocked_plain:
          { step 2b: plain decrement }
  {$ifdef win64}
          decq    (%rcx)
  {$else win64}
          decq    (%rdi)
  {$endif win64}
  .Ldeclocked_done:
          { step 3: turn the zero flag left by the decrement into the result }
          setzb   %al
  end;
  {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
  { Thread safe increment of a longint.

    l : variable to increment; its address arrives in %rcx on win64 and in
        %rdi otherwise.

    The lock prefix is only used when IsMultithread is non-zero, because a
    locked operation takes a lot of time. }
  procedure inclocked(var l : longint);assembler;
  asm
          { step 1: is the program multi-threaded? }
  {$ifdef win64}
          cmpb    $0,IsMultithread
  {$else win64}
  {$ifdef FPC_PIC}
          movq    IsMultithread@GOTPCREL(%rip),%rax
          cmpb    $0,(%rax)
  {$else FPC_PIC}
          cmpb    $0,IsMultithread
  {$endif FPC_PIC}
  {$endif win64}
          jz      .Linclocked_plain
          { step 2a: locked increment }
          lock
  {$ifdef win64}
          incl    (%rcx)
  {$else win64}
          incl    (%rdi)
  {$endif win64}
          jmp     .Linclocked_done
  .Linclocked_plain:
          { step 2b: plain increment }
  {$ifdef win64}
          incl    (%rcx)
  {$else win64}
          incl    (%rdi)
  {$endif win64}
  .Linclocked_done:
  end;
  {$define FPC_SYSTEM_HAS_INCLOCKED_INT64}
  { Thread safe increment of an int64.

    l : variable to increment; its address arrives in %rcx on win64 and in
        %rdi otherwise.

    The lock prefix is only used when IsMultithread is non-zero, because a
    locked operation takes a lot of time. }
  procedure inclocked(var l : int64);assembler;
  asm
          { step 1: is the program multi-threaded? }
  {$ifdef win64}
          cmpb    $0,IsMultithread
  {$else win64}
  {$ifdef FPC_PIC}
          movq    IsMultithread@GOTPCREL(%rip),%rax
          cmpb    $0,(%rax)
  {$else FPC_PIC}
          cmpb    $0,IsMultithread
  {$endif FPC_PIC}
  {$endif win64}
          jz      .Linclocked_plain
          { step 2a: locked increment }
          lock
  {$ifdef win64}
          incq    (%rcx)
  {$else win64}
          incq    (%rdi)
  {$endif win64}
          jmp     .Linclocked_done
  .Linclocked_plain:
          { step 2b: plain increment }
  {$ifdef win64}
          incq    (%rcx)
  {$else win64}
          incq    (%rdi)
  {$endif win64}
  .Linclocked_done:
  end;
  { Atomically subtracts 1 from Target.

    Target : variable to decrement (address in %rcx on win64, %rdi otherwise).
    Result : the value of Target after the decrement. }
  function InterLockedDecrement (var Target: longint) : longint; assembler;
  asm
          movl    $-1,%eax
          lock
  {$ifdef win64}
          xaddl   %eax,(%rcx)
  {$else win64}
          xaddl   %eax,(%rdi)
  {$endif win64}
          { xadd handed back the previous value; adjust it to the new one }
          decl    %eax
  end;
  { Atomically adds 1 to Target.

    Target : variable to increment (address in %rcx on win64, %rdi otherwise).
    Result : the value of Target after the increment. }
  function InterLockedIncrement (var Target: longint) : longint; assembler;
  asm
          movl    $1,%eax
          lock
  {$ifdef win64}
          xaddl   %eax,(%rcx)
  {$else win64}
          xaddl   %eax,(%rdi)
  {$endif win64}
          { xadd handed back the previous value; adjust it to the new one }
          incl    %eax
  end;
  { Atomically stores Source in Target.

    Target : variable to overwrite (address in %rcx on win64, %rdi otherwise).
    Source : new value (%edx on win64, %esi otherwise).
    Result : the value Target held before the exchange. }
  function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler;
  asm
  {$ifdef win64}
          movl    %edx,%eax
          xchgl   %eax,(%rcx)
  {$else win64}
          movl    %esi,%eax
          xchgl   %eax,(%rdi)
  {$endif win64}
  end;
  { Atomically adds Source to Target.

    Target : variable to modify (address in %rcx on win64, %rdi otherwise).
    Source : amount to add (%edx on win64, %esi otherwise).
    Result : the value Target held before the addition. }
  function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler;
  asm
  {$ifdef win64}
          movl    %edx,%eax
          lock
          xaddl   %eax,(%rcx)
  {$else win64}
          movl    %esi,%eax
          lock
          xaddl   %eax,(%rdi)
  {$endif win64}
  end;
  { Atomically replaces Target by NewValue, but only if Target currently
    equals Comperand.

    Target    : variable to update (address in %rcx on win64, %rdi otherwise).
    NewValue  : value to store on a match (%edx on win64, %esi otherwise).
    Comperand : value Target is compared with (%r8d on win64, %edx otherwise).
    Result    : the value Target held before the operation. }
  function InterLockedCompareExchange(var Target: longint; NewValue, Comperand : longint): longint; assembler;
  asm
          { cmpxchg compares against %eax, so the comperand goes there first }
  {$ifdef win64}
          movl    %r8d,%eax
  {$else win64}
          movl    %edx,%eax
  {$endif win64}
          lock
  {$ifdef win64}
          cmpxchgl %edx,(%rcx)
  {$else win64}
          cmpxchgl %esi,(%rdi)
  {$endif win64}
  end;
  { Atomically subtracts 1 from a 64 bit Target.

    Target : variable to decrement (address in %rcx on win64, %rdi otherwise).
    Result : the value of Target after the decrement. }
  function InterLockedDecrement64 (var Target: int64) : int64; assembler;
  asm
          movq    $-1,%rax
          lock
  {$ifdef win64}
          xaddq   %rax,(%rcx)
  {$else win64}
          xaddq   %rax,(%rdi)
  {$endif win64}
          { xadd handed back the previous value; adjust it to the new one }
          decq    %rax
  end;
  { Atomically adds 1 to a 64 bit Target.

    Target : variable to increment (address in %rcx on win64, %rdi otherwise).
    Result : the value of Target after the increment. }
  function InterLockedIncrement64 (var Target: int64) : int64; assembler;
  asm
          movq    $1,%rax
          lock
  {$ifdef win64}
          xaddq   %rax,(%rcx)
  {$else win64}
          xaddq   %rax,(%rdi)
  {$endif win64}
          { xadd handed back the previous value; adjust it to the new one }
          incq    %rax
  end;
  { Atomically stores Source in a 64 bit Target.

    Target : variable to overwrite (address in %rcx on win64, %rdi otherwise).
    Source : new value (%rdx on win64, %rsi otherwise).
    Result : the value Target held before the exchange. }
  function InterLockedExchange64 (var Target: int64;Source : int64) : int64; assembler;
  asm
  {$ifdef win64}
          movq    %rdx,%rax
          xchgq   %rax,(%rcx)
  {$else win64}
          movq    %rsi,%rax
          xchgq   %rax,(%rdi)
  {$endif win64}
  end;
  { Atomically adds Source to a 64 bit Target.

    Target : variable to modify (address in %rcx on win64, %rdi otherwise).
    Source : amount to add (%rdx on win64, %rsi otherwise).
    Result : the value Target held before the addition. }
  function InterLockedExchangeAdd64 (var Target: int64;Source : int64) : int64; assembler;
  asm
  {$ifdef win64}
          movq    %rdx,%rax
          lock
          xaddq   %rax,(%rcx)
  {$else win64}
          movq    %rsi,%rax
          lock
          xaddq   %rax,(%rdi)
  {$endif win64}
  end;
  { Atomically replaces a 64 bit Target by NewValue, but only if Target
    currently equals Comperand.

    Target    : variable to update (address in %rcx on win64, %rdi otherwise).
    NewValue  : value to store on a match (%rdx on win64, %rsi otherwise).
    Comperand : value Target is compared with (%r8 on win64, %rdx otherwise).
    Result    : the value Target held before the operation. }
  function InterLockedCompareExchange64(var Target: int64; NewValue, Comperand : int64): int64; assembler;
  asm
          { cmpxchg compares against %rax, so the comperand goes there first }
  {$ifdef win64}
          movq    %r8,%rax
  {$else win64}
          movq    %rdx,%rax
  {$endif win64}
          lock
  {$ifdef win64}
          cmpxchgq %rdx,(%rcx)
  {$else win64}
          cmpxchgq %rsi,(%rdi)
  {$endif win64}
  end;
  503. {****************************************************************************
  504. FPU
  505. ****************************************************************************}
  const
    { Internal constants for use in system unit }

    { x87 flags, one bit each (bits 0..6) }
    FPU_Invalid        = $01;
    FPU_Denormal       = $02;
    FPU_DivisionByZero = $04;
    FPU_Overflow       = $08;
    FPU_Underflow      = $10;
    FPU_StackUnderflow = $20;
    FPU_StackOverflow  = $40;
    FPU_ExceptionMask  = $ff;

    { Default x87 control word, loaded with fldcw by SysResetFPU.
      Evaluates to $1332.
      NOTE(review): in the x87 control word bit 5 ($20) is documented as the
      precision mask, although the constant used for it here is named
      FPU_StackUnderflow - confirm against the processor manual. }
    fpucw : word = $1300 or FPU_StackUnderflow or FPU_Underflow or FPU_Denormal;

    { SSE mask flags, one bit each (bits 7..12) }
    MM_MaskInvalidOp = $0080;
    MM_MaskDenorm    = $0100;
    MM_MaskDivZero   = $0200;
    MM_MaskOverflow  = $0400;
    MM_MaskUnderflow = $0800;
    MM_MaskPrecision = $1000;

    { Default value loaded with ldmxcsr by SysResetFPU. Evaluates to $1900. }
    mxcsr : dword = MM_MaskUnderflow or MM_MaskPrecision or MM_MaskDenorm;
  {$define FPC_SYSTEM_HAS_SYSRESETFPU}
  { Puts the floating point units into the RTL's default state:
      - re-initialises the x87 fpu and loads fpucw as its control word,
      - loads mxcsr into the SSE control/status register,
      - resets the softfloat exception flags and exception mask. }
  Procedure SysResetFPU;
    begin
      asm
          { initialize fpu; done on every target because Win64 uses the fpu
            for ln etc. so we have to reset it there as well }
          fninit
          fwait
  {$ifdef FPC_PIC}
          { position independent code: fetch the addresses through the GOT }
          movq    fpucw@GOTPCREL(%rip),%rax
          fldcw   (%rax)
          { set sse exceptions }
          movq    mxcsr@GOTPCREL(%rip),%rax
          ldmxcsr (%rax)
  {$else FPC_PIC}
          fldcw   fpucw
          { set sse exceptions }
          ldmxcsr mxcsr
  {$endif FPC_PIC}
      end ['RAX'];
      { x86-64 might use softfloat code }
      softfloat_exception_mask:=float_flag_underflow or float_flag_inexact or float_flag_denormal;
      softfloat_exception_flags:=0;
    end;