1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090 |
- {
- This file is part of the Free Pascal run time library.
- Copyright (c) 2002 by Florian Klaempfl.
- Member of the Free Pascal development team
- Parts of this code are derived from the x86-64 linux port
- Copyright 2002 Andi Kleen
- Processor dependent implementation for the system unit for
- the x86-64 architecture
- See the file COPYING.FPC, included in this distribution,
- for details about the copyright.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- **********************************************************************}
- {$asmmode GAS}
- {****************************************************************************
- Primitives
- ****************************************************************************}
- {$define FPC_SYSTEM_HAS_SPTR}
- Function Sptr : Pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
- asm
- movq %rsp,%rax
- end ['RAX'];
- {$IFNDEF INTERNAL_BACKTRACE}
- {$define FPC_SYSTEM_HAS_GET_FRAME}
- function get_frame:pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
- asm
- movq %rbp,%rax
- end ['RAX'];
- {$ENDIF not INTERNAL_BACKTRACE}
- {$define FPC_SYSTEM_HAS_GET_CALLER_ADDR}
- function get_caller_addr(framebp:pointer):pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
- asm
- {$ifdef win64}
- orq %rcx,%rcx
- jz .Lg_a_null
- movq 8(%rcx),%rax
- {$else win64}
- { %rdi = framebp }
- orq %rdi,%rdi
- jz .Lg_a_null
- movq 8(%rdi),%rax
- {$endif win64}
- .Lg_a_null:
- end ['RAX'];
- {$define FPC_SYSTEM_HAS_GET_CALLER_FRAME}
- function get_caller_frame(framebp:pointer):pointer;assembler;{$ifdef SYSTEMINLINE}inline;{$endif}
- asm
- {$ifdef win64}
- orq %rcx,%rcx
- jz .Lg_a_null
- movq (%rcx),%rax
- {$else win64}
- { %rdi = framebp }
- orq %rdi,%rdi
- jz .Lg_a_null
- movq (%rdi),%rax
- {$endif win64}
- .Lg_a_null:
- end ['RAX'];
- {$ifndef FPC_SYSTEM_HAS_MOVE}
- {$define FPC_SYSTEM_HAS_MOVE}
- procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;nostackframe;
- { Linux: rdi source, rsi dest, rdx count
- win64: rcx source, rdx dest, r8 count }
- asm
- {$ifndef win64}
- mov %rdx, %r8
- mov %rsi, %rdx
- mov %rdi, %rcx
- {$endif win64}
- mov %r8, %rax
- sub %rdx, %rcx { rcx = src - dest }
- jz .Lquit { exit if src=dest }
- jnb .L1 { src>dest => forward move }
- add %rcx, %rax { rcx is negative => r8+rcx > 0 if regions overlap }
- jb .Lback { if no overlap, still do forward move }
- .L1:
- cmp $8, %r8
- jl .Lless8f { signed compare, negative count not allowed }
- test $7, %dl
- je .Ldestaligned
- test $1, %dl { align dest by moving first 1+2+4 bytes }
- je .L2f
- mov (%rcx,%rdx,1),%al
- dec %r8
- mov %al, (%rdx)
- add $1, %rdx
- .L2f:
- test $2, %dl
- je .L4f
- mov (%rcx,%rdx,1),%ax
- sub $2, %r8
- mov %ax, (%rdx)
- add $2, %rdx
- .L4f:
- test $4, %dl
- je .Ldestaligned
- mov (%rcx,%rdx,1),%eax
- sub $4, %r8
- mov %eax, (%rdx)
- add $4, %rdx
- .Ldestaligned:
- mov %r8, %r9
- shr $5, %r9
- jne .Lmore32
- .Ltail:
- mov %r8, %r9
- shr $3, %r9
- je .Lless8f
- .balign 16
- .Lloop8f: { max. 8 iterations }
- mov (%rcx,%rdx,1),%rax
- mov %rax, (%rdx)
- add $8, %rdx
- dec %r9
- jne .Lloop8f
- and $7, %r8
- .Lless8f:
- test %r8, %r8
- jle .Lquit
- .balign 16
- .Lloop1f:
- mov (%rcx,%rdx,1),%al
- mov %al,(%rdx)
- inc %rdx
- dec %r8
- jne .Lloop1f
- .Lquit:
- retq
- .Lmore32:
- cmp $0x2000, %r9 { this limit must be processor-specific (1/2 L2 cache size) }
- jnae .Lloop32
- cmp $0x1000, %rcx { but don't bother bypassing cache if src and dest }
- jnb .Lntloopf { are close to each other}
- .balign 16
- .Lloop32:
- add $32,%rdx
- mov -32(%rcx,%rdx,1),%rax
- mov -24(%rcx,%rdx,1),%r10
- mov %rax,-32(%rdx)
- mov %r10,-24(%rdx)
- dec %r9
- mov -16(%rcx,%rdx,1),%rax
- mov -8(%rcx,%rdx,1),%r10
- mov %rax,-16(%rdx)
- mov %r10,-8(%rdx)
- jne .Lloop32
- and $0x1f, %r8
- jmpq .Ltail
- .Lntloopf:
- mov $32, %eax
- .balign 16
- .Lpref:
- prefetchnta (%rcx,%rdx,1)
- prefetchnta 0x40(%rcx,%rdx,1)
- add $0x80, %rdx
- dec %eax
- jne .Lpref
- sub $0x1000, %rdx
- mov $64, %eax
- .balign 16
- .Loop64:
- add $64, %rdx
- mov -64(%rcx,%rdx,1), %r9
- mov -56(%rcx,%rdx,1), %r10
- movnti %r9, -64(%rdx)
- movnti %r10, -56(%rdx)
- mov -48(%rcx,%rdx,1), %r9
- mov -40(%rcx,%rdx,1), %r10
- movnti %r9, -48(%rdx)
- movnti %r10, -40(%rdx)
- dec %eax
- mov -32(%rcx,%rdx,1), %r9
- mov -24(%rcx,%rdx,1), %r10
- movnti %r9, -32(%rdx)
- movnti %r10, -24(%rdx)
- mov -16(%rcx,%rdx,1), %r9
- mov -8(%rcx,%rdx,1), %r10
- movnti %r9, -16(%rdx)
- movnti %r10, -8(%rdx)
- jne .Loop64
- sub $0x1000, %r8
- cmp $0x1000, %r8
- jae .Lntloopf
- mfence
- jmpq .Ldestaligned { go handle remaining bytes }
- { backwards move }
- .Lback:
- add %r8, %rdx { points to the end of dest }
- cmp $8, %r8
- jl .Lless8b { signed compare, negative count not allowed }
- test $7, %dl
- je .Ldestalignedb
- test $1, %dl
- je .L2b
- dec %rdx
- mov (%rcx,%rdx,1), %al
- dec %r8
- mov %al, (%rdx)
- .L2b:
- test $2, %dl
- je .L4b
- sub $2, %rdx
- mov (%rcx,%rdx,1), %ax
- sub $2, %r8
- mov %ax, (%rdx)
- .L4b:
- test $4, %dl
- je .Ldestalignedb
- sub $4, %rdx
- mov (%rcx,%rdx,1), %eax
- sub $4, %r8
- mov %eax, (%rdx)
- .Ldestalignedb:
- mov %r8, %r9
- shr $5, %r9
- jne .Lmore32b
- .Ltailb:
- mov %r8, %r9
- shr $3, %r9
- je .Lless8b
- .Lloop8b:
- sub $8, %rdx
- mov (%rcx,%rdx,1), %rax
- dec %r9
- mov %rax, (%rdx)
- jne .Lloop8b
- and $7, %r8
- .Lless8b:
- test %r8, %r8
- jle .Lquit2
- .balign 16
- .Lsmallb:
- dec %rdx
- mov (%rcx,%rdx,1), %al
- dec %r8
- mov %al,(%rdx)
- jnz .Lsmallb
- .Lquit2:
- retq
- .Lmore32b:
- cmp $0x2000, %r9
- jnae .Lloop32b
- cmp $0xfffffffffffff000,%rcx
- jb .Lntloopb
- .balign 16
- .Lloop32b:
- sub $32, %rdx
- mov 24(%rcx,%rdx,1), %rax
- mov 16(%rcx,%rdx,1), %r10
- mov %rax, 24(%rdx)
- mov %r10, 16(%rdx)
- dec %r9
- mov 8(%rcx,%rdx,1),%rax
- mov (%rcx,%rdx,1), %r10
- mov %rax, 8(%rdx)
- mov %r10, (%rdx)
- jne .Lloop32b
- and $0x1f, %r8
- jmpq .Ltailb
- .Lntloopb:
- mov $32, %eax
- .balign 16
- .Lprefb:
- sub $0x80, %rdx
- prefetchnta (%rcx,%rdx,1)
- prefetchnta 0x40(%rcx,%rdx,1)
- dec %eax
- jnz .Lprefb
- add $0x1000, %rdx
- mov $0x40, %eax
- .balign 16
- .Lloop64b:
- sub $64, %rdx
- mov 56(%rcx,%rdx,1), %r9
- mov 48(%rcx,%rdx,1), %r10
- movnti %r9, 56(%rdx)
- movnti %r10, 48(%rdx)
- mov 40(%rcx,%rdx,1), %r9
- mov 32(%rcx,%rdx,1), %r10
- movnti %r9, 40(%rdx)
- movnti %r10, 32(%rdx)
- dec %eax
- mov 24(%rcx,%rdx,1), %r9
- mov 16(%rcx,%rdx,1), %r10
- movnti %r9, 24(%rdx)
- movnti %r10, 16(%rdx)
- mov 8(%rcx,%rdx,1), %r9
- mov (%rcx,%rdx,1), %r10
- movnti %r9, 8(%rdx)
- movnti %r10, (%rdx)
- jne .Lloop64b
- sub $0x1000, %r8
- cmp $0x1000, %r8
- jae .Lntloopb
- mfence
- jmpq .Ldestalignedb
- end;
- {$endif FPC_SYSTEM_HAS_MOVE}
- {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
- {$define FPC_SYSTEM_HAS_FILLCHAR}
- Procedure FillChar(var x;count:SizeInt;value:byte);assembler;nostackframe;
- asm
- { win64: rcx dest, rdx count, r8b value
- linux: rdi dest, rsi count, rdx value }
- {$ifndef win64}
- mov %rdx, %r8
- mov %rsi, %rdx
- mov %rdi, %rcx
- {$endif win64}
- cmp $8, %rdx
- jl .Ltiny
- { expand byte value }
- movzbl %r8b, %r8
- mov $0x0101010101010101,%r9
- imul %r9, %r8
- test $7, %cl
- je .Laligned
- { align dest to 8 bytes }
- test $1, %cl
- je .L2
- movb %r8b, (%rcx)
- add $1, %rcx
- sub $1, %rdx
- .L2:
- test $2, %cl
- je .L4
- movw %r8w, (%rcx)
- add $2, %rcx
- sub $2, %rdx
- .L4:
- test $4, %cl
- je .Laligned
- movl %r8d, (%rcx)
- add $4, %rcx
- sub $4, %rdx
- .Laligned:
- mov %rdx, %rax
- and $0x3f, %rdx
- shr $6, %rax
- jne .Lmore64
- .Lless64:
- mov %rdx, %rax
- and $7, %rdx
- shr $3, %rax
- je .Ltiny
- .balign 16
- .Lloop8: { max. 8 iterations }
- mov %r8, (%rcx)
- add $8, %rcx
- dec %rax
- jne .Lloop8
- .Ltiny:
- test %rdx, %rdx
- jle .Lquit
- .Lloop1:
- movb %r8b, (%rcx)
- inc %rcx
- dec %rdx
- jnz .Lloop1
- .Lquit:
- retq
- .Lmore64:
- cmp $0x2000,%rax
- jae .Lloop64nti
- .balign 16
- .Lloop64:
- add $64, %rcx
- mov %r8, -64(%rcx)
- mov %r8, -56(%rcx)
- mov %r8, -48(%rcx)
- mov %r8, -40(%rcx)
- dec %rax
- mov %r8, -32(%rcx)
- mov %r8, -24(%rcx)
- mov %r8, -16(%rcx)
- mov %r8, -8(%rcx)
- jne .Lloop64
- jmp .Lless64
- .balign 16
- .Lloop64nti:
- add $64, %rcx
- movnti %r8, -64(%rcx)
- movnti %r8, -56(%rcx)
- movnti %r8, -48(%rcx)
- movnti %r8, -40(%rcx)
- dec %rax
- movnti %r8, -32(%rcx)
- movnti %r8, -24(%rcx)
- movnti %r8, -16(%rcx)
- movnti %r8, -8(%rcx)
- jnz .Lloop64nti
- mfence
- jmp .Lless64
- end;
- {$endif FPC_SYSTEM_HAS_FILLCHAR}
- {$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
- { based on libc/sysdeps/x86_64/memchr.S }
- {$define FPC_SYSTEM_HAS_INDEXBYTE}
- function IndexByte(Const buf;len:SizeInt;b:byte):SizeInt; assembler; nostackframe;
- { win64: rcx buf, rdx len, r8b word
- linux: rdi buf, rsi len, rdx word }
- asm
- {$ifdef win64}
- movd %r8d, %xmm1
- {$else}
- movd %edx, %xmm1
- movq %rdi, %rcx
- movq %rsi, %rdx
- {$endif}
- mov %rcx, %rax { duplicate buf }
- punpcklbw %xmm1, %xmm1
- and $0xfffffffffffffff0, %rax
- test %rdx, %rdx
- punpcklbw %xmm1, %xmm1
- jz .L3 { exit if len=0 }
- orl $0xffffffff, %r8d
- movdqa (%rax), %xmm0 { Fetch first 16 bytes (up to 15 bytes before target) }
- pshufd $0, %xmm1, %xmm1
- sub %rax, %rcx { rcx=misalignment }
- pcmpeqb %xmm1, %xmm0
- add %rcx, %rdx { add misalignment to length }
- cmovb %r8, %rdx { if it overflows (happens when length=-1), set back to -1, }
- { otherwise loop will terminate too early }
- mov %rcx, %r9 { and save it, will subtract back in the end }
- shl %cl, %r8d
- pmovmskb %xmm0, %ecx
- andl %r8d, %ecx { mask away matches before buffer start }
- movl $16, %r8d
- jnz .L1 { got a match within buffer -> we're done (almost) }
- cmpq %r8, %rdx
- jbe .L3
- .balign 16
- .L2:
- movdqa (%rax,%r8), %xmm0
- lea 16(%r8), %r8
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %ecx
- test %ecx, %ecx
- jnz .L1
- cmp %r8, %rdx
- ja .L2
- .L3:
- or $-1, %rax
- jmp .Ldone
- .L1:
- bsfl %ecx, %ecx { compute position of the first match }
- lea -16(%rcx,%r8), %rax
- cmp %rax, %rdx
- jbe .L3 { if it is after the specified length, ignore it }
- sub %r9, %rax
- .Ldone:
- end;
- {$endif FPC_SYSTEM_HAS_INDEXBYTE}
- {$ifndef FPC_SYSTEM_HAS_INDEXWORD}
- {$define FPC_SYSTEM_HAS_INDEXWORD}
- function IndexWord(Const buf;len:SizeInt;b:word):SizeInt; assembler; nostackframe;
- { win64: rcx buf, rdx len, r8b word
- linux: rdi buf, rsi len, rdx word }
- asm
- {$ifdef win64}
- movd %r8d, %xmm1
- {$else}
- movd %edx, %xmm1
- movq %rdi, %rcx
- movq %rsi, %rdx
- {$endif}
- mov %rcx, %rax { duplicate buf }
- punpcklwd %xmm1, %xmm1
- and $0xfffffffffffffff0, %rax
- test %rdx, %rdx
- pshufd $0, %xmm1, %xmm1
- jz .L3 { exit if len=0 }
- orl $0xffffffff, %r8d
- test $1, %cl { if buffer isn't aligned to word boundary, }
- jnz .Lunaligned { fallback to slower unaligned loop }
- movdqa (%rax), %xmm0 { Fetch first 16 bytes (up to 14 bytes before target) }
- sub %rax, %rcx { rcx=misalignment }
- pcmpeqw %xmm1, %xmm0
- mov %rcx, %r9
- shr $1, %r9 { save misalignment in words }
- add %r9, %rdx { add misalignment to length }
- cmovb %r8, %rdx { if it overflows (happens when length=-1), set back to -1, }
- { otherwise loop will terminate too early }
- shl %cl, %r8d
- pmovmskb %xmm0, %ecx
- andl %r8d, %ecx { mask away matches before buffer start }
- movl $8, %r8d
- jnz .L1 { got a match within buffer -> we're done (almost) }
- cmpq %r8, %rdx
- jbe .L3
- .balign 16
- .L2:
- movdqa (%rax,%r8,2), %xmm0
- lea 8(%r8), %r8
- pcmpeqw %xmm1, %xmm0
- pmovmskb %xmm0, %ecx
- test %ecx, %ecx
- jnz .L1
- cmp %r8, %rdx
- ja .L2
- .L3:
- or $-1, %rax
- jmp .Ldone
- .L1:
- bsfl %ecx, %ecx { compute position of the first match }
- shr $1, %ecx { in words }
- lea -8(%rcx,%r8), %rax
- cmp %rax, %rdx
- jbe .L3 { if it is after the specified length, ignore it }
- sub %r9, %rax
- .Ldone:
- retq
- { TODO: aligned processing is still possible, but for now
- use the simplest form }
- .Lunaligned:
- xor %r9, %r9
- xor %r8, %r8
- mov %rcx, %rax
- .balign 16
- .L2u:
- movdqu (%rax,%r8,2), %xmm0
- lea 8(%r8), %r8
- pcmpeqw %xmm1, %xmm0
- pmovmskb %xmm0, %ecx
- test %ecx, %ecx
- jnz .L1
- cmp %r8, %rdx
- ja .L2u
- or $-1, %rax
- end;
- {$endif FPC_SYSTEM_HAS_INDEXWORD}
- {$asmmode att}
- {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
- { does a thread save inc/dec }
- function declocked(var l : longint) : boolean;assembler;
- asm
- {$ifdef win64}
- {
- l: %rcx
- }
- { this check should be done because a lock takes a lot }
- { of time! }
- cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
- jz .Ldeclockednolock
- lock
- decl (%rcx)
- jmp .Ldeclockedend
- .Ldeclockednolock:
- decl (%rcx)
- .Ldeclockedend:
- setzb %al
- {$else win64}
- {
- l: %rdi
- }
- { this check should be done because a lock takes a lot }
- { of time! }
- {$ifdef FPC_PIC}
- movq IsMultithread@GOTPCREL(%rip),%rax
- cmpb $0,(%rax)
- {$else FPC_PIC}
- cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
- {$endif FPC_PIC}
- jz .Ldeclockednolock
- lock
- decl (%rdi)
- jmp .Ldeclockedend
- .Ldeclockednolock:
- decl (%rdi)
- .Ldeclockedend:
- setzb %al
- {$endif win64}
- end;
- {$define FPC_SYSTEM_HAS_DECLOCKED_INT64}
- function declocked(var l : int64) : boolean;assembler;
- asm
- {$ifdef win64}
- {
- l: %rcx
- }
- { this check should be done because a lock takes a lot }
- { of time! }
- cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
- jz .Ldeclockednolock
- lock
- decq (%rcx)
- jmp .Ldeclockedend
- .Ldeclockednolock:
- decq (%rcx)
- .Ldeclockedend:
- setzb %al
- {$else win64}
- {
- l: %rdi
- }
- { this check should be done because a lock takes a lot }
- { of time! }
- {$ifdef FPC_PIC}
- movq IsMultithread@GOTPCREL(%rip),%rax
- cmpb $0,(%rax)
- {$else FPC_PIC}
- cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
- {$endif FPC_PIC}
- jz .Ldeclockednolock
- lock
- decq (%rdi)
- jmp .Ldeclockedend
- .Ldeclockednolock:
- decq (%rdi)
- .Ldeclockedend:
- setzb %al
- {$endif win64}
- end;
- {$define FPC_SYSTEM_HAS_INCLOCKED_LONGINT}
- procedure inclocked(var l : longint);assembler;
- asm
- {$ifdef win64}
- {
- l: %rcx
- }
- { this check should be done because a lock takes a lot }
- { of time! }
- cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
- jz .Linclockednolock
- lock
- incl (%rcx)
- jmp .Linclockedend
- .Linclockednolock:
- incl (%rcx)
- .Linclockedend:
- {$else win64}
- {
- l: %rdi
- }
- { this check should be done because a lock takes a lot }
- { of time! }
- {$ifdef FPC_PIC}
- movq IsMultithread@GOTPCREL(%rip),%rax
- cmpb $0,(%rax)
- {$else FPC_PIC}
- cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
- {$endif FPC_PIC}
- jz .Linclockednolock
- lock
- incl (%rdi)
- jmp .Linclockedend
- .Linclockednolock:
- incl (%rdi)
- .Linclockedend:
- {$endif win64}
- end;
- {$define FPC_SYSTEM_HAS_INCLOCKED_INT64}
- procedure inclocked(var l : int64);assembler;
- asm
- {$ifdef win64}
- {
- l: %rcx
- }
- { this check should be done because a lock takes a lot }
- { of time! }
- cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
- jz .Linclockednolock
- lock
- incq (%rcx)
- jmp .Linclockedend
- .Linclockednolock:
- incq (%rcx)
- .Linclockedend:
- {$else win64}
- {
- l: %rdi
- }
- { this check should be done because a lock takes a lot }
- { of time! }
- {$ifdef FPC_PIC}
- movq IsMultithread@GOTPCREL(%rip),%rax
- cmpb $0,(%rax)
- {$else FPC_PIC}
- cmpb $0,IsMultithread{$ifdef FPC_HAS_RIP_RELATIVE}(%rip){$endif}
- {$endif FPC_PIC}
- jz .Linclockednolock
- lock
- incq (%rdi)
- jmp .Linclockedend
- .Linclockednolock:
- incq (%rdi)
- .Linclockedend:
- {$endif win64}
- end;
- function InterLockedDecrement (var Target: longint) : longint; assembler;
- asm
- {$ifdef win64}
- movq %rcx,%rax
- {$else win64}
- movq %rdi,%rax
- {$endif win64}
- movl $-1,%edx
- xchgq %rdx,%rax
- lock
- xaddl %eax, (%rdx)
- decl %eax
- end;
- function InterLockedIncrement (var Target: longint) : longint; assembler;
- asm
- {$ifdef win64}
- movq %rcx,%rax
- {$else win64}
- movq %rdi,%rax
- {$endif win64}
- movl $1,%edx
- xchgq %rdx,%rax
- lock
- xaddl %eax, (%rdx)
- incl %eax
- end;
- function InterLockedExchange (var Target: longint;Source : longint) : longint; assembler;
- asm
- {$ifdef win64}
- xchgl (%rcx),%edx
- movl %edx,%eax
- {$else win64}
- xchgl (%rdi),%esi
- movl %esi,%eax
- {$endif win64}
- end;
- function InterLockedExchangeAdd (var Target: longint;Source : longint) : longint; assembler;
- asm
- {$ifdef win64}
- xchgq %rcx,%rdx
- lock
- xaddl %ecx, (%rdx)
- movl %ecx,%eax
- {$else win64}
- xchgq %rdi,%rsi
- lock
- xaddl %edi, (%rsi)
- movl %edi,%eax
- {$endif win64}
- end;
- function InterLockedCompareExchange(var Target: longint; NewValue, Comperand : longint): longint; assembler;
- asm
- {$ifdef win64}
- movl %r8d,%eax
- lock
- cmpxchgl %edx,(%rcx)
- {$else win64}
- movl %edx,%eax
- lock
- cmpxchgl %esi,(%rdi)
- {$endif win64}
- end;
- function InterLockedDecrement64 (var Target: int64) : int64; assembler;
- asm
- {$ifdef win64}
- movq %rcx,%rax
- {$else win64}
- movq %rdi,%rax
- {$endif win64}
- movq $-1,%rdx
- xchgq %rdx,%rax
- lock
- xaddq %rax, (%rdx)
- decq %rax
- end;
- function InterLockedIncrement64 (var Target: int64) : int64; assembler;
- asm
- {$ifdef win64}
- movq %rcx,%rax
- {$else win64}
- movq %rdi,%rax
- {$endif win64}
- movq $1,%rdx
- xchgq %rdx,%rax
- lock
- xaddq %rax, (%rdx)
- incq %rax
- end;
- function InterLockedExchange64 (var Target: int64;Source : int64) : int64; assembler;
- asm
- {$ifdef win64}
- xchgq (%rcx),%rdx
- movq %rdx,%rax
- {$else win64}
- xchgq (%rdi),%rsi
- movq %rsi,%rax
- {$endif win64}
- end;
- function InterLockedExchangeAdd64 (var Target: int64;Source : int64) : int64; assembler;
- asm
- {$ifdef win64}
- xchgq %rcx,%rdx
- lock
- xaddq %rcx, (%rdx)
- movq %rcx,%rax
- {$else win64}
- xchgq %rdi,%rsi
- lock
- xaddq %rdi, (%rsi)
- movq %rdi,%rax
- {$endif win64}
- end;
- function InterLockedCompareExchange64(var Target: int64; NewValue, Comperand : int64): int64; assembler;
- asm
- {$ifdef win64}
- movq %r8,%rax
- lock
- cmpxchgq %rdx,(%rcx)
- {$else win64}
- movq %rdx,%rax
- lock
- cmpxchgq %rsi,(%rdi)
- {$endif win64}
- end;
- {****************************************************************************
- FPU
- ****************************************************************************}
- const
- { Internal constants for use in system unit }
- FPU_Invalid = 1;
- FPU_Denormal = 2;
- FPU_DivisionByZero = 4;
- FPU_Overflow = 8;
- FPU_Underflow = $10;
- FPU_StackUnderflow = $20;
- FPU_StackOverflow = $40;
- FPU_ExceptionMask = $ff;
- fpucw : word = $1300 or FPU_StackUnderflow or FPU_Underflow or FPU_Denormal;
- MM_MaskInvalidOp = %0000000010000000;
- MM_MaskDenorm = %0000000100000000;
- MM_MaskDivZero = %0000001000000000;
- MM_MaskOverflow = %0000010000000000;
- MM_MaskUnderflow = %0000100000000000;
- MM_MaskPrecision = %0001000000000000;
- mxcsr : dword = MM_MaskUnderflow or MM_MaskPrecision or MM_MaskDenorm;
- procedure fpc_cpuinit;
- begin
- { don't let libraries influence the FPU cw set by the host program }
- if IsLibrary then
- begin
- Default8087CW:=Get8087CW;
- mxcsr:=GetSSECSR;
- end;
- SysResetFPU;
- if not(IsLibrary) then
- SysInitFPU;
- end;
- {$define FPC_SYSTEM_HAS_SYSINITFPU}
- Procedure SysInitFPU;
- var
- { these locals are so we don't have to hack pic code in the assembler }
- localmxcsr: dword;
- localfpucw: word;
-
- begin
- localmxcsr:=mxcsr;
- localfpucw:=fpucw;
- asm
- fldcw localfpucw
- { set sse exceptions }
- ldmxcsr localmxcsr
- end ['RAX'];
- { x86-64 might use softfloat code }
- softfloat_exception_mask:=float_flag_underflow or float_flag_inexact or float_flag_denormal;
- end;
- {$define FPC_SYSTEM_HAS_SYSRESETFPU}
- Procedure SysResetFPU;
- var
- { these locals are so we don't have to hack pic code in the assembler }
- localmxcsr: dword;
- localfpucw: word;
- begin
- localfpucw:=Default8087CW;
- localmxcsr:=mxcsr;
- asm
- fninit
- fwait
- fldcw localfpucw
- ldmxcsr localmxcsr
- end;
- { x86-64 might use softfloat code }
- softfloat_exception_flags:=0;
- end;
- {$ifndef FPC_SYSTEM_HAS_MEM_BARRIER}
- {$define FPC_SYSTEM_HAS_MEM_BARRIER}
- procedure ReadBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
- asm
- lfence
- end;
- procedure ReadDependencyBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
- asm
- { reads imply barrier on earlier reads depended on }
- end;
- procedure ReadWriteBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
- asm
- mfence
- end;
- procedure WriteBarrier;assembler;nostackframe;{$ifdef SYSTEMINLINE}inline;{$endif}
- asm
- sfence
- end;
- {$endif}
- {****************************************************************************
- Math Routines
- ****************************************************************************}
- {$define FPC_SYSTEM_HAS_SWAPENDIAN}
- { SwapEndian(<16 Bit>) being inlined is faster than using assembler }
- function SwapEndian(const AValue: SmallInt): SmallInt;{$ifdef SYSTEMINLINE}inline;{$endif}
- begin
- { the extra Word type cast is necessary because the "AValue shr 8" }
- { is turned into "longint(AValue) shr 8", so if AValue < 0 then }
- { the sign bits from the upper 16 bits are shifted in rather than }
- { zeroes. }
- Result := SmallInt((Word(AValue) shr 8) or (Word(AValue) shl 8));
- end;
- function SwapEndian(const AValue: Word): Word;{$ifdef SYSTEMINLINE}inline;{$endif}
- begin
- Result := Word((AValue shr 8) or (AValue shl 8));
- end;
- function SwapEndian(const AValue: LongInt): LongInt; assembler;
- asm
- {$ifdef win64}
- movl %ecx, %eax
- {$else win64}
- movl %edi, %eax
- {$endif win64}
- bswap %eax
- end;
- function SwapEndian(const AValue: DWord): DWord; assembler;
- asm
- {$ifdef win64}
- movl %ecx, %eax
- {$else win64}
- movl %edi, %eax
- {$endif win64}
- bswap %eax
- end;
- function SwapEndian(const AValue: Int64): Int64; assembler;
- asm
- {$ifdef win64}
- movq %rcx, %rax
- {$else win64}
- movq %rdi, %rax
- {$endif win64}
- bswap %rax
- end;
- function SwapEndian(const AValue: QWord): QWord; assembler;
- asm
- {$ifdef win64}
- movq %rcx, %rax
- {$else win64}
- movq %rdi, %rax
- {$endif win64}
- bswap %rax
- end;
|