|
@@ -4,8 +4,12 @@
|
|
{ at least valgrind up to 3.3 has a bug which prevents the default code from
|
|
{ at least valgrind up to 3.3 has a bug which prevents the default code from
|
|
working, so we use a rather simple implementation here }
|
|
working, so we use a rather simple implementation here }
|
|
procedure Move_8OrMore_Valgrind; assembler; nostackframe;
|
|
procedure Move_8OrMore_Valgrind; assembler; nostackframe;
|
|
-{ ebx pushed, eax = source, edx = dest, ecx = count (ecx >= 8). }
|
|
|
|
|
|
+{ eax = source, edx = dest, ecx = count (ecx >= 8).
|
|
|
|
+ If FPC_PIC: ebx pushed. }
|
|
asm
|
|
asm
|
|
|
|
+{$ifndef FPC_PIC}
|
|
|
|
+ push %ebx
|
|
|
|
+{$endif}
|
|
sub %edx, %eax
|
|
sub %edx, %eax
|
|
jae .LForward
|
|
jae .LForward
|
|
mov %ecx, %ebx
|
|
mov %ecx, %ebx
|
|
@@ -38,7 +42,8 @@ asm
|
|
end;
|
|
end;
|
|
|
|
|
|
procedure Move_8OrMore_IA32; assembler; nostackframe;
|
|
procedure Move_8OrMore_IA32; assembler; nostackframe;
|
|
-{ ebx pushed, eax = source, edx = dest, ecx = count (ecx >= 8). }
|
|
|
|
|
|
+{ eax = source, edx = dest, ecx = count (ecx >= 8).
|
|
|
|
+ If FPC_PIC: ebx pushed. }
|
|
asm
|
|
asm
|
|
fildq (%eax) { First and last 8 bytes, used both in .L33OrMore and ladder ending (.L9to16). }
|
|
fildq (%eax) { First and last 8 bytes, used both in .L33OrMore and ladder ending (.L9to16). }
|
|
fildq -8(%eax,%ecx)
|
|
fildq -8(%eax,%ecx)
|
|
@@ -53,18 +58,25 @@ asm
|
|
.L9to16:
|
|
.L9to16:
|
|
fistpq -8(%edx,%ecx) { 9–16 bytes }
|
|
fistpq -8(%edx,%ecx) { 9–16 bytes }
|
|
fistpq (%edx)
|
|
fistpq (%edx)
|
|
|
|
+{$ifdef FPC_PIC}
|
|
pop %ebx
|
|
pop %ebx
|
|
|
|
+{$endif}
|
|
ret
|
|
ret
|
|
|
|
|
|
.Lcancel:
|
|
.Lcancel:
|
|
fucompp { Pop two elements loaded at the beginning. }
|
|
fucompp { Pop two elements loaded at the beginning. }
|
|
|
|
+{$ifdef FPC_PIC}
|
|
pop %ebx
|
|
pop %ebx
|
|
|
|
+{$endif}
|
|
ret
|
|
ret
|
|
- .byte 0x66,0x0F,0x1F,0x84,0,0,0,0,0 { Turns .balign 16 before .Lloop16f into a no-op. }
|
|
|
|
|
|
+ .byte {$ifndef FPC_PIC}102,{$endif}102,102,102,102,102,102,102,102,144 { Turns .balign 16 before .Lloop16f into a no-op. }
|
|
|
|
|
|
.L33OrMore:
|
|
.L33OrMore:
|
|
sub %edx, %eax { eax = src - dest }
|
|
sub %edx, %eax { eax = src - dest }
|
|
jz .Lcancel { exit if src=dest }
|
|
jz .Lcancel { exit if src=dest }
|
|
|
|
+{$ifndef FPC_PIC}
|
|
|
|
+ push %ebx
|
|
|
|
+{$endif}
|
|
jnb .LForward { src>dest => forward move }
|
|
jnb .LForward { src>dest => forward move }
|
|
|
|
|
|
mov %ecx, %ebx
|
|
mov %ecx, %ebx
|
|
@@ -101,7 +113,7 @@ asm
|
|
fistpq (%ebx) { Important for <8-byte step between src and dest. }
|
|
fistpq (%ebx) { Important for <8-byte step between src and dest. }
|
|
pop %ebx
|
|
pop %ebx
|
|
ret
|
|
ret
|
|
- .byte 0x0F,0x1F,0x84,0,0,0,0,0 { Turns .balign 16 before .Lloop16b into a no-op. }
|
|
|
|
|
|
+ .byte 102,102,102,102,102,102,102,144 { Turns .balign 16 before .Lloop16b into a no-op. }
|
|
|
|
|
|
{ backwards move }
|
|
{ backwards move }
|
|
.Lback:
|
|
.Lback:
|
|
@@ -137,10 +149,14 @@ asm
|
|
end;
|
|
end;
|
|
|
|
|
|
procedure Move_8OrMore_MMX; assembler; nostackframe;
|
|
procedure Move_8OrMore_MMX; assembler; nostackframe;
|
|
-{ ebx pushed, eax = source, edx = dest, ecx = count (ecx >= 8). }
|
|
|
|
|
|
+{ eax = source, edx = dest, ecx = count (ecx >= 8).
|
|
|
|
+ If FPC_PIC: ebx pushed. }
|
|
asm
|
|
asm
|
|
cmp $72, %ecx { Size at which using MMX becomes worthwhile. }
|
|
cmp $72, %ecx { Size at which using MMX becomes worthwhile. }
|
|
jl Move_8OrMore_IA32
|
|
jl Move_8OrMore_IA32
|
|
|
|
+{$ifndef FPC_PIC}
|
|
|
|
+ push %ebx
|
|
|
|
+{$endif}
|
|
movq (%eax), %mm4 { First and last 8 bytes. }
|
|
movq (%eax), %mm4 { First and last 8 bytes. }
|
|
movq -8(%eax,%ecx), %mm5
|
|
movq -8(%eax,%ecx), %mm5
|
|
sub %edx, %eax { eax = src - dest }
|
|
sub %edx, %eax { eax = src - dest }
|
|
@@ -183,7 +199,7 @@ asm
|
|
emms
|
|
emms
|
|
pop %ebx
|
|
pop %ebx
|
|
ret
|
|
ret
|
|
- .byte 0x66,0x66,0x66,0x66,0x66,0x2E,0x0F,0x1F,0x84,0,0,0,0,0 { Turns .balign 16 before .Lloop16b into a no-op. }
|
|
|
|
|
|
+ .byte 102,102,102,102,102,102,102,102,102,102,102,102,102,144 { Turns .balign 16 before .Lloop16b into a no-op. }
|
|
|
|
|
|
{ backwards move }
|
|
{ backwards move }
|
|
.Lback:
|
|
.Lback:
|
|
@@ -221,7 +237,8 @@ end;
|
|
|
|
|
|
{$ifndef FASTMOVE_DISABLE_SSE}
|
|
{$ifndef FASTMOVE_DISABLE_SSE}
|
|
procedure Move_8OrMore_SSE; assembler; nostackframe;
|
|
procedure Move_8OrMore_SSE; assembler; nostackframe;
|
|
-{ ebx pushed, eax = source, edx = dest, ecx = count (ecx >= 8). }
|
|
|
|
|
|
+{ eax = source, edx = dest, ecx = count (ecx >= 8).
|
|
|
|
+ If FPC_PIC: ebx pushed. }
|
|
const
|
|
const
|
|
ErmsThreshold = 1536;
|
|
ErmsThreshold = 1536;
|
|
NtThreshold = 256 * 1024; { this limit must be processor-specific (1/2 L2 cache size) }
|
|
NtThreshold = 256 * 1024; { this limit must be processor-specific (1/2 L2 cache size) }
|
|
@@ -235,7 +252,9 @@ asm
|
|
jg .L33OrMore
|
|
jg .L33OrMore
|
|
movups %xmm4, (%edx) { 17–32 bytes }
|
|
movups %xmm4, (%edx) { 17–32 bytes }
|
|
movups %xmm5, -16(%edx,%ecx)
|
|
movups %xmm5, -16(%edx,%ecx)
|
|
|
|
+{$ifdef FPC_PIC}
|
|
pop %ebx
|
|
pop %ebx
|
|
|
|
+{$endif}
|
|
ret
|
|
ret
|
|
|
|
|
|
.L9to16:
|
|
.L9to16:
|
|
@@ -244,13 +263,18 @@ asm
|
|
movq %xmm0, (%edx)
|
|
movq %xmm0, (%edx)
|
|
movq %xmm1, -8(%edx,%ecx)
|
|
movq %xmm1, -8(%edx,%ecx)
|
|
.Lquit:
|
|
.Lquit:
|
|
|
|
+{$ifdef FPC_PIC}
|
|
pop %ebx
|
|
pop %ebx
|
|
|
|
+{$endif}
|
|
ret
|
|
ret
|
|
- .byte 0x66,0x66,0x66,0x66,0x66,0x2E,0x0F,0x1F,0x84,0,0,0,0,0 { Turns .balign 16 before .Lloop32f into a no-op. }
|
|
|
|
|
|
+ .byte {$ifndef FPC_PIC}102,{$endif}102,102,102,102,102,102,102,102,102,102,102,102,102,144 { Turns .balign 16 before .Lloop32f into a no-op. }
|
|
|
|
|
|
.L33OrMore:
|
|
.L33OrMore:
|
|
sub %edx, %eax { eax = src - dest }
|
|
sub %edx, %eax { eax = src - dest }
|
|
jz .Lquit { exit if src=dest }
|
|
jz .Lquit { exit if src=dest }
|
|
|
|
+{$ifndef FPC_PIC}
|
|
|
|
+ push %ebx
|
|
|
|
+{$endif}
|
|
jnb .LForward { src>dest => forward move }
|
|
jnb .LForward { src>dest => forward move }
|
|
|
|
|
|
mov %ecx, %ebx
|
|
mov %ecx, %ebx
|
|
@@ -386,7 +410,7 @@ asm
|
|
sfence
|
|
sfence
|
|
add $PrefetchDistance+64, %ecx
|
|
add $PrefetchDistance+64, %ecx
|
|
jmp .LRestAfterNTf
|
|
jmp .LRestAfterNTf
|
|
- .byte 0x66,0x0F,0x1F,0x44,0,0 { Turns .balign 16 before .Lloop32b into a no-op. }
|
|
|
|
|
|
+ .byte 102,102,102,102,102,144 { Turns .balign 16 before .Lloop32b into a no-op. }
|
|
|
|
|
|
{ backwards move }
|
|
{ backwards move }
|
|
.Lback:
|
|
.Lback:
|
|
@@ -480,8 +504,12 @@ begin
|
|
end;
|
|
end;
|
|
|
|
|
|
procedure Move_8OrMore_Dispatch; assembler; nostackframe;
|
|
procedure Move_8OrMore_Dispatch; assembler; nostackframe;
|
|
-{ ebx pushed, eax = source, edx = dest, ecx = count (ecx >= 8). }
|
|
|
|
|
|
+{ eax = source, edx = dest, ecx = count (ecx >= 8).
|
|
|
|
+ If FPC_PIC: ebx pushed. }
|
|
asm
|
|
asm
|
|
|
|
+{$ifndef FPC_PIC}
|
|
|
|
+ push %ebx
|
|
|
|
+{$endif}
|
|
push %eax
|
|
push %eax
|
|
push %edx
|
|
push %edx
|
|
push %ecx
|
|
push %ecx
|
|
@@ -490,15 +518,20 @@ asm
|
|
pop %ecx
|
|
pop %ecx
|
|
pop %edx
|
|
pop %edx
|
|
pop %eax
|
|
pop %eax
|
|
|
|
+{$ifdef FPC_PIC}
|
|
jmp %ebx
|
|
jmp %ebx
|
|
|
|
+{$else}
|
|
|
|
+ call %ebx
|
|
|
|
+ pop %ebx
|
|
|
|
+{$endif}
|
|
end;
|
|
end;
|
|
|
|
|
|
procedure Move(const source;var dest;count:SizeInt); [public, alias: 'FPC_MOVE']; assembler; nostackframe;
|
|
procedure Move(const source;var dest;count:SizeInt); [public, alias: 'FPC_MOVE']; assembler; nostackframe;
|
|
asm
|
|
asm
|
|
- push %ebx
|
|
|
|
cmp $8, %ecx
|
|
cmp $8, %ecx
|
|
jle .L8OrLess
|
|
jle .L8OrLess
|
|
{$ifdef FPC_PIC}
|
|
{$ifdef FPC_PIC}
|
|
|
|
+ push %ebx
|
|
call fpc_geteipasebx
|
|
call fpc_geteipasebx
|
|
addl $_GLOBAL_OFFSET_TABLE_, %ebx
|
|
addl $_GLOBAL_OFFSET_TABLE_, %ebx
|
|
movl fastmoveproc@GOT(%ebx), %ebx
|
|
movl fastmoveproc@GOT(%ebx), %ebx
|
|
@@ -510,6 +543,7 @@ asm
|
|
.L8OrLess:
|
|
.L8OrLess:
|
|
cmp $3, %ecx
|
|
cmp $3, %ecx
|
|
jle .L3OrLess
|
|
jle .L3OrLess
|
|
|
|
+ push %ebx
|
|
mov (%eax), %ebx
|
|
mov (%eax), %ebx
|
|
mov -4(%eax,%ecx), %eax
|
|
mov -4(%eax,%ecx), %eax
|
|
mov %ebx, (%edx)
|
|
mov %ebx, (%edx)
|
|
@@ -520,14 +554,15 @@ asm
|
|
.L3OrLess:
|
|
.L3OrLess:
|
|
cmp $1, %ecx
|
|
cmp $1, %ecx
|
|
jl .LZero
|
|
jl .LZero
|
|
|
|
+ push %ebx
|
|
movzbl (%eax), %ebx
|
|
movzbl (%eax), %ebx
|
|
je .LOne
|
|
je .LOne
|
|
movzwl -2(%eax,%ecx), %eax
|
|
movzwl -2(%eax,%ecx), %eax
|
|
mov %ax, -2(%edx,%ecx)
|
|
mov %ax, -2(%edx,%ecx)
|
|
.LOne:
|
|
.LOne:
|
|
mov %bl, (%edx)
|
|
mov %bl, (%edx)
|
|
-.LZero:
|
|
|
|
pop %ebx
|
|
pop %ebx
|
|
|
|
+.LZero:
|
|
end;
|
|
end;
|
|
|
|
|
|
{$endif FPC_SYSTEM_HAS_MOVE}
|
|
{$endif FPC_SYSTEM_HAS_MOVE}
|