Browse Source

Attempt to avoid the push/pop of ebx on small non-GPR moves.

Rika Ichinose 1 year ago
parent
commit
ecc56d7e68
1 changed file with 47 additions and 12 deletions
  1. 47 12
      rtl/i386/fastmove.inc

+ 47 - 12
rtl/i386/fastmove.inc

@@ -4,8 +4,12 @@
 { at least valgrind up to 3.3 has a bug which prevents the default code from
 { at least valgrind up to 3.3 has a bug which prevents the default code from
   working, so we use a rather simple implementation here }
   working, so we use a rather simple implementation here }
 procedure Move_8OrMore_Valgrind; assembler; nostackframe;
 procedure Move_8OrMore_Valgrind; assembler; nostackframe;
-{ ebx pushed, eax = source, edx = dest, ecx = count (ecx >= 8). }
+{ eax = source, edx = dest, ecx = count (ecx >= 8).
+  If FPC_PIC: ebx pushed. }
 asm
 asm
+{$ifndef FPC_PIC}
+    push   %ebx
+{$endif}
     sub    %edx, %eax
     sub    %edx, %eax
     jae    .LForward
     jae    .LForward
     mov    %ecx, %ebx
     mov    %ecx, %ebx
@@ -38,7 +42,8 @@ asm
 end;
 end;
 
 
 procedure Move_8OrMore_IA32; assembler; nostackframe;
 procedure Move_8OrMore_IA32; assembler; nostackframe;
-{ ebx pushed, eax = source, edx = dest, ecx = count (ecx >= 8). }
+{ eax = source, edx = dest, ecx = count (ecx >= 8).
+  If FPC_PIC: ebx pushed. }
 asm
 asm
     fildq  (%eax)                { First and last 8 bytes, used both in .L33OrMore and ladder ending (.L9to16). }
     fildq  (%eax)                { First and last 8 bytes, used both in .L33OrMore and ladder ending (.L9to16). }
     fildq  -8(%eax,%ecx)
     fildq  -8(%eax,%ecx)
@@ -53,18 +58,25 @@ asm
 .L9to16:
 .L9to16:
     fistpq -8(%edx,%ecx)         { 9–16 bytes }
     fistpq -8(%edx,%ecx)         { 9–16 bytes }
     fistpq (%edx)
     fistpq (%edx)
+{$ifdef FPC_PIC}
     pop    %ebx
     pop    %ebx
+{$endif}
     ret
     ret
 
 
 .Lcancel:
 .Lcancel:
     fucompp                      { Pop two elements loaded at the beginning. }
     fucompp                      { Pop two elements loaded at the beginning. }
+{$ifdef FPC_PIC}
     pop    %ebx
     pop    %ebx
+{$endif}
     ret
     ret
-    .byte  0x66,0x0F,0x1F,0x84,0,0,0,0,0 { Turns .balign 16 before .Lloop16f into a no-op. }
+    .byte  {$ifndef FPC_PIC}102,{$endif}102,102,102,102,102,102,102,102,144 { Turns .balign 16 before .Lloop16f into a no-op. }
 
 
 .L33OrMore:
 .L33OrMore:
     sub    %edx, %eax            { eax = src - dest }
     sub    %edx, %eax            { eax = src - dest }
     jz     .Lcancel              { exit if src=dest }
     jz     .Lcancel              { exit if src=dest }
+{$ifndef FPC_PIC}
+    push   %ebx
+{$endif}
     jnb    .LForward             { src>dest => forward move }
     jnb    .LForward             { src>dest => forward move }
 
 
     mov    %ecx, %ebx
     mov    %ecx, %ebx
@@ -101,7 +113,7 @@ asm
     fistpq (%ebx)                { Important for <8-byte step between src and dest. }
     fistpq (%ebx)                { Important for <8-byte step between src and dest. }
     pop    %ebx
     pop    %ebx
     ret
     ret
-    .byte  0x0F,0x1F,0x84,0,0,0,0,0 { Turns .balign 16 before .Lloop16b into a no-op. }
+    .byte  102,102,102,102,102,102,102,144 { Turns .balign 16 before .Lloop16b into a no-op. }
 
 
 { backwards move }
 { backwards move }
 .Lback:
 .Lback:
@@ -137,10 +149,14 @@ asm
 end;
 end;
 
 
 procedure Move_8OrMore_MMX; assembler; nostackframe;
 procedure Move_8OrMore_MMX; assembler; nostackframe;
-{ ebx pushed, eax = source, edx = dest, ecx = count (ecx >= 8). }
+{ eax = source, edx = dest, ecx = count (ecx >= 8).
+  If FPC_PIC: ebx pushed. }
 asm
 asm
     cmp    $72, %ecx             { Size at which using MMX becomes worthwhile. }
     cmp    $72, %ecx             { Size at which using MMX becomes worthwhile. }
     jl     Move_8OrMore_IA32
     jl     Move_8OrMore_IA32
+{$ifndef FPC_PIC}
+    push   %ebx
+{$endif}
     movq   (%eax), %mm4          { First and last 8 bytes. }
     movq   (%eax), %mm4          { First and last 8 bytes. }
     movq   -8(%eax,%ecx), %mm5
     movq   -8(%eax,%ecx), %mm5
     sub    %edx, %eax            { eax = src - dest }
     sub    %edx, %eax            { eax = src - dest }
@@ -183,7 +199,7 @@ asm
     emms
     emms
     pop    %ebx
     pop    %ebx
     ret
     ret
-    .byte  0x66,0x66,0x66,0x66,0x66,0x2E,0x0F,0x1F,0x84,0,0,0,0,0 { Turns .balign 16 before .Lloop16b into a no-op. }
+    .byte  102,102,102,102,102,102,102,102,102,102,102,102,102,144 { Turns .balign 16 before .Lloop16b into a no-op. }
 
 
 { backwards move }
 { backwards move }
 .Lback:
 .Lback:
@@ -221,7 +237,8 @@ end;
 
 
 {$ifndef FASTMOVE_DISABLE_SSE}
 {$ifndef FASTMOVE_DISABLE_SSE}
 procedure Move_8OrMore_SSE; assembler; nostackframe;
 procedure Move_8OrMore_SSE; assembler; nostackframe;
-{ ebx pushed, eax = source, edx = dest, ecx = count (ecx >= 8). }
+{ eax = source, edx = dest, ecx = count (ecx >= 8).
+  If FPC_PIC: ebx pushed. }
 const
 const
   ErmsThreshold = 1536;
   ErmsThreshold = 1536;
   NtThreshold = 256 * 1024; { this limit must be processor-specific (1/2 L2 cache size) }
   NtThreshold = 256 * 1024; { this limit must be processor-specific (1/2 L2 cache size) }
@@ -235,7 +252,9 @@ asm
     jg     .L33OrMore
     jg     .L33OrMore
     movups %xmm4, (%edx)         { 17–32 bytes }
     movups %xmm4, (%edx)         { 17–32 bytes }
     movups %xmm5, -16(%edx,%ecx)
     movups %xmm5, -16(%edx,%ecx)
+{$ifdef FPC_PIC}
     pop    %ebx
     pop    %ebx
+{$endif}
     ret
     ret
 
 
 .L9to16:
 .L9to16:
@@ -244,13 +263,18 @@ asm
     movq   %xmm0, (%edx)
     movq   %xmm0, (%edx)
     movq   %xmm1, -8(%edx,%ecx)
     movq   %xmm1, -8(%edx,%ecx)
 .Lquit:
 .Lquit:
+{$ifdef FPC_PIC}
     pop    %ebx
     pop    %ebx
+{$endif}
     ret
     ret
-    .byte  0x66,0x66,0x66,0x66,0x66,0x2E,0x0F,0x1F,0x84,0,0,0,0,0 { Turns .balign 16 before .Lloop32f into a no-op. }
+    .byte  {$ifndef FPC_PIC}102,{$endif}102,102,102,102,102,102,102,102,102,102,102,102,102,144 { Turns .balign 16 before .Lloop32f into a no-op. }
 
 
 .L33OrMore:
 .L33OrMore:
     sub    %edx, %eax            { eax = src - dest }
     sub    %edx, %eax            { eax = src - dest }
     jz     .Lquit                { exit if src=dest }
     jz     .Lquit                { exit if src=dest }
+{$ifndef FPC_PIC}
+    push   %ebx
+{$endif}
     jnb    .LForward             { src>dest => forward move }
     jnb    .LForward             { src>dest => forward move }
 
 
     mov    %ecx, %ebx
     mov    %ecx, %ebx
@@ -386,7 +410,7 @@ asm
     sfence
     sfence
     add    $PrefetchDistance+64, %ecx
     add    $PrefetchDistance+64, %ecx
     jmp    .LRestAfterNTf
     jmp    .LRestAfterNTf
-    .byte  0x66,0x0F,0x1F,0x44,0,0 { Turns .balign 16 before .Lloop32b into a no-op. }
+    .byte  102,102,102,102,102,144 { Turns .balign 16 before .Lloop32b into a no-op. }
 
 
 { backwards move }
 { backwards move }
 .Lback:
 .Lback:
@@ -480,8 +504,12 @@ begin
 end;
 end;
 
 
 procedure Move_8OrMore_Dispatch; assembler; nostackframe;
 procedure Move_8OrMore_Dispatch; assembler; nostackframe;
-{ ebx pushed, eax = source, edx = dest, ecx = count (ecx >= 8). }
+{ eax = source, edx = dest, ecx = count (ecx >= 8).
+  If FPC_PIC: ebx pushed. }
 asm
 asm
+{$ifndef FPC_PIC}
+    push %ebx
+{$endif}
     push %eax
     push %eax
     push %edx
     push %edx
     push %ecx
     push %ecx
@@ -490,15 +518,20 @@ asm
     pop  %ecx
     pop  %ecx
     pop  %edx
     pop  %edx
     pop  %eax
     pop  %eax
+{$ifdef FPC_PIC}
     jmp  %ebx
     jmp  %ebx
+{$else}
+    call %ebx
+    pop  %ebx
+{$endif}
 end;
 end;
 
 
 procedure Move(const source;var dest;count:SizeInt); [public, alias: 'FPC_MOVE']; assembler; nostackframe;
 procedure Move(const source;var dest;count:SizeInt); [public, alias: 'FPC_MOVE']; assembler; nostackframe;
 asm
 asm
-    push   %ebx
     cmp    $8, %ecx
     cmp    $8, %ecx
     jle    .L8OrLess
     jle    .L8OrLess
 {$ifdef FPC_PIC}
 {$ifdef FPC_PIC}
+    push   %ebx
     call   fpc_geteipasebx
     call   fpc_geteipasebx
     addl   $_GLOBAL_OFFSET_TABLE_, %ebx
     addl   $_GLOBAL_OFFSET_TABLE_, %ebx
     movl   fastmoveproc@GOT(%ebx), %ebx
     movl   fastmoveproc@GOT(%ebx), %ebx
@@ -510,6 +543,7 @@ asm
 .L8OrLess:
 .L8OrLess:
     cmp    $3, %ecx
     cmp    $3, %ecx
     jle    .L3OrLess
     jle    .L3OrLess
+    push   %ebx
     mov    (%eax), %ebx
     mov    (%eax), %ebx
     mov    -4(%eax,%ecx), %eax
     mov    -4(%eax,%ecx), %eax
     mov    %ebx, (%edx)
     mov    %ebx, (%edx)
@@ -520,14 +554,15 @@ asm
 .L3OrLess:
 .L3OrLess:
     cmp    $1, %ecx
     cmp    $1, %ecx
     jl     .LZero
     jl     .LZero
+    push   %ebx
     movzbl (%eax), %ebx
     movzbl (%eax), %ebx
     je     .LOne
     je     .LOne
     movzwl -2(%eax,%ecx), %eax
     movzwl -2(%eax,%ecx), %eax
     mov    %ax, -2(%edx,%ecx)
     mov    %ax, -2(%edx,%ecx)
 .LOne:
 .LOne:
     mov    %bl, (%edx)
     mov    %bl, (%edx)
-.LZero:
     pop    %ebx
     pop    %ebx
+.LZero:
 end;
 end;
 
 
 {$endif  FPC_SYSTEM_HAS_MOVE}
 {$endif  FPC_SYSTEM_HAS_MOVE}