Browse Source

Improved CompareDWord for i386 and x86_64.

Rika Ichinose 2 years ago
parent
commit
d36e96ea74
2 changed files with 87 additions and 48 deletions
  1. 26 48
      rtl/i386/i386.inc
  2. 61 0
      rtl/x86_64/x86_64.inc

+ 26 - 48
rtl/i386/i386.inc

@@ -639,59 +639,37 @@ end;
 {$define FPC_SYSTEM_HAS_COMPAREDWORD}
 function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe; // Compares two buffers dword by dword (unsigned); returns 0 if no dword differs, else -1 or +1 from the first differing pair.
 asm // NOTE(review): the code treats eax and edx as the two buffer pointers and ecx as len; presumably eax=buf1, edx=buf2 - confirm against the calling convention.
-        cmpl    $32,%ecx          { empirical average value, on a Athlon XP the
-                                    break even is at 12, on a Core 2 Duo > 100 }
-        jg      .LCmpDWordFull
-        testl   %ecx,%ecx
-        je      .LCmpDWordZero
-
-        pushl   %ebx
-.LCmpDWordLoop:
-        movl    (%eax),%ebx
-        cmpl    (%edx),%ebx
-        leal    4(%eax),%eax
-        leal    4(%edx),%edx
-        jne     .LCmpDWordExitFast
-        decl    %ecx
-        jne     .LCmpDWordLoop
-.LCmpDWordExitFast:
-        xorl    %eax,%eax
-        movl    -4(%edx),%edx       // Compare failing (or equal) position
-        subl    %edx,%ebx           // calculate end result.
-        setb    %dl
-        seta    %cl
-        addb    %cl,%al
-        subb    %dl,%al
-        movsbl  %al,%eax
+        cmp     $536870912, %ecx // 536870912 = 2^29; these flags are consumed by jnb below (push does not change flags)
+        push    %ebx // ebx serves as the end pointer; it is popped again on every exit path
+        jnb     .LUnbounded // unsigned len >= 2^29 (this includes negative len): compare without an end bound
+        lea     (%eax,%ecx,4), %ebx // ebx = eax + 4*len, the address just past the last dword of the eax buffer
+        cmp     %ebx, %eax
+        je      .LNothing // len = 0: nothing to compare, result is 0
 
-        popl    %ebx
+.balign 16 // align the loop entry to a 16-byte boundary
+.LDwordwise_Body:
+        mov     (%edx), %ecx // ecx = current dword of the edx buffer
+        cmp     (%eax), %ecx // AT&T operand order: computes ecx - (%eax); CF = 1 when the edx dword is below the eax dword (unsigned)
+        jne     .LDoSbb // first mismatch: CF determines the sign of the result
+        add     $4, %eax
+        add     $4, %edx
+        cmp     %eax, %ebx
+        jne     .LDwordwise_Body // keep going until eax reaches the end pointer
+.LNothing:
+        xor     %eax, %eax // no differing dword found: return 0
+        pop     %ebx
         ret
 
-.LCmpDWordZero:
-        movl    $0,%eax
+.LDoSbb:
+        pop     %ebx // pop leaves the flags of the failing cmp intact for sbb
+        sbb     %eax, %eax // eax = -CF: -1 if CF was set, otherwise 0
+        and     $2, %eax // eax = 2 or 0
+        sub     $1, %eax // eax = +1 (eax-buffer dword was larger) or -1 (it was smaller)
         ret
 
-.LCmpDWordFull:
-        pushl   %esi
-        pushl   %edi
-{$ifdef FPC_ENABLED_CLD}
-        cld
-{$endif FPC_ENABLED_CLD}
-        movl    %eax,%edi
-        movl    %edx,%esi
-        xorl    %eax,%eax
-        repe                     { Compare entire DWords}
-        cmpsl
-        movl    -4(%edi),%edi        // Compare failing (or equal) position
-        subl    -4(%esi),%edi        // calculate end result.
-        setb    %dl
-        seta    %cl
-        addb    %cl,%al
-        subb    %dl,%al
-        movsbl  %al,%eax
-.LCmpDwordExit:
-        popl    %edi
-        popl    %esi
+.LUnbounded:
+        mov     %eax, %ebx // end pointer = start pointer, so the loop's end test is effectively disabled and only a mismatch stops it
+        jmp     .LDwordwise_Body
 end;
 {$endif FPC_SYSTEM_HAS_COMPAREDWORD}
 

+ 61 - 0
rtl/x86_64/x86_64.inc

@@ -772,6 +772,67 @@ end;
 {$endif FPC_SYSTEM_HAS_COMPAREBYTE}
 
 
+{$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
+{$define FPC_SYSTEM_HAS_COMPAREDWORD}
+function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler; nostackframe; // Compares two buffers dword by dword (unsigned); returns 0 if no dword differs, else -1 or +1 from the first differing pair.
+asm
+{$ifndef win64}
+    mov      %rdx, %r8 // non-Win64: copy the 3rd argument (len) to r8 first, because rdx is overwritten on the next line
+    mov      %rsi, %rdx // 2nd argument (buf2) -> rdx
+    mov      %rdi, %rcx // 1st argument (buf1) -> rcx; from here on rcx/rdx/r8 hold buf1/buf2/len on both ABIs
+{$endif win64}
+    mov      %r8, %rax
+    shr      $61, %rax // rax = top 3 bits of len
+    jnz      .LUnbounded // unsigned len >= 2^61 (this includes negative len): compare without an end bound
+    cmp      $3, %r8 // these flags are consumed by jle below; the lea in between does not change flags
+    lea      (%rcx,%r8,4), %r9 // r9 = rcx + 4*len, the address just past the last dword of the rcx buffer
+    jle      .LDwordwise_Test // len <= 3: too short for a 16-byte step, compare dword by dword
+    and      $-4, %r8 // round len down to a multiple of 4
+    lea      (%rcx,%r8,4), %r8 // r8 = end of the part that is compared 4 dwords at a time
+.balign 16 // align the loop entry to a 16-byte boundary
+.L4x_Body:
+    movdqu   (%rcx), %xmm1 // unaligned 16-byte loads, one from each buffer
+    movdqu   (%rdx), %xmm0
+    pcmpeqd  %xmm1, %xmm0 // each dword lane of xmm0 becomes all ones where equal, zero where different
+    pmovmskb %xmm0, %eax // eax = 16-bit mask holding the top bit of every byte of xmm0
+    xor      $65535, %eax // invert the mask: set bits now mark bytes of differing dwords, zero means all four dwords match
+    jne      .L4x_Found
+    add      $16, %rcx
+    add      $16, %rdx
+    cmp      %rcx, %r8
+    jne      .L4x_Body // keep going until rcx reaches the end of the 4x part
+.LDwordwise_Test:
+    cmp      %rcx, %r9 // any dwords left (the 0..3 dword tail, or the whole input when len <= 3)?
+    je       .LNothing
+.LDwordwise_Body:
+    mov      (%rcx), %eax // eax = current dword of the rcx buffer
+    cmp      %eax, (%rdx) // AT&T operand order: computes (%rdx) - eax; CF = 1 when the rdx dword is below the rcx dword (unsigned)
+    jne      .LDoSbb // first mismatch: CF determines the sign of the result
+    add      $4, %rcx
+    add      $4, %rdx
+    cmp      %rcx, %r9
+    jne      .LDwordwise_Body // keep going until rcx reaches the end pointer
+.LNothing:
+    xor      %eax, %eax // no differing dword found: return 0 (the 32-bit write clears all of rax)
+    ret
+
+.L4x_Found:
+    bsf      %eax, %eax // eax = index of the lowest set mask bit = byte offset of the first differing dword in the 16-byte chunk
+    mov      (%rcx,%rax), %ecx // ecx = differing dword of the rcx buffer (the pointer in rcx is not needed afterwards)
+    cmp      %ecx, (%rdx,%rax) // same operand order as in the dword loop, so both paths can share .LDoSbb
+.LDoSbb:
+    sbb      %rax, %rax // rax = -CF: -1 if CF was set, otherwise 0
+    and      $2, %eax // eax = 2 or 0; the 32-bit write also clears the upper half of rax
+    sub      $1, %rax // rax = +1 (rcx-buffer dword was larger) or -1 (it was smaller), as a 64-bit value
+    ret
+
+.LUnbounded:
+    mov      %rcx, %r9 // end pointer = start pointer, so the loop's end test is effectively disabled and only a mismatch stops it
+    jmp      .LDwordwise_Body
+end;
+{$endif FPC_SYSTEM_HAS_COMPAREDWORD}
+
+
 {$define FPC_SYSTEM_HAS_DECLOCKED_LONGINT}
 { does a thread save inc/dec }
 function declocked(var l : longint) : boolean;assembler; nostackframe;