Browse Source

* patch by Rika: Trivial adjustments to !379, resolves #40168

florian 2 years ago
parent
commit
7cc94fc000
1 changed file with 21 additions and 18 deletions
  1. 21 18
      rtl/x86_64/x86_64.inc

+ 21 - 18
rtl/x86_64/x86_64.inc

@@ -705,22 +705,8 @@ asm
     and      $-16, %r9
     and      $-16, %r9
     add      %rcx, %r9 { r9 = end of full XMMs in buf1 }
     add      %rcx, %r9 { r9 = end of full XMMs in buf1 }
     cmp      %r9, %rcx
     cmp      %r9, %rcx
-    je       .L16x_Tail
-.balign 16
-.L16x_Body:
-    movdqu   (%rdx), %xmm0
-    movdqu   (%rcx), %xmm1
-    pcmpeqb  %xmm1, %xmm0
-    pmovmskb %xmm0, %eax
-    xor      $65535, %eax
-    jne      .L16x_Found
-    add      $16, %rcx
-    add      $16, %rdx
-    cmp      %rcx, %r9
     jne      .L16x_Body
     jne      .L16x_Body
 .L16x_Tail:
 .L16x_Tail:
-    cmp      %r9, %r10
-    je       .LNothing
     lea      15(%r9), %eax { check if tails don't cross page boundaries and can be over-read to XMMs }
     lea      15(%r9), %eax { check if tails don't cross page boundaries and can be over-read to XMMs }
     lea      15(%rdx), %ecx
     lea      15(%rdx), %ecx
     xor      %r9d, %eax
     xor      %r9d, %eax
@@ -733,21 +719,38 @@ asm
     pcmpeqb  %xmm1, %xmm0
     pcmpeqb  %xmm1, %xmm0
     pmovmskb %xmm0, %eax
     pmovmskb %xmm0, %eax
     xor      $65535, %eax
     xor      $65535, %eax
-    je       .LNothing
+    je       .L16x_Nothing
     bsf      %eax, %ecx
     bsf      %eax, %ecx
     add      %rcx, %r9
     add      %rcx, %r9
     cmp      %r10, %r9 { ignore over-read garbage bytes }
     cmp      %r10, %r9 { ignore over-read garbage bytes }
-    jnb      .LNothing
+    jnb      .L16x_Nothing
     movzbl   (%r9), %eax
     movzbl   (%r9), %eax
     movzbl   (%rdx,%rcx), %edx
     movzbl   (%rdx,%rcx), %edx
     sub      %rdx, %rax
     sub      %rdx, %rax
     ret
     ret
 
 
+.balign 16
+.L16x_Body:
+    movdqu   (%rdx), %xmm0
+    movdqu   (%rcx), %xmm1
+    pcmpeqb  %xmm1, %xmm0
+    pmovmskb %xmm0, %eax
+    xor      $65535, %eax
+    jne      .L16x_Found
+    add      $16, %rcx
+    add      $16, %rdx
+    cmp      %rcx, %r9
+    jne      .L16x_Body
+    cmp      %r9, %r10
+    jne      .L16x_Tail
+.L16x_Nothing:
+    xor      %eax, %eax
+    ret
+
 .L16x_Found:
 .L16x_Found:
     bsf      %eax, %eax
     bsf      %eax, %eax
-    movzbl   (%rcx,%rax), %ecx
     movzbl   (%rdx,%rax), %edx
     movzbl   (%rdx,%rax), %edx
-    mov      %rcx, %rax
+    movzbl   (%rcx,%rax), %eax
     sub      %rdx, %rax
     sub      %rdx, %rax
     ret
     ret