|
@@ -415,32 +415,6 @@ asm
|
|
|
sub $16, %ecx
|
|
|
jl .LFallback { probably dead branch... }
|
|
|
|
|
|
-{$if false}
|
|
|
-{ Scans 16 bytes at a time left to right with early exits.
|
|
|
- Would be better for large enough sets (maybe around 64 bytes or even more), if such sets existed, but is worse for the 32-byte sets that actually exist.
|
|
|
- Kept for the future. }
|
|
|
- push %ebx
|
|
|
- pxor %xmm2, %xmm2 { xmm2 = 0 }
|
|
|
- add %ecx, %eax { set1 += size }
|
|
|
- add %ecx, %edx { set2 += size }
|
|
|
- neg %ecx { Now "size" (ecx) = -(orig.size - 16), "set1" (eax) points to orig.set1 + orig.size - 16, "set2" (edx) points to orig.set2 + orig.size - 16.
|
|
|
- Loop ends on "size" >= 0, leaving up to 16 tail bytes. }
|
|
|
-.L16x_Loop:
|
|
|
- movdqu (%eax,%ecx), %xmm1
|
|
|
- movdqu (%edx,%ecx), %xmm0
|
|
|
- pandn %xmm1, %xmm0
|
|
|
- pcmpeqb %xmm2, %xmm0
|
|
|
- pmovmskb %xmm0, %ebx
|
|
|
- inc %bx
|
|
|
- jnz .LNo
|
|
|
- add $16, %ecx
|
|
|
- js .L16x_Loop
|
|
|
- pop %ebx
|
|
|
-
|
|
|
- movdqu (%eax), %xmm1
|
|
|
- movdqu (%edx), %xmm0
|
|
|
- pandn %xmm1, %xmm0
|
|
|
-{$else}
|
|
|
{ Folds all 16-byte "set1 and not set2" chunks with OR and checks the final result for zero. Better for small enough sets. }
|
|
|
movdqu (%eax), %xmm1
|
|
|
movdqu (%edx), %xmm2
|
|
@@ -455,7 +429,6 @@ asm
|
|
|
ja .L16x_Loop
|
|
|
|
|
|
pxor %xmm0, %xmm0
|
|
|
-{$endif}
|
|
|
pcmpeqb %xmm2,%xmm0
|
|
|
pmovmskb %xmm0, %ecx
|
|
|
xor %eax, %eax
|
|
@@ -466,12 +439,6 @@ asm
|
|
|
.LFallback:
|
|
|
add $16, %ecx
|
|
|
jmp fpc_varset_contains_sets_plain
|
|
|
-
|
|
|
-{$if false}
|
|
|
-.LNo:
|
|
|
- xor %eax, %eax
|
|
|
- pop %ebx
|
|
|
-{$endif}
|
|
|
end;
|
|
|
|
|
|
{ Forward declaration only: the body of fpc_varset_contains_sets_dispatch is supplied further down in this unit (not visible in this chunk). }
function fpc_varset_contains_sets_dispatch(const set1,set2;size : ptrint):boolean; forward;
|