|
@@ -14,10 +14,14 @@
|
|
**********************************************************************}
|
|
**********************************************************************}
|
|
|
|
|
|
{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_ADD_SETS}
|
|
{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_ADD_SETS}
|
|
-procedure fpc_varset_add_sets(const set1,set2; var dest;size : ptrint); compilerproc; assembler; nostackframe;
|
|
|
|
|
|
+label
|
|
|
|
+ fpc_varset_add_sets_plain_fallback;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_add_sets_plain(const set1,set2; var dest;size : ptrint); assembler; nostackframe;
|
|
{ eax = set1, edx = set2, ecx = dest, [esp + 4] = size }
|
|
{ eax = set1, edx = set2, ecx = dest, [esp + 4] = size }
|
|
asm
|
|
asm
|
|
push %ebx
|
|
push %ebx
|
|
|
|
+fpc_varset_add_sets_plain_fallback:
|
|
push %esi
|
|
push %esi
|
|
mov 12(%esp), %esi { esi = size }
|
|
mov 12(%esp), %esi { esi = size }
|
|
sub $4, %esi
|
|
sub $4, %esi
|
|
@@ -48,14 +52,60 @@ asm
|
|
pop %esi
|
|
pop %esi
|
|
pop %ebx
|
|
pop %ebx
|
|
end;
|
|
end;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_add_sets_sse(const set1,set2; var dest;size : ptrint); assembler; nostackframe;
|
|
|
|
+{ eax = set1, edx = set2, ecx = dest, [esp + 4] = size }
|
|
|
|
+asm
|
|
|
|
+ push %ebx
|
|
|
|
+ mov 8(%esp), %ebx
|
|
|
|
+ sub $16, %ebx { ebx = position }
|
|
|
|
+ jl fpc_varset_add_sets_plain_fallback { probably dead branch... }
|
|
|
|
+
|
|
|
|
+.L16x_Loop:
|
|
|
|
+ movups (%eax,%ebx), %xmm0
|
|
|
|
+ movups (%edx,%ebx), %xmm1
|
|
|
|
+ orps %xmm1, %xmm0
|
|
|
|
+ movups %xmm0, (%ecx,%ebx)
|
|
|
|
+ sub $16, %ebx
|
|
|
|
+ ja .L16x_Loop
|
|
|
|
+
|
|
|
|
+ movups (%eax), %xmm0 { Tail, just in case (if size is always divisible by 16, 16x_Loop can be altered to handle everything instead). }
|
|
|
|
+ movups (%edx), %xmm1
|
|
|
|
+ orps %xmm1, %xmm0
|
|
|
|
+ movups %xmm0, (%ecx)
|
|
|
|
+ pop %ebx
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_add_sets_dispatch(const set1,set2; var dest;size : ptrint); forward;
|
|
|
|
+
|
|
|
|
+var
|
|
|
|
+ fpc_varset_add_sets_impl: procedure(const set1,set2; var dest;size : ptrint) = @fpc_varset_add_sets_dispatch;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_add_sets_dispatch(const set1,set2; var dest;size : ptrint);
|
|
|
|
+begin
|
|
|
|
+ if has_sse_support then
|
|
|
|
+ fpc_varset_add_sets_impl:=@fpc_varset_add_sets_sse
|
|
|
|
+ else
|
|
|
|
+ fpc_varset_add_sets_impl:=@fpc_varset_add_sets_plain;
|
|
|
|
+ fpc_varset_add_sets_impl(set1,set2,dest,size);
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_add_sets(const set1,set2; var dest;size : ptrint); compilerproc; inline;
|
|
|
|
+begin
|
|
|
|
+ fpc_varset_add_sets_impl(set1,set2,dest,size);
|
|
|
|
+end;
|
|
{$define FPC_SYSTEM_HAS_FPC_VARSET_ADD_SETS}
|
|
{$define FPC_SYSTEM_HAS_FPC_VARSET_ADD_SETS}
|
|
{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_ADD_SETS}
|
|
{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_ADD_SETS}
|
|
|
|
|
|
{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_MUL_SETS}
|
|
{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_MUL_SETS}
|
|
-procedure fpc_varset_mul_sets(const set1,set2; var dest;size : ptrint); compilerproc; assembler; nostackframe;
|
|
|
|
-{ Same as fpc_varset_add_sets but with 'and' instead of 'or'. }
|
|
|
|
|
|
+label
|
|
|
|
+ fpc_varset_mul_sets_plain_fallback;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_mul_sets_plain(const set1,set2; var dest;size : ptrint); assembler; nostackframe;
|
|
|
|
+{ Same as fpc_varset_add_sets_plain but with 'and' instead of 'or'. }
|
|
asm
|
|
asm
|
|
push %ebx
|
|
push %ebx
|
|
|
|
+fpc_varset_mul_sets_plain_fallback:
|
|
push %esi
|
|
push %esi
|
|
mov 12(%esp), %esi { esi = size }
|
|
mov 12(%esp), %esi { esi = size }
|
|
sub $4, %esi
|
|
sub $4, %esi
|
|
@@ -86,14 +136,60 @@ asm
|
|
pop %esi
|
|
pop %esi
|
|
pop %ebx
|
|
pop %ebx
|
|
end;
|
|
end;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_mul_sets_sse(const set1,set2; var dest;size : ptrint); assembler; nostackframe;
|
|
|
|
+{ Same as fpc_varset_add_sets_sse but with 'and' instead of 'or'. }
|
|
|
|
+asm
|
|
|
|
+ push %ebx
|
|
|
|
+ mov 8(%esp), %ebx
|
|
|
|
+ sub $16, %ebx { ebx = position }
|
|
|
|
+ jl fpc_varset_mul_sets_plain_fallback { probably dead branch... }
|
|
|
|
+
|
|
|
|
+.L16x_Loop:
|
|
|
|
+ movups (%eax,%ebx), %xmm0
|
|
|
|
+ movups (%edx,%ebx), %xmm1
|
|
|
|
+ andps %xmm1, %xmm0
|
|
|
|
+ movups %xmm0, (%ecx,%ebx)
|
|
|
|
+ sub $16, %ebx
|
|
|
|
+ ja .L16x_Loop
|
|
|
|
+
|
|
|
|
+ movups (%eax), %xmm0 { Tail, just in case (if size is always divisible by 16, 16x_Loop can be altered to handle everything instead). }
|
|
|
|
+ movups (%edx), %xmm1
|
|
|
|
+ andps %xmm1, %xmm0
|
|
|
|
+ movups %xmm0, (%ecx)
|
|
|
|
+ pop %ebx
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_mul_sets_dispatch(const set1,set2; var dest;size : ptrint); forward;
|
|
|
|
+
|
|
|
|
+var
|
|
|
|
+ fpc_varset_mul_sets_impl: procedure(const set1,set2; var dest;size : ptrint) = @fpc_varset_mul_sets_dispatch;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_mul_sets_dispatch(const set1,set2; var dest;size : ptrint);
|
|
|
|
+begin
|
|
|
|
+ if has_sse_support then
|
|
|
|
+ fpc_varset_mul_sets_impl:=@fpc_varset_mul_sets_sse
|
|
|
|
+ else
|
|
|
|
+ fpc_varset_mul_sets_impl:=@fpc_varset_mul_sets_plain;
|
|
|
|
+ fpc_varset_mul_sets_impl(set1,set2,dest,size);
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_mul_sets(const set1,set2; var dest;size : ptrint); compilerproc; inline;
|
|
|
|
+begin
|
|
|
|
+ fpc_varset_mul_sets_impl(set1,set2,dest,size);
|
|
|
|
+end;
|
|
{$define FPC_SYSTEM_HAS_FPC_VARSET_MUL_SETS}
|
|
{$define FPC_SYSTEM_HAS_FPC_VARSET_MUL_SETS}
|
|
{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_MUL_SETS}
|
|
{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_MUL_SETS}
|
|
|
|
|
|
{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_SUB_SETS}
|
|
{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_SUB_SETS}
|
|
-procedure fpc_varset_sub_sets(const set1,set2; var dest;size : ptrint); compilerproc; assembler; nostackframe;
|
|
|
|
|
|
+label
|
|
|
|
+ fpc_varset_sub_sets_plain_fallback;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_sub_sets_plain(const set1,set2; var dest;size : ptrint); assembler; nostackframe;
|
|
{ eax = set1, edx = set2, ecx = dest, [esp + 4] = size }
|
|
{ eax = set1, edx = set2, ecx = dest, [esp + 4] = size }
|
|
asm
|
|
asm
|
|
push %ebx
|
|
push %ebx
|
|
|
|
+fpc_varset_sub_sets_plain_fallback:
|
|
push %esi
|
|
push %esi
|
|
mov 12(%esp), %esi { esi = size }
|
|
mov 12(%esp), %esi { esi = size }
|
|
sub $4, %esi
|
|
sub $4, %esi
|
|
@@ -129,15 +225,62 @@ asm
|
|
pop %esi
|
|
pop %esi
|
|
pop %ebx
|
|
pop %ebx
|
|
end;
|
|
end;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_sub_sets_sse(const set1,set2; var dest;size : ptrint); assembler; nostackframe;
|
|
|
|
+{ eax = set1, edx = set2, ecx = dest, [esp + 4] = size }
|
|
|
|
+asm
|
|
|
|
+ push %ebx
|
|
|
|
+ mov 8(%esp), %ebx
|
|
|
|
+ sub $16, %ebx { ebx = position }
|
|
|
|
+ jl fpc_varset_sub_sets_plain_fallback { probably dead branch... }
|
|
|
|
+
|
|
|
|
+ movups (%eax), %xmm1 { Tail, just in case (if size is always divisible by 16, 16x_Loop can be altered to handle everything instead). }
|
|
|
|
+ movups (%edx), %xmm2 { Precalculated because operation is not idempotent and dest can be equal to set1/set2. }
|
|
|
|
+ andnps %xmm1, %xmm2
|
|
|
|
+
|
|
|
|
+.L16x_Loop:
|
|
|
|
+ movups (%eax,%ebx), %xmm1
|
|
|
|
+ movups (%edx,%ebx), %xmm0
|
|
|
|
+ andnps %xmm1, %xmm0
|
|
|
|
+ movups %xmm0, (%ecx,%ebx)
|
|
|
|
+ sub $16, %ebx
|
|
|
|
+ ja .L16x_Loop
|
|
|
|
+
|
|
|
|
+ movups %xmm2, (%ecx) { Write precalculated tail. }
|
|
|
|
+ pop %ebx
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_sub_sets_dispatch(const set1,set2; var dest;size : ptrint); forward;
|
|
|
|
+
|
|
|
|
+var
|
|
|
|
+ fpc_varset_sub_sets_impl: procedure(const set1,set2; var dest;size : ptrint) = @fpc_varset_sub_sets_dispatch;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_sub_sets_dispatch(const set1,set2; var dest;size : ptrint);
|
|
|
|
+begin
|
|
|
|
+ if has_sse_support then
|
|
|
|
+ fpc_varset_sub_sets_impl:=@fpc_varset_sub_sets_sse
|
|
|
|
+ else
|
|
|
|
+ fpc_varset_sub_sets_impl:=@fpc_varset_sub_sets_plain;
|
|
|
|
+ fpc_varset_sub_sets_impl(set1,set2,dest,size);
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_sub_sets(const set1,set2; var dest;size : ptrint); compilerproc; inline;
|
|
|
|
+begin
|
|
|
|
+ fpc_varset_sub_sets_impl(set1,set2,dest,size);
|
|
|
|
+end;
|
|
{$define FPC_SYSTEM_HAS_FPC_VARSET_SUB_SETS}
|
|
{$define FPC_SYSTEM_HAS_FPC_VARSET_SUB_SETS}
|
|
{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_SUB_SETS}
|
|
{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_SUB_SETS}
|
|
|
|
|
|
{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_SYMDIF_SETS}
|
|
{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_SYMDIF_SETS}
|
|
-procedure fpc_varset_symdif_sets(const set1,set2; var dest;size : ptrint); compilerproc; assembler; nostackframe;
|
|
|
|
-{ Same as fpc_varset_mul_sets but with 'xor' instead of 'and not'.
|
|
|
|
|
|
+label
|
|
|
|
+ fpc_varset_symdif_sets_plain_fallback;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_symdif_sets_plain(const set1,set2; var dest;size : ptrint); assembler; nostackframe;
|
|
|
|
+{ Same as fpc_varset_sub_sets_plain but with 'xor' instead of 'and not'.
|
|
eax = set1, edx = set2, ecx = dest, [esp + 4] = size }
|
|
eax = set1, edx = set2, ecx = dest, [esp + 4] = size }
|
|
asm
|
|
asm
|
|
push %ebx
|
|
push %ebx
|
|
|
|
+fpc_varset_symdif_sets_plain_fallback:
|
|
push %esi
|
|
push %esi
|
|
mov 12(%esp), %esi { esi = size }
|
|
mov 12(%esp), %esi { esi = size }
|
|
sub $4, %esi
|
|
sub $4, %esi
|
|
@@ -170,11 +313,55 @@ asm
|
|
pop %esi
|
|
pop %esi
|
|
pop %ebx
|
|
pop %ebx
|
|
end;
|
|
end;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_symdif_sets_sse(const set1,set2; var dest;size : ptrint); assembler; nostackframe;
|
|
|
|
+{ Same as fpc_varset_sub_sets_sse but with 'xor' instead of 'and not'.
|
|
|
|
+ eax = set1, edx = set2, ecx = dest, [esp + 4] = size }
|
|
|
|
+asm
|
|
|
|
+ push %ebx
|
|
|
|
+ mov 8(%esp), %ebx
|
|
|
|
+ sub $16, %ebx { ebx = position }
|
|
|
|
+ jl fpc_varset_symdif_sets_plain_fallback { probably dead branch... }
|
|
|
|
+
|
|
|
|
+ movups (%eax), %xmm1 { Tail, just in case (if size is always divisible by 16, 16x_Loop can be altered to handle everything instead). }
|
|
|
|
+ movups (%edx), %xmm2 { Precalculated because operation is not idempotent and dest can be equal to set1/set2. }
|
|
|
|
+ xorps %xmm1, %xmm2
|
|
|
|
+
|
|
|
|
+.L16x_Loop:
|
|
|
|
+ movups (%eax,%ebx), %xmm1
|
|
|
|
+ movups (%edx,%ebx), %xmm0
|
|
|
|
+ xorps %xmm1, %xmm0
|
|
|
|
+ movups %xmm0, (%ecx,%ebx)
|
|
|
|
+ sub $16, %ebx
|
|
|
|
+ ja .L16x_Loop
|
|
|
|
+
|
|
|
|
+ movups %xmm2, (%ecx) { Write precalculated tail. }
|
|
|
|
+ pop %ebx
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_symdif_sets_dispatch(const set1,set2; var dest;size : ptrint); forward;
|
|
|
|
+
|
|
|
|
+var
|
|
|
|
+ fpc_varset_symdif_sets_impl: procedure(const set1,set2; var dest;size : ptrint) = @fpc_varset_symdif_sets_dispatch;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_symdif_sets_dispatch(const set1,set2; var dest;size : ptrint);
|
|
|
|
+begin
|
|
|
|
+ if has_sse_support then
|
|
|
|
+ fpc_varset_symdif_sets_impl:=@fpc_varset_symdif_sets_sse
|
|
|
|
+ else
|
|
|
|
+ fpc_varset_symdif_sets_impl:=@fpc_varset_symdif_sets_plain;
|
|
|
|
+ fpc_varset_symdif_sets_impl(set1,set2,dest,size);
|
|
|
|
+end;
|
|
|
|
+
|
|
|
|
+procedure fpc_varset_symdif_sets(const set1,set2; var dest;size : ptrint); compilerproc; inline;
|
|
|
|
+begin
|
|
|
|
+ fpc_varset_symdif_sets_impl(set1,set2,dest,size);
|
|
|
|
+end;
|
|
{$define FPC_SYSTEM_HAS_FPC_VARSET_SYMDIF_SETS}
|
|
{$define FPC_SYSTEM_HAS_FPC_VARSET_SYMDIF_SETS}
|
|
{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_SYMDIF_SETS}
|
|
{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_SYMDIF_SETS}
|
|
|
|
|
|
{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_CONTAINS_SET}
|
|
{$ifndef FPC_SYSTEM_HAS_FPC_VARSET_CONTAINS_SET}
|
|
-function fpc_varset_contains_sets(const set1,set2;size : ptrint):boolean; compilerproc; assembler; nostackframe;
|
|
|
|
|
|
+function fpc_varset_contains_sets_plain(const set1,set2;size : ptrint):boolean; assembler; nostackframe;
|
|
{ eax = set1, edx = set2, ecx = size }
|
|
{ eax = set1, edx = set2, ecx = size }
|
|
asm
|
|
asm
|
|
push %ebx
|
|
push %ebx
|
|
@@ -221,292 +408,90 @@ asm
|
|
mov $1, %eax
|
|
mov $1, %eax
|
|
pop %ebx
|
|
pop %ebx
|
|
end;
|
|
end;
|
|
-{$define FPC_SYSTEM_HAS_FPC_VARSET_CONTAINS_SET}
|
|
|
|
-{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_CONTAINS_SET}
|
|
|
|
-
|
|
|
|
-{ the following code is exactly big endian set-related, but specific to the old
|
|
|
|
- scheme whereby sets were either 4 or 32 bytes. I've left the routines here
|
|
|
|
- so if someone wants to, they can create equivalents of the new varset helpers
|
|
|
|
- from rtl/inc/genset.inc
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-{$ifdef FPC_OLD_BIGENDIAN_SETS}
|
|
|
|
|
|
|
|
-{$define FPC_SYSTEM_HAS_FPC_SET_LOAD_SMALL}
|
|
|
|
-function fpc_set_load_small(l: fpc_small_set): fpc_normal_set;assembler;[public,alias:'FPC_SET_LOAD_SMALL']; compilerproc;
|
|
|
|
-{
|
|
|
|
- load a normal set p from a smallset l
|
|
|
|
-}
|
|
|
|
-var
|
|
|
|
- saveedi : longint;
|
|
|
|
-asm
|
|
|
|
- movl %edi,saveedi
|
|
|
|
- movl __RESULT,%edi
|
|
|
|
- movl l,%eax
|
|
|
|
-{$ifdef FPC_ENABLED_CLD}
|
|
|
|
- cld
|
|
|
|
-{$endif FPC_ENABLED_CLD}
|
|
|
|
- stosl
|
|
|
|
- xorl %eax,%eax
|
|
|
|
- movl $7,%ecx
|
|
|
|
- rep
|
|
|
|
- stosl
|
|
|
|
- movl saveedi,%edi
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-{$define FPC_SYSTEM_HAS_FPC_SET_CREATE_ELEMENT}
|
|
|
|
-
|
|
|
|
-function fpc_set_create_element(b : byte): fpc_normal_set;assembler;[public,alias:'FPC_SET_CREATE_ELEMENT']; compilerproc;
|
|
|
|
-{
|
|
|
|
- create a new set in p from an element b
|
|
|
|
-}
|
|
|
|
-var
|
|
|
|
- saveedi : longint;
|
|
|
|
-asm
|
|
|
|
- movl %edi,saveedi
|
|
|
|
- movl __RESULT,%edi
|
|
|
|
- movzbl b,%edx
|
|
|
|
- xorl %eax,%eax
|
|
|
|
- movl $8,%ecx
|
|
|
|
-{$ifdef FPC_ENABLED_CLD}
|
|
|
|
- cld
|
|
|
|
-{$endif FPC_ENABLED_CLD}
|
|
|
|
- rep
|
|
|
|
- stosl
|
|
|
|
- leal -32(%edi),%eax
|
|
|
|
- btsl %edx,(%eax)
|
|
|
|
- movl saveedi,%edi
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-{$define FPC_SYSTEM_HAS_FPC_SET_SET_BYTE}
|
|
|
|
-function fpc_set_set_byte(const source: fpc_normal_set; b : byte): fpc_normal_set;assembler; compilerproc;
|
|
|
|
-{
|
|
|
|
- add the element b to the set pointed by source
|
|
|
|
-}
|
|
|
|
-var
|
|
|
|
- saveesi,saveedi : longint;
|
|
|
|
-asm
|
|
|
|
- movl %edi,saveedi
|
|
|
|
- movl %esi,saveesi
|
|
|
|
- movl source,%esi
|
|
|
|
- movl __RESULT,%edi
|
|
|
|
- movzbl b,%edx
|
|
|
|
- movl $8,%ecx
|
|
|
|
-{$ifdef FPC_ENABLED_CLD}
|
|
|
|
- cld
|
|
|
|
-{$endif FPC_ENABLED_CLD}
|
|
|
|
- rep
|
|
|
|
- movsl
|
|
|
|
- leal -32(%edi),%eax
|
|
|
|
- btsl %edx,(%eax)
|
|
|
|
- movl saveedi,%edi
|
|
|
|
- movl saveesi,%esi
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-{$define FPC_SYSTEM_HAS_FPC_SET_UNSET_BYTE}
|
|
|
|
-function fpc_set_unset_byte(const source: fpc_normal_set; b : byte): fpc_normal_set;assembler; compilerproc;
|
|
|
|
-{
|
|
|
|
- add the element b to the set pointed by source
|
|
|
|
-}
|
|
|
|
-var
|
|
|
|
- saveesi,saveedi : longint;
|
|
|
|
-asm
|
|
|
|
- movl %edi,saveedi
|
|
|
|
- movl %esi,saveesi
|
|
|
|
- movl source,%esi
|
|
|
|
- movl __RESULT,%edi
|
|
|
|
- movzbl b,%edx
|
|
|
|
- movl $8,%ecx
|
|
|
|
-{$ifdef FPC_ENABLED_CLD}
|
|
|
|
- cld
|
|
|
|
-{$endif FPC_ENABLED_CLD}
|
|
|
|
- rep
|
|
|
|
- movsl
|
|
|
|
- leal -32(%edi),%eax
|
|
|
|
- btrl %edx,(%eax)
|
|
|
|
- movl saveedi,%edi
|
|
|
|
- movl saveesi,%esi
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-{$define FPC_SYSTEM_HAS_FPC_SET_SET_RANGE}
|
|
|
|
-
|
|
|
|
-function fpc_set_set_range(const orgset: fpc_normal_set; l,h : byte): fpc_normal_set;assembler; compilerproc;
|
|
|
|
-{
|
|
|
|
- adds the range [l..h] to the set pointed to by p
|
|
|
|
-}
|
|
|
|
-var
|
|
|
|
- saveh : byte;
|
|
|
|
- saveesi,saveedi,saveebx : longint;
|
|
|
|
|
|
+function fpc_varset_contains_sets_sse2(const set1,set2;size : ptrint):boolean; assembler; nostackframe;
|
|
|
|
+{ eax = set1, edx = set2, ecx = size }
|
|
asm
|
|
asm
|
|
- movl %edi,saveedi
|
|
|
|
- movl %esi,saveesi
|
|
|
|
- movl %ebx,saveebx
|
|
|
|
- movl __RESULT,%edi // target set address in edi
|
|
|
|
- movl orgset, %esi // source set address in esi
|
|
|
|
- movzbl l,%eax // lowest bit to be set in eax
|
|
|
|
- movzbl h,%ebx // highest in ebx
|
|
|
|
- movb %bl,saveh
|
|
|
|
- movl $8,%ecx // we have to copy 32 bytes
|
|
|
|
- cmpl %eax,%ebx // high < low?
|
|
|
|
-{$ifdef FPC_ENABLED_CLD}
|
|
|
|
- cld
|
|
|
|
-{$endif FPC_ENABLED_CLD}
|
|
|
|
- rep // copy source to dest (it's possible to do the range
|
|
|
|
- movsl // setting and copying simultanuously of course, but
|
|
|
|
- // that would result in many more jumps and code)
|
|
|
|
- movl %eax,%ecx // lowest also in ecx
|
|
|
|
- jb .Lset_range_done // if high > low, then dest := source
|
|
|
|
- shrl $3,%eax // divide by 8 to get starting and ending byte
|
|
|
|
- shrl $3,%ebx // address
|
|
|
|
- andb $31,%cl // low five bits of lo determine start of bit mask
|
|
|
|
- andl $0x0fffffffc,%eax // clear two lowest bits to get start/end longint
|
|
|
|
- subl $32,%edi // get back to start of dest
|
|
|
|
- andl $0x0fffffffc,%ebx // address * 4
|
|
|
|
- movl $0x0ffffffff,%edx // edx = bitmask to be inserted
|
|
|
|
- shll %cl,%edx // shift bitmask to clear bits below lo
|
|
|
|
- addl %eax,%edi // go to starting pos in set
|
|
|
|
- subl %eax,%ebx // are bit lo and hi in the same longint?
|
|
|
|
- jz .Lset_range_hi // yes, keep current mask and adjust for hi bit
|
|
|
|
- orl %edx,(%edi) // no, store current mask
|
|
|
|
- movl $0x0ffffffff,%edx // new mask
|
|
|
|
- addl $4,%edi // next longint of set
|
|
|
|
- subl $4,%ebx // bit hi in this longint?
|
|
|
|
- jz .Lset_range_hi // yes, keep full mask and adjust for hi bit
|
|
|
|
-.Lset_range_loop:
|
|
|
|
- movl %edx,(%edi) // no, fill longints in between with full mask
|
|
|
|
- addl $4,%edi
|
|
|
|
- subl $4,%ebx
|
|
|
|
- jnz .Lset_range_loop
|
|
|
|
-.Lset_range_hi:
|
|
|
|
- movb saveh,%cl // this is ok, h is on the stack
|
|
|
|
- movl %edx,%ebx // save current bitmask
|
|
|
|
- andb $31,%cl
|
|
|
|
- subb $31,%cl // cl := (31 - (hi and 31)) = shift count to
|
|
|
|
- negb %cl // adjust bitmask for hi bit
|
|
|
|
- shrl %cl,%edx // shift bitmask to clear bits higher than hi
|
|
|
|
- andl %edx,%ebx // combine both bitmasks
|
|
|
|
- orl %ebx,(%edi) // store to set
|
|
|
|
-.Lset_range_done:
|
|
|
|
- movl saveedi,%edi
|
|
|
|
- movl saveesi,%esi
|
|
|
|
- movl saveebx,%ebx
|
|
|
|
-end;
|
|
|
|
|
|
+ sub $16, %ecx
|
|
|
|
+ jl .LFallback { probably dead branch... }
|
|
|
|
|
|
-{$define FPC_SYSTEM_HAS_FPC_SET_IN_BYTE}
|
|
|
|
|
|
+{$if false}
|
|
|
|
+{ Scans 16 bytes at a time left to right with early exits.
|
|
|
|
+ Would be better for large enough sets (maybe around 64 bytes or even more) — if they existed, but worse for actually existing 32.
|
|
|
|
+ Kept for the future. }
|
|
|
|
+ push %ebx
|
|
|
|
+ pxor %xmm2, %xmm2 { xmm2 = 0 }
|
|
|
|
+ add %ecx, %eax { set1 += size }
|
|
|
|
+ add %ecx, %edx { set2 += size }
|
|
|
|
+ neg %ecx { Now "size" (ecx) = -(orig.size - 16), "set1" (eax) points to orig.set1 + orig.size - 16, "set2" (edx) points to orig.set2 + orig.size - 16.
|
|
|
|
+ Loop ends on "size" >= 0, leaving up to 16 tail bytes. }
|
|
|
|
+.L16x_Loop:
|
|
|
|
+ movdqu (%eax,%ecx), %xmm1
|
|
|
|
+ movdqu (%edx,%ecx), %xmm0
|
|
|
|
+ pandn %xmm1, %xmm0
|
|
|
|
+ pcmpeqb %xmm2, %xmm0
|
|
|
|
+ pmovmskb %xmm0, %ebx
|
|
|
|
+ inc %bx
|
|
|
|
+ jnz .LNo
|
|
|
|
+ add $16, %ecx
|
|
|
|
+ js .L16x_Loop
|
|
|
|
+ pop %ebx
|
|
|
|
|
|
-function fpc_set_in_byte(const p: fpc_normal_set; b: byte): boolean; assembler; [public,alias:'FPC_SET_IN_BYTE']; compilerproc;
|
|
|
|
-{
|
|
|
|
- tests if the element b is in the set p the carryflag is set if it present
|
|
|
|
-}
|
|
|
|
-asm
|
|
|
|
-{$ifdef REGCALL}
|
|
|
|
- xchgl %edx,%eax
|
|
|
|
- andl $0xff,%eax
|
|
|
|
|
|
+ movdqu (%eax), %xmm1
|
|
|
|
+ movdqu (%edx), %xmm0
|
|
|
|
+ pandn %xmm1, %xmm0
|
|
{$else}
|
|
{$else}
|
|
- movl p,%edx
|
|
|
|
- movzbl b,%eax
|
|
|
|
|
|
+{ Folds all 16-byte "set1 and not set2" chunks with OR and checks the final result for zero. Better for small enough sets. }
|
|
|
|
+ movdqu (%eax), %xmm1
|
|
|
|
+ movdqu (%edx), %xmm2
|
|
|
|
+ pandn %xmm1, %xmm2
|
|
|
|
+
|
|
|
|
+.L16x_Loop:
|
|
|
|
+ movdqu (%eax,%ecx), %xmm1
|
|
|
|
+ movdqu (%edx,%ecx), %xmm0
|
|
|
|
+ pandn %xmm1, %xmm0
|
|
|
|
+ por %xmm0, %xmm2
|
|
|
|
+ sub $16, %ecx
|
|
|
|
+ ja .L16x_Loop
|
|
|
|
+
|
|
|
|
+ pxor %xmm0, %xmm0
|
|
{$endif}
|
|
{$endif}
|
|
- btl %eax,(%edx)
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-{$define FPC_SYSTEM_HAS_FPC_SET_COMP_SETS}
|
|
|
|
-
|
|
|
|
-function fpc_set_comp_sets(const set1,set2: fpc_normal_set): boolean;assembler;[public,alias:'FPC_SET_COMP_SETS']; compilerproc;
|
|
|
|
-{
|
|
|
|
- compares set1 and set2 zeroflag is set if they are equal
|
|
|
|
-}
|
|
|
|
-var
|
|
|
|
- saveesi,saveedi : longint;
|
|
|
|
-asm
|
|
|
|
- movl %edi,saveedi
|
|
|
|
- movl %esi,saveesi
|
|
|
|
- movl set1,%esi
|
|
|
|
- movl set2,%edi
|
|
|
|
- movl $8,%ecx
|
|
|
|
- .LMCOMPSETS1:
|
|
|
|
- movl (%esi),%eax
|
|
|
|
- movl (%edi),%edx
|
|
|
|
- cmpl %edx,%eax
|
|
|
|
- jne .LMCOMPSETEND
|
|
|
|
- addl $4,%esi
|
|
|
|
- addl $4,%edi
|
|
|
|
- decl %ecx
|
|
|
|
- jnz .LMCOMPSETS1
|
|
|
|
- { we are here only if the two sets are equal
|
|
|
|
- we have zero flag set, and that what is expected }
|
|
|
|
- .LMCOMPSETEND:
|
|
|
|
- seteb %al
|
|
|
|
- movl saveedi,%edi
|
|
|
|
- movl saveesi,%esi
|
|
|
|
-end;
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-{$ifdef LARGESETS}
|
|
|
|
|
|
+ pcmpeqb %xmm2,%xmm0
|
|
|
|
+ pmovmskb %xmm0, %ecx
|
|
|
|
+ xor %eax, %eax
|
|
|
|
+ inc %cx
|
|
|
|
+ setz %al
|
|
|
|
+ ret
|
|
|
|
|
|
-{$error Needs to be fixed for register calling first!}
|
|
|
|
|
|
+.LFallback:
|
|
|
|
+ add $16, %ecx
|
|
|
|
+ jmp fpc_varset_contains_sets_plain
|
|
|
|
|
|
-procedure fpc_largeset_set_word(p : pointer;b : word);assembler;[public,alias:'FPC_LARGESET_SET_WORD']; compilerproc;
|
|
|
|
-{
|
|
|
|
- sets the element b in set p works for sets larger than 256 elements
|
|
|
|
- not yet use by the compiler so
|
|
|
|
-}
|
|
|
|
-asm
|
|
|
|
- pushl %eax
|
|
|
|
- movl p,%edi
|
|
|
|
- movw b,%ax
|
|
|
|
- andl $0xfff8,%eax
|
|
|
|
- shrl $3,%eax
|
|
|
|
- addl %eax,%edi
|
|
|
|
- movb 12(%ebp),%al
|
|
|
|
- andl $7,%eax
|
|
|
|
- btsl %eax,(%edi)
|
|
|
|
- popl %eax
|
|
|
|
|
|
+{$if false}
|
|
|
|
+.LNo:
|
|
|
|
+ xor %eax, %eax
|
|
|
|
+ pop %ebx
|
|
|
|
+{$endif}
|
|
end;
|
|
end;
|
|
|
|
|
|
|
|
+function fpc_varset_contains_sets_dispatch(const set1,set2;size : ptrint):boolean; forward;
|
|
|
|
|
|
-procedure fpc_largeset_in_word(p : pointer;b : word);assembler;[public,alias:'FPC_LARGESET_IN_WORD']; compilerproc;
|
|
|
|
-{
|
|
|
|
- tests if the element b is in the set p the carryflag is set if it present
|
|
|
|
- works for sets larger than 256 elements
|
|
|
|
-}
|
|
|
|
-asm
|
|
|
|
- pushl %eax
|
|
|
|
- movl p,%edi
|
|
|
|
- movw b,%ax
|
|
|
|
- andl $0xfff8,%eax
|
|
|
|
- shrl $3,%eax
|
|
|
|
- addl %eax,%edi
|
|
|
|
- movb 12(%ebp),%al
|
|
|
|
- andl $7,%eax
|
|
|
|
- btl %eax,(%edi)
|
|
|
|
- popl %eax
|
|
|
|
|
|
+var
|
|
|
|
+ fpc_varset_contains_sets_impl: function(const set1,set2;size : ptrint):boolean = @fpc_varset_contains_sets_dispatch;
|
|
|
|
+
|
|
|
|
+function fpc_varset_contains_sets_dispatch(const set1,set2;size : ptrint):boolean;
|
|
|
|
+begin
|
|
|
|
+ if has_sse2_support then
|
|
|
|
+ fpc_varset_contains_sets_impl:=@fpc_varset_contains_sets_sse2
|
|
|
|
+ else
|
|
|
|
+ fpc_varset_contains_sets_impl:=@fpc_varset_contains_sets_plain;
|
|
|
|
+ result:=fpc_varset_contains_sets_impl(set1,set2,size);
|
|
end;
|
|
end;
|
|
|
|
|
|
-
|
|
|
|
-procedure fpc_largeset_comp_sets(set1,set2 : pointer;size : longint);assembler;[public,alias:'FPC_LARGESET_COMP_SETS']; compilerproc;
|
|
|
|
-asm
|
|
|
|
- movl set1,%esi
|
|
|
|
- movl set2,%edi
|
|
|
|
- movl size,%ecx
|
|
|
|
-{$ifdef FPC_ENABLED_CLD}
|
|
|
|
- cld
|
|
|
|
-{$endif FPC_ENABLED_CLD}
|
|
|
|
- .LMCOMPSETSIZES1:
|
|
|
|
- lodsl
|
|
|
|
- movl (%edi),%edx
|
|
|
|
- cmpl %edx,%eax
|
|
|
|
- jne .LMCOMPSETSIZEEND
|
|
|
|
- addl $4,%edi
|
|
|
|
- decl %ecx
|
|
|
|
- jnz .LMCOMPSETSIZES1
|
|
|
|
- { we are here only if the two sets are equal
|
|
|
|
- we have zero flag set, and that what is expected }
|
|
|
|
- .LMCOMPSETSIZEEND:
|
|
|
|
|
|
+function fpc_varset_contains_sets(const set1,set2;size : ptrint):boolean; compilerproc; inline;
|
|
|
|
+begin
|
|
|
|
+ result:=fpc_varset_contains_sets_impl(set1,set2,size);
|
|
end;
|
|
end;
|
|
|
|
+{$define FPC_SYSTEM_HAS_FPC_VARSET_CONTAINS_SET}
|
|
|
|
+{$endif ndef FPC_SYSTEM_HAS_FPC_VARSET_CONTAINS_SET}
|
|
|
|
|
|
-
|
|
|
|
-{$endif LARGESET}
|
|
|
|
-
|
|
|
|
-{$endif FPC_OLD_BIGENDIAN_SETS}
|
|
|