Browse Source

Make use of CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in Bsf*/Bsr*.

Rika Ichinose 1 year ago
parent
commit
fc1050a834
3 changed files with 54 additions and 12 deletions
  1. 30 4
      compiler/x86/cgx86.pas
  2. 20 4
      rtl/i386/i386.inc
  3. 4 4
      tests/test/tbsx1.pp

+ 30 - 4
compiler/x86/cgx86.pas

@@ -2492,14 +2492,40 @@ unit cgx86;
        else
          tmpreg:=dst;
        opsize:=tcgsize2opsize[srcsize];
+
+       { AMD docs: BSF/R dest, 0 “sets ZF to 1 and does not change the contents of the destination register.”
+         Intel docs: “If the content of the source operand is 0, the content of the destination operand is undefined.”
+         (However, Intel silently implements the same behavior as AMD, which is understandable.)
+
+         If relying on this behavior, do
+
+         mov tmpreg, $FF
+         bsx tmpreg, src
+
+         If not relying, do
+
+         bsx tmpreg, src
+         jnz .LDone
+         mov tmpreg, $FF
+.LDone:
+         }
+
+       if CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in cpu_optimization_hints[current_settings.optimizecputype] then
+         list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg));
+
        if not reverse then
          list.concat(taicpu.op_reg_reg(A_BSF,opsize,src,tmpreg))
        else
          list.concat(taicpu.op_reg_reg(A_BSR,opsize,src,tmpreg));
-       current_asmdata.getjumplabel(l);
-       a_jmp_cond(list,OC_NE,l);
-       list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg));
-       a_label(list,l);
+
+       if not (CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1 in cpu_optimization_hints[current_settings.optimizecputype]) then
+         begin
+           current_asmdata.getjumplabel(l);
+           a_jmp_cond(list,OC_NE,l);
+           list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg));
+           a_label(list,l);
+         end;
+
        if tmpreg<>dst then
          a_load_reg_reg(list,srcsize,dstsize,tmpreg,dst);
      end;

+ 20 - 4
rtl/i386/i386.inc

@@ -2708,9 +2708,14 @@ end;
 
 {$ifndef FPC_SYSTEM_HAS_BSF_QWORD}
 {$define FPC_SYSTEM_HAS_BSF_QWORD}
-
 function BsfQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
 asm
+{$ifdef CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1}
+     mov     $255-32,%eax { On AMD, BSF/R are documented to not change the destination on zero input. }
+     bsfl    8(%esp),%eax { On Intel, destination is formally undefined on zero input, but in practice the behavior is the same. }
+     add     $32,%eax
+     bsfl    4(%esp),%eax
+{$else}
      bsfl    4(%esp),%eax
      jz     .L1
      ret     $8
@@ -2721,6 +2726,7 @@ asm
      ret     $8
 .L2:
      movl    $255,%eax
+{$endif}
 end;
 {$endif FPC_SYSTEM_HAS_BSF_QWORD}
 
@@ -2729,16 +2735,26 @@ end;
 {$define FPC_SYSTEM_HAS_BSR_QWORD}
 function BsrQWord(Const AValue : QWord): cardinal; assembler; nostackframe;
 asm
+{$ifdef CPUX86_HINT_BSX_DEST_UNCHANGED_ON_ZF_1}
+     mov     $255,%eax
+     bsrl    4(%esp),%eax
+     sub     $32,%eax
      bsrl    8(%esp),%eax
-     jz     .L1
      add     $32,%eax
+{$else}
+     mov     8(%esp),%eax
+     test    %eax,%eax
+     jnz    .L1 { Speculate Hi(q) = 0. }
+     bsrl    4(%esp),%eax
+     jz     .L2
      ret     $8
 .L1:
-     bsrl    4(%esp),%eax
-     jz      .L2
+     bsrl    %eax,%eax
+     add     $32,%eax
      ret     $8
 .L2:
      movl    $255,%eax
+{$endif}
 end;
 {$endif FPC_SYSTEM_HAS_BSR_QWORD}
 

+ 4 - 4
tests/test/tbsx1.pp

@@ -23,7 +23,7 @@ begin
       exit(false);
     end;
   end;
-  x8:=0;
+  x8:=random(0);
   f:=BsfByte(x8);
   if (f<>$ff) then
   begin
@@ -60,7 +60,7 @@ begin
       exit(false);
     end;
   end;
-  x16:=0;
+  x16:=random(0);
   f:=BsfWord(x16);
   if (f<>$ff) then
   begin
@@ -97,7 +97,7 @@ begin
       exit(false);
     end;
   end;
-  x32:=0;
+  x32:=random(0);
   f:=BsfDWord(x32);
   if (f<>$ff) then
   begin
@@ -132,7 +132,7 @@ begin
       exit(false);
     end;
   end;
-  x64:=0;
+  x64:=random(0);
   f:=BsfQWord(x64);
   if (f<>$ff) then
   begin