浏览代码

* moved x86-specific requirements from the generic bsr/bsf code to the
x86 code generator (register size constraints)

git-svn-id: trunk@29984 -

Jonas Maebe 10 年之前
父节点
当前提交
2ab7f5c35d

+ 6 - 6
compiler/aarch64/cgcpu.pas

@@ -81,7 +81,7 @@ interface
 
         procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;
 
-        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); override;
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
         { comparison operations }
         procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
         procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
@@ -1078,12 +1078,12 @@ implementation
       end;
 
 
-    procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister);
+    procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       var
         bitsize,
         signbit: longint;
       begin
-        if size in [OS_64,OS_S64] then
+        if srcsize in [OS_64,OS_S64] then
           begin
             bitsize:=64;
             signbit:=6;
@@ -1097,18 +1097,18 @@ implementation
         list.concat(taicpu.op_reg_const(A_CMP,src,0));
         if reverse then
           begin
-            list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));
+            list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src));
             { xor 31/63 is the same as setting the lower 5/6 bits to
               "31/63-(lower 5/6 bits of dst)" }
             list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,bitsize-1));
           end
         else
           begin
-            list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));
+            list.Concat(taicpu.op_reg_reg(A_RBIT,makeregsize(dst,srcsize),src));
             list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
           end;
         { set dst to -1 if src was 0 }
-        list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,src,C_NE));
+        list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
         { mask the -1 to 255 if src was 0 (anyone find a two-instruction
           branch-free version? All of mine are 3...) }
         list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,dst,dst),PF_B));

+ 2 - 2
compiler/arm/cgcpu.pas

@@ -104,7 +104,7 @@ unit cgcpu;
 
         procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
         { Transform unsupported methods into Internal errors }
-        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override;
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
 
         { try to generate optimized 32 Bit multiplication, returns true if successfully generated }
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
@@ -1706,7 +1706,7 @@ unit cgcpu;
       end;
 
 
-    procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister);
+    procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
       begin
         if reverse then
           begin

+ 2 - 2
compiler/cghlcpu.pas

@@ -65,14 +65,14 @@ uses
       procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); override;
       procedure a_call_reg(list: TAsmList; reg: tregister); override;
       procedure a_call_name(list: TAsmList; const s: string; weak: boolean); override;
-      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); override;
+      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
     end;
 
 implementation
 
    { thlbasecgcpu }
 
-    procedure thlbasecgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister);
+    procedure thlbasecgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       begin
         internalerror(2012042801);
       end;

+ 2 - 2
compiler/cgobj.pas

@@ -247,7 +247,7 @@ unit cgobj;
           procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);virtual; abstract;
 
           { bit scan instructions }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); virtual;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); virtual;
 
           { Multiplication with doubling result size.
             dstlo or dsthi may be NR_NO, in which case corresponding half of result is discarded. }
@@ -2528,7 +2528,7 @@ implementation
       end;
 
 
-    procedure tcg.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister);
+    procedure tcg.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       begin
         internalerror(2014070601);
       end;

+ 3 - 3
compiler/hlcg2ll.pas

@@ -171,7 +171,7 @@ unit hlcg2ll;
           procedure a_loadaddr_ref_reg(list : TAsmList;fromsize, tosize : tdef;const ref : treference;r : tregister);override;
 
           { bit scan instructions }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); override;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); override;
 
           { fpu move instructions }
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); override;
@@ -586,9 +586,9 @@ implementation
       cg.a_loadaddr_ref_reg(list,ref,r);
     end;
 
-  procedure thlcg2ll.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister);
+  procedure thlcg2ll.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister);
     begin
-      cg.a_bit_scan_reg_reg(list,reverse,def_cgsize(size),src,dst);
+      cg.a_bit_scan_reg_reg(list,reverse,def_cgsize(srcsize),def_cgsize(dstsize),src,dst);
     end;
 
   procedure thlcg2ll.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister);

+ 1 - 1
compiler/hlcgobj.pas

@@ -301,7 +301,7 @@ unit hlcgobj;
          public
 
           { bit scan instructions (still need transformation to thlcgobj) }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); virtual; abstract;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); virtual; abstract;
 
           { fpu move instructions }
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); virtual; abstract;

+ 2 - 2
compiler/jvm/hlcgcpu.pas

@@ -114,7 +114,7 @@ uses
       procedure gen_exit_code(list: TAsmList); override;
 
       { unimplemented/unnecessary routines }
-      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); override;
+      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); override;
       procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle); override;
       procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle); override;
       procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle); override;
@@ -1884,7 +1884,7 @@ implementation
       { nothing }
     end;
 
-  procedure thlcgjvm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister);
+  procedure thlcgjvm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister);
     begin
       internalerror(2012090201);
     end;

+ 4 - 8
compiler/ncginl.pas

@@ -749,16 +749,12 @@ implementation
       secondpass(left);
 
       opsize:=tcgsize2unsigned[left.location.size];
-      if opsize<OS_32 then
-        opsize:=OS_32;
-
-      if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) or
-         (left.location.size<>opsize) then
+      if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);
 
-      location_reset(location,LOC_REGISTER,opsize);
-      location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
-      cg.a_bit_scan_reg_reg(current_asmdata.CurrAsmList,reverse,opsize,left.location.register,location.register);
+      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+      location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+      cg.a_bit_scan_reg_reg(current_asmdata.CurrAsmList,reverse,opsize,location.size,left.location.register,location.register);
     end;
 
 

+ 22 - 6
compiler/x86/cgx86.pas

@@ -82,7 +82,7 @@ unit cgx86;
         procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);override;
 
         { bit scan instructions }
-        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override;
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
 
         { fpu move instructions }
         procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
@@ -2029,20 +2029,36 @@ unit cgx86;
         end;
       end;
 
-     procedure tcgx86.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister);
+     procedure tcgx86.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
      var
+       tmpreg: tregister;
        opsize: topsize;
        l : TAsmLabel;
      begin
-       opsize:=tcgsize2opsize[size];
+       { no bsf/bsr for byte }
+       if srcsize in [OS_8,OS_S8] then
+         begin
+           tmpreg:=getintregister(list,OS_INT);
+           a_load_reg_reg(list,srcsize,OS_INT,src,tmpreg);
+           src:=tmpreg;
+           srcsize:=OS_INT;
+         end;
+       { source and destination register must have the same size }
+       if tcgsize2size[srcsize]<>tcgsize2size[dstsize] then
+         tmpreg:=getintregister(list,srcsize)
+       else
+         tmpreg:=dst;
+       opsize:=tcgsize2opsize[srcsize];
        if not reverse then
-         list.concat(taicpu.op_reg_reg(A_BSF,opsize,src,dst))
+         list.concat(taicpu.op_reg_reg(A_BSF,opsize,src,tmpreg))
        else
-         list.concat(taicpu.op_reg_reg(A_BSR,opsize,src,dst));
+         list.concat(taicpu.op_reg_reg(A_BSR,opsize,src,tmpreg));
        current_asmdata.getjumplabel(l);
        a_jmp_cond(list,OC_NE,l);
-       list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,dst));
+       list.concat(taicpu.op_const_reg(A_MOV,opsize,$ff,tmpreg));
        a_label(list,l);
+       if tmpreg<>dst then
+         a_load_reg_reg(list,srcsize,dstsize,tmpreg,dst);
      end;
 
 {*************** compare instructions ****************}