瀏覽代碼

* various fixes and optimizations

Jonas Maebe 23 年之前
父節點
當前提交
ee7d5f7b2a
共有 5 個文件被更改,包括 193 次插入 和 82 次删除
  1. 137 47
      compiler/powerpc/cgcpu.pas
  2. 20 17
      compiler/powerpc/nppcadd.pas
  3. 8 4
      compiler/powerpc/nppccnv.pas
  4. 16 7
      compiler/powerpc/nppcmat.pas
  5. 12 7
      compiler/powerpc/nppcmem.pas

+ 137 - 47
compiler/powerpc/cgcpu.pas

@@ -96,7 +96,8 @@ unit cgcpu;
         procedure g_restore_standard_registers(list : taasmoutput);override;
         procedure g_save_all_registers(list : taasmoutput);override;
         procedure g_restore_all_registers(list : taasmoutput;selfused,accused,acchiused:boolean);override;
-        private
+
+      private
 
         procedure a_jmp_cond(list : taasmoutput;cond : TOpCmp;l: tasmlabel);
 
@@ -106,7 +107,15 @@ unit cgcpu;
 
         { Make sure ref is a valid reference for the PowerPC and sets the }
         { base to the value of the index if (base = R_NO).                }
-        procedure fixref(list: taasmoutput; var ref: treference);
+        { Returns true if the reference contained a base, index and an    }
+        { offset or symbol, in which case the base will have been changed }
+        { to a tempreg (which has to be freed by the caller) containing   }
+        { the sum of part of the original reference                       }
+        function fixref(list: taasmoutput; var ref: treference): boolean;
+
+        { returns whether a reference can be used immediately in a powerpc }
+        { instruction                                                      }
+        function issimpleref(const ref: treference): boolean;
 
         { contains the common code of a_load_reg_ref and a_load_ref_reg }
         procedure a_load_store(list:taasmoutput;op: tasmop;reg:tregister;
@@ -153,7 +162,7 @@ const
 
       begin
         case locpara.loc of
-          LOC_REGISTER:
+          LOC_REGISTER,LOC_CREGISTER:
             a_load_const_reg(list,size,a,locpara.register);
           LOC_REFERENCE:
             begin
@@ -178,7 +187,7 @@ const
 
       begin
         case locpara.loc of
-          LOC_REGISTER:
+          LOC_REGISTER,LOC_CREGISTER:
             a_load_ref_reg(list,size,r,locpara.register);
           LOC_REFERENCE:
             begin
@@ -190,7 +199,7 @@ const
                a_load_reg_ref(list,size,tmpreg,ref);
                free_scratch_reg(list,tmpreg);
             end;
-          LOC_FPUREGISTER:
+          LOC_FPUREGISTER,LOC_CFPUREGISTER:
             case size of
                OS_32:
                  a_loadfpu_ref_reg(list,OS_F32,r,locpara.register);
@@ -215,7 +224,7 @@ const
 
       begin
          case locpara.loc of
-            LOC_REGISTER:
+            LOC_REGISTER,LOC_CREGISTER:
               a_loadaddr_ref_reg(list,r,locpara.register);
             LOC_REFERENCE:
               begin
@@ -266,7 +275,8 @@ const
           else if ((a and $ffff) <> 0) then
             begin
               list.concat(taicpu.op_reg_const(A_LI,reg,smallint(a and $ffff)));
-              if ((a shr 16) <> 0) then
+              if ((a shr 16) <> 0) or
+                 (smallint(a and $ffff) < 0) then
                 list.concat(taicpu.op_reg_const(A_ADDIS,reg,
                   smallint((a shr 16)+ord(smallint(a and $ffff) < 0))))
             end
@@ -286,10 +296,10 @@ const
        var
          op: TAsmOp;
          ref2: TReference;
-
+         freereg: boolean;
        begin
          ref2 := ref;
-         FixRef(list,ref2);
+         freereg := fixref(list,ref2);
          if size in [OS_S8..OS_S16] then
            { storing is the same for signed and unsigned values }
            size := tcgsize(ord(size)-(ord(OS_S8)-ord(OS_8)));
@@ -298,6 +308,8 @@ const
            internalerror(200109236);
          op := storeinstr[tcgsize2unsigned[size],ref2.index<>R_NO,false];
          a_load_store(list,op,reg,ref2);
+         if freereg then
+           cg.free_scratch_reg(list,ref2.base);
        End;
 
 
@@ -319,12 +331,15 @@ const
          op: tasmop;
          tmpreg: tregister;
          ref2, tmpref: treference;
+         freereg: boolean;
 
        begin
           ref2 := ref;
-          fixref(list,ref2);
+          freereg := fixref(list,ref2);
           op := loadinstr[size,ref2.index<>R_NO,false];
           a_load_store(list,op,reg,ref2);
+          if freereg then
+            free_scratch_reg(list,ref2.base);
           { sign extend shortint if necessary, since there is no }
           { load instruction that does that automatically (JM)   }
           if size = OS_S8 then
@@ -366,7 +381,7 @@ const
     procedure tcgppc.a_loadfpu_reg_reg(list: taasmoutput; reg1, reg2: tregister);
 { Appends a single FMR instruction operating on reg1 and reg2 to list. }
       begin
-         list.concat(taicpu.op_reg_reg(A_FMR,reg1,reg2));
+         list.concat(taicpu.op_reg_reg(A_FMR,reg2,reg1));  { reg2 is now passed first; NOTE(review): presumably reg2 is the destination of the copy, matching fmr's destination-first operand order -- confirm }
       end;
 
      procedure tcgppc.a_loadfpu_ref_reg(list: taasmoutput; size: tcgsize; const ref: treference; reg: tregister);
@@ -379,6 +394,8 @@ const
        var
          op: tasmop;
          ref2: treference;
+         freereg: boolean;
+
        begin
           { several functions call this procedure with OS_32 or OS_64 }
           { so this makes life easier (FK)                            }
@@ -391,9 +408,11 @@ const
                internalerror(200201121);
           end;
          ref2 := ref;
-         fixref(list,ref2);
+         freereg := fixref(list,ref2);
          op := fpuloadinstr[size,ref2.index <> R_NO,false];
          a_load_store(list,op,reg,ref2);
+         if freereg then
+           cg.free_scratch_reg(list,ref2.base);
        end;
 
      procedure tcgppc.a_loadfpu_reg_ref(list: taasmoutput; size: tcgsize; reg: tregister; const ref: treference);
@@ -406,13 +425,17 @@ const
        var
          op: tasmop;
          ref2: treference;
+         freereg: boolean;
+
        begin
          if not(size in [OS_F32,OS_F64]) then
            internalerror(200201122);
          ref2 := ref;
-         fixref(list,ref2);
+         freereg := fixref(list,ref2);
          op := fpustoreinstr[size,ref2.index <> R_NO,false];
          a_load_store(list,op,reg,ref2);
+         if freereg then
+           cg.free_scratch_reg(list,ref2.base);
        end;
 
 
@@ -465,7 +488,7 @@ const
         ophi := TOpCG2AsmOpConstHi[op];
         oplo := TOpCG2AsmOpConstLo[op];
         gotrlwi := get_rlwi_const(a,l1,l2);
-        if (op in [OP_ADD,OP_AND,OP_OR,OP_XOR]) then
+        if (op in [OP_AND,OP_OR,OP_XOR]) then
           begin
             if (a = 0) then
               begin
@@ -473,8 +496,7 @@ const
                   list.concat(taicpu.op_reg_const(A_LI,dst,0));
                 exit;
               end
-            else if (a = high(aword)) and
-                    (op in [OP_AND,OP_OR,OP_XOR]) then
+            else if (a = high(aword)) then
               begin
                 case op of
                   OP_OR:
@@ -484,15 +506,11 @@ const
                 end;
                 exit;
               end
-            else if (longint(a) >= 0) and
-               (longint(a) <= high(word)) and
+            else if (a <= high(word)) and
                ((op <> OP_AND) or
                 not gotrlwi) then
               begin
-                if (op = OP_ADD) then
-                  list.concat(taicpu.op_reg_reg_const(oplo,dst,src,smallint(a)))
-                else
-                  list.concat(taicpu.op_reg_reg_const(oplo,dst,src,word(a)));
+                list.concat(taicpu.op_reg_reg_const(oplo,dst,src,word(a)));
                 exit;
               end;
             { all basic constant instructions also have a shifted form that }
@@ -505,15 +523,58 @@ const
                 list.concat(taicpu.op_reg_reg_const(ophi,dst,src,word(a shr 16)));
                 exit;
               end;
-          end;
+          end
+        else if (op = OP_ADD) then
+          if a = 0 then
+            exit
+          else if (longint(a) >= low(smallint)) and
+              (longint(a) <= high(smallint)) then
+             begin
+               list.concat(taicpu.op_reg_reg_const(A_ADDI,dst,src,smallint(a)));
+               exit;
+             end;
+
         { otherwise, the instructions we can generate depend on the }
         { operation                                                 }
         useReg := false;
         case op of
           OP_DIV,OP_IDIV:
-            useReg := true;
+             if (a = 0) then
+               internalerror(200208103)
+             else if (a = 1) then
+               begin
+                 a_load_reg_reg(list,OS_INT,src,dst);
+                 exit
+               end
+            else if ispowerof2(a,l1) then
+              begin
+                case op of
+                  OP_DIV:
+                    list.concat(taicpu.op_reg_reg_const(A_SRWI,dst,src,l1));
+                  OP_IDIV:
+                    begin
+                       list.concat(taicpu.op_reg_reg_const(A_SRAWI,dst,src,l1));
+                       list.concat(taicpu.op_reg_reg(A_ADDZE,dst,dst));
+                    end;
+                end;
+                exit;
+              end
+            else
+              usereg := true;
            OP_IMUL, OP_MUL:
-             if (longint(a) >= low(smallint)) and
+             if (a = 0) then
+               begin
+                 list.concat(taicpu.op_reg_const(A_LI,dst,0));
+                 exit
+               end
+             else if (a = 1) then
+               begin
+                 a_load_reg_reg(list,OS_INT,src,dst);
+                 exit
+               end
+             else if ispowerof2(a,l1) then
+               list.concat(taicpu.op_reg_reg_const(A_SLWI,dst,src,l1))
+             else if (longint(a) >= low(smallint)) and
                 (longint(a) <= high(smallint)) then
                list.concat(taicpu.op_reg_reg_const(A_MULLI,dst,src,smallint(a)))
              else
@@ -1090,10 +1151,11 @@ const
 
        var
          ref2, tmpref: treference;
+         freereg: boolean;
 
        begin
          ref2 := ref;
-         FixRef(list,ref2);
+         freereg := fixref(list,ref2);
          if assigned(ref2.symbol) then
            { add the symbol's value to the base of the reference, and if the }
            { reference doesn't have a base, create one                       }
@@ -1103,8 +1165,15 @@ const
              tmpref.symbol := ref2.symbol;
              tmpref.symaddr := refs_ha;
              if ref2.base <> R_NO then
-               list.concat(taicpu.op_reg_reg_ref(A_ADDIS,r,
-                 ref2.base,tmpref))
+               begin
+                 list.concat(taicpu.op_reg_reg_ref(A_ADDIS,r,
+                   ref2.base,tmpref));
+                 if freereg then
+                   begin
+                     cg.free_scratch_reg(list,ref2.base);
+                     freereg := false;
+                   end;
+               end
              else
                list.concat(taicpu.op_reg_ref(A_LIS,r,tmpref));
              tmpref.base := R_NO;
@@ -1124,6 +1193,8 @@ const
          else if (ref2.base <> R_NO) and
                  (r <> ref2.base) then
            list.concat(taicpu.op_reg_reg(A_MR,r,ref2.base));
+         if freereg then
+           cg.free_scratch_reg(list,ref2.base);
        end;
 
 { ************* concatcopy ************ }
@@ -1135,7 +1206,7 @@ const
         src, dst: TReference;
         lab: tasmlabel;
         count, count2: aword;
-        orgsrc, orgdst : boolean;
+        orgsrc, orgdst: boolean;
 
       begin
 {$ifdef extdebug}
@@ -1166,11 +1237,6 @@ const
               exit;
             end;
 
-        { make sure source and dest are valid }
-        src := source;
-        fixref(list,src);
-        dst := dest;
-        fixref(list,dst);
         reference_reset(src);
         reference_reset(dst);
         { load the address of source into src.base }
@@ -1180,8 +1246,9 @@ const
             a_load_ref_reg(list,OS_32,source,src.base);
             orgsrc := false;
           end
-        else if assigned(source.symbol) or
-                ((source.offset + longint(len)) > high(smallint)) then
+        else if not issimpleref(source) or
+                ((source.index <> R_NO) and
+                 ((source.offset + longint(len)) > high(smallint))) then
           begin
             src.base := get_scratch_reg_address(list);
             a_loadaddr_ref_reg(list,source,src.base);
@@ -1195,8 +1262,9 @@ const
         if not orgsrc and delsource then
           reference_release(exprasmlist,source);
         { load the address of dest into dst.base }
-        if assigned(dest.symbol) or
-           ((dest.offset + longint(len)) > high(smallint)) then
+        if not issimpleref(dest) or
+           ((dest.index <> R_NO) and
+            ((dest.offset + longint(len)) > high(smallint))) then
           begin
             dst.base := get_scratch_reg_address(list);
             a_loadaddr_ref_reg(list,dest,dst.base);
@@ -1340,35 +1408,54 @@ const
       end;
 
 
-    procedure tcgppc.fixref(list: taasmoutput; var ref: treference);
+    function tcgppc.issimpleref(const ref: treference): boolean;
      { True when ref has no symbol and is either base+offset with an offset in the smallint range, or base+index with a zero offset. }
+      begin
+        if (ref.base = R_NO) and
+           (ref.index <> R_NO) then
+          internalerror(200208101);  { an index without a base is rejected outright }
+        result :=
+          not(assigned(ref.symbol)) and  { any symbol makes the reference non-simple }
+          (((ref.index = R_NO) and  { no index: the offset must fit in a smallint }
+            (ref.offset >= low(smallint)) and
+            (ref.offset <= high(smallint))) or
+           ((ref.index <> R_NO) and  { with an index: no offset is allowed }
+            (ref.offset = 0)));
+      end;
+
+    function tcgppc.fixref(list: taasmoutput; var ref: treference): boolean;
+      { Rewrites ref in place when it combines base and index with an offset or symbol. Returns true when ref.base was replaced by a scratch register obtained here; this function does not free it. }
+       var
+         tmpreg: tregister;
        begin
-         If (ref.base <> R_NO) then
+         result := false;  { default: ref is left unchanged and no scratch register was taken }
+         if (ref.base <> R_NO) then
            begin
              if (ref.index <> R_NO) and
                 ((ref.offset <> 0) or assigned(ref.symbol)) then
                begin
+                 result := true;
+                 tmpreg := cg.get_scratch_reg_int(list);  { the sum goes into a scratch register instead of overwriting the original base }
                  if not assigned(ref.symbol) and
                     (cardinal(ref.offset-low(smallint)) <=
                       high(smallint)-low(smallint)) then  { single unsigned compare: offset lies within low(smallint)..high(smallint) }
                    begin
                      list.concat(taicpu.op_reg_reg_const(
-                       A_ADDI,ref.base,ref.base,ref.offset));
+                       A_ADDI,tmpreg,ref.base,ref.offset));  { fold the small offset into tmpreg; the index stays in ref }
                      ref.offset := 0;
                    end
                  else
                    begin
                      list.concat(taicpu.op_reg_reg_reg(
-                       A_ADD,ref.base,ref.base,ref.index));
+                       A_ADD,tmpreg,ref.base,ref.index));  { fold the index into tmpreg; offset and symbol stay in ref }
                      ref.index := R_NO;
                    end;
+                 ref.base := tmpreg;  { from here on ref addresses through the scratch register }
                end
            end
          else
-           begin
-             ref.base := ref.index;
-             ref.index := R_NO
-           end
+           if ref.index <> R_NO then
+             internalerror(200208102);  { an index without a base is now an internal error rather than being moved into base }
        end;
 
 
@@ -1586,7 +1673,10 @@ begin
 end.
 {
   $Log$
-  Revision 1.36  2002-08-06 20:55:23  florian
+  Revision 1.37  2002-08-10 17:15:31  jonas
+    * various fixes and optimizations
+
+  Revision 1.36  2002/08/06 20:55:23  florian
     * first part of ppc calling conventions fix
 
   Revision 1.35  2002/08/06 07:12:05  jonas

+ 20 - 17
compiler/powerpc/nppcadd.pas

@@ -198,24 +198,24 @@ interface
         if (right.location.loc = LOC_CONSTANT) then
           begin
 {$ifdef extdebug}
-            if (qword(right.location.value) > high(cardinal)) then
+            if (qword(right.location.valuehigh) <> 0) then
               internalerror(2002080301);
 {$endif extdebug}
             if (nodetype in [equaln,unequaln]) then
               if (unsigned and
                   (right.location.value > high(word))) or
                  (not unsigned and
-                  (right.location.value < low(smallint)) or
-                   (right.location.value > high(smallint))) then
+                  (longint(right.location.value) < low(smallint)) or
+                   (longint(right.location.value) > high(smallint))) then
                 // we can then maybe use a constant in the 'othersigned' case
                 // (the sign doesn't matter for // equal/unequal)
                 unsigned := not unsigned;
 
             if (unsigned and
-                (qword(right.location.value) <= high(word))) or
+                (right.location.value) <= high(word)) or
                (not(unsigned) and
-                (right.location.value >= low(smallint)) and
-                (right.location.value <= high(smallint))) then
+                (longint(right.location.value) >= low(smallint)) and
+                (longint(right.location.value) <= high(smallint))) then
                useconst := true
             else
               begin
@@ -243,7 +243,7 @@ interface
         if (right.location.loc = LOC_CONSTANT) then
           if useconst then
             exprasmlist.concat(taicpu.op_reg_const(op,
-              left.location.register,right.location.value))
+              left.location.register,longint(right.location.value)))
           else
             begin
               exprasmlist.concat(taicpu.op_reg_reg(op,
@@ -348,7 +348,7 @@ interface
                       left.location.register,right.location.register))
                   else
                     exprasmlist.concat(taicpu.op_reg_const(A_CMPLWI,
-                      left.location.register,right.location.value));
+                      left.location.register,longint(right.location.value)));
                   location.resflags := getresflags;
                 end;
               else
@@ -693,11 +693,11 @@ interface
           location_copy(oldleft,left.location);
           location_copy(oldright,right.location);
           if left.location.loc = LOC_CONSTANT then
-            left.location.value := left.location.value shr 32
+            left.location.valueqword := left.location.valueqword shr 32
           else
             left.location.registerlow := left.location.registerhigh;
           if right.location.loc = LOC_CONSTANT then
-            right.location.value := right.location.value shr 32
+            right.location.valueqword := right.location.valueqword shr 32
           else
             right.location.registerlow := right.location.registerhigh;
 
@@ -848,8 +848,8 @@ interface
               swapleftright;
             if left.location.loc = LOC_CONSTANT then
               if not(cs_check_overflow in aktlocalswitches) and
-                 (left.location.value >= low(smallint)) and
-                 (left.location.value <= high(smallint)) then
+                 (longint(left.location.value) >= low(smallint)) and
+                 (longint(left.location.value) <= high(smallint)) then
                 begin
                   // optimize
                   exprasmlist.concat(taicpu.op_reg_reg_const(A_SUBFIC,
@@ -887,7 +887,7 @@ interface
                   if left.location.loc = LOC_CONSTANT then
                     swapleftright;
                   if (right.location.loc = LOC_CONSTANT) then
-                    cg64.a_op64_const_reg_reg(exprasmlist,op,qword(right.location.value),
+                    cg64.a_op64_const_reg_reg(exprasmlist,op,right.location.valueqword,
                       left.location.register64,location.register64)
                   else
                     cg64.a_op64_reg_reg_reg(exprasmlist,op,right.location.register64,
@@ -1281,11 +1281,11 @@ interface
            begin
              case nodetype of
                addn:
-                 op := A_ADDO;
+                 op := A_ADDO_;
                subn:
-                 op := A_SUBO;
+                 op := A_SUBO_;
                muln:
-                  op := A_MULLWO;
+                  op := A_MULLWO_;
                else
                  internalerror(2002072601);
              end;
@@ -1302,7 +1302,10 @@ begin
 end.
 {
   $Log$
-  Revision 1.6  2002-08-06 20:55:24  florian
+  Revision 1.7  2002-08-10 17:15:31  jonas
+    * various fixes and optimizations
+
+  Revision 1.6  2002/08/06 20:55:24  florian
     * first part of ppc calling conventions fix
 
   Revision 1.5  2002/08/05 08:58:54  jonas

+ 8 - 4
compiler/powerpc/nppccnv.pas

@@ -121,7 +121,8 @@ implementation
         size := resulttype.def.size;
         leftsize := left.resulttype.def.size;
         if (size < leftsize) or
-           ((left.location.loc <> LOC_REGISTER) and
+           (((newsize in [OS_64,OS_S64]) or
+             (left.location.loc <> LOC_REGISTER)) and
             (size > leftsize)) then
           begin
             { reuse the left location by default }
@@ -254,8 +255,8 @@ implementation
            cg.free_scratch_reg(exprasmlist,valuereg);
 
          tmpfpureg := rg.getregisterfpu(exprasmlist);
-         exprasmlist.concat(taicpu.op_reg_ref(A_LFD,tmpfpureg,
-           tempconst.location.reference));
+         a_loadfpu_ref_reg(exprasmlist,OS_F64,tempconst.location.reference,
+           tmpfpureg);
          tempconst.free;
 
          location.register := rg.getregisterfpu(exprasmlist);
@@ -417,7 +418,10 @@ begin
 end.
 {
   $Log$
-  Revision 1.19  2002-07-29 21:23:44  florian
+  Revision 1.20  2002-08-10 17:15:31  jonas
+    * various fixes and optimizations
+
+  Revision 1.19  2002/07/29 21:23:44  florian
     * more fixes for the ppc
     + wrappers for the tcnvnode.first_* stuff introduced
 

+ 16 - 7
compiler/powerpc/nppcmat.pas

@@ -371,7 +371,6 @@ implementation
                   end;
                 LOC_REFERENCE,LOC_CREFERENCE:
                   begin
-                     reference_release(exprasmlist,left.location.reference);
                      if (left.resulttype.def.deftype=floatdef) then
                        begin
                           src1 := rg.getregisterfpu(exprasmlist);
@@ -387,16 +386,23 @@ implementation
                           cg.a_load_ref_reg(exprasmlist,OS_32,
                             left.location.reference,src1);
                        end;
+                     reference_release(exprasmlist,left.location.reference);
                   end;
               end;
               { choose appropriate operand }
               if left.resulttype.def.deftype <> floatdef then
-                if not(cs_check_overflow in aktlocalswitches) then
-                  op := A_NEG
-                else
-                  op := A_NEGO_
+                begin
+                  if not(cs_check_overflow in aktlocalswitches) then
+                    op := A_NEG
+                  else
+                    op := A_NEGO_;
+                  location.loc := LOC_REGISTER;
+                end
               else
-                op := A_FNEG;
+                begin
+                  op := A_FNEG;
+                  location.loc := LOC_FPUREGISTER;
+                end;
               { emit operation }
               exprasmlist.concat(taicpu.op_reg_reg(op,location.register,src1));
            end;
@@ -489,7 +495,10 @@ begin
 end.
 {
   $Log$
-  Revision 1.15  2002-07-26 10:48:34  jonas
+  Revision 1.16  2002-08-10 17:15:31  jonas
+    * various fixes and optimizations
+
+  Revision 1.15  2002/07/26 10:48:34  jonas
     * fixed bug in shl/shr code
 
   Revision 1.14  2002/07/20 11:58:05  florian

+ 12 - 7
compiler/powerpc/nppcmem.pas

@@ -397,18 +397,20 @@ implementation
                    end;
                end;
 
-              if location.reference.index=R_NO then
+              if location.reference.base=R_NO then
                begin
-                 location.reference.index:=right.location.register;
+                 location.reference.base:=right.location.register;
                  cg.a_op_const_reg(exprasmlist,OP_IMUL,get_mul_size,
                    right.location.register);
                end
               else
                begin
-                 if location.reference.base=R_NO then
-                   { this wouldn't make sense for the ppc since there are }
-                   { no scalefactors (JM)                                 }
-                   internalerror(2002072901)
+                 if location.reference.index=R_NO then
+                   begin
+                     location.reference.index:=right.location.register;
+                     cg.a_op_const_reg(exprasmlist,OP_IMUL,get_mul_size,
+                       right.location.register);
+                   end
                  else
                   begin
                     cg.a_loadaddr_ref_reg(exprasmlist,location.reference,
@@ -436,7 +438,10 @@ begin
 end.
 {
   $Log$
-  Revision 1.1  2002-07-29 09:21:30  jonas
+  Revision 1.2  2002-08-10 17:15:31  jonas
+    * various fixes and optimizations
+
+  Revision 1.1  2002/07/29 09:21:30  jonas
     + tppcvecnode, almost straight copy of the i386 code, can most likely
       be made generic if all treference type allow a base, index and offset