Просмотр исходного кода

+ some optimizations for comparisons with zero whereby the result has to
go to a register

git-svn-id: trunk@1379 -

Jonas Maebe 20 лет назад
Родитель
Сommit
b458554f5b
1 измененных файлов с 81 добавлено и 2 удалено
  1. 81 2
      compiler/powerpc/aoptcpu.pas

+ 81 - 2
compiler/powerpc/aoptcpu.pas

@@ -28,7 +28,7 @@ Interface
 
 {$i fpcdefs.inc}
 
-uses cpubase, aoptobj, aoptcpub, aopt, aasmtai;
+uses cpubase, aoptobj, aoptcpub, aopt, aasmtai, aasmcpu;
 
 Type
   TCpuAsmOptimizer = class(TAsmOptimizer)
@@ -37,12 +37,14 @@ Type
 
     function PostPeepHoleOptsCpu(var p: tai): boolean; override;
 
+   private
+     function cmpi_mfcr_opt(p, next1, next2: taicpu): boolean;
   End;
 
 Implementation
 
   uses
-    cutils, aasmcpu, cgbase;
+    cutils, cgbase;
 
 const
   calculation_target_op0: array[tasmop] of tasmop = (a_none,
@@ -93,6 +95,66 @@ const
     a_none, a_none, a_none, a_none, a_none, a_none, a_not, a_not_, a_none, a_none, a_none,
     a_none, a_none);
 
+  { Tries to replace the three-instruction sequence
+
+       p:     cmpwi/cmplwi  rS, 0
+       next1: mfcr          rD
+       next2: rlwinm        rD, rD, n, 31, 31
+
+    by a shorter sequence that computes the same 0/1 value in rD directly
+    from rS. The caller is expected to have verified the shape of the
+    sequence (opcodes, the zero constant, matching registers and the 31/31
+    mask bounds); this routine only dispatches on the rotate amount "n"
+    (operand 2 of next2).
+
+    A xori on rD that may follow the sequence (used to invert the result)
+    is deliberately left untouched, so the inverted variants keep working
+    without any special handling here.
+
+    Returns true if the instruction list was changed, false if the sequence
+    was left as it was. When true is returned, next2 (and for n = 1 also
+    next1) has been removed from asml and freed, so the caller must not
+    touch it afterwards. }
+  function TCpuAsmOptimizer.cmpi_mfcr_opt(p, next1, next2: taicpu): boolean;
+    begin
+      result := true;
+      case next2.oper[2]^.val of
+        1:
+         begin
+           // less than zero or greater/equal than zero (the xori remains in
+           // the latter case). Doesn't make sense for unsigned comparisons.
+           if (p.opcode = A_CMPWI) then
+             begin
+               // p becomes "srwi rD, rS, 31"
+               p.opcode := A_SRWI;
+               p.ops := 3;
+               // operand 0 still holds rS here: copy it to slot 1 before
+               // slot 0 is overwritten with the destination register
+               p.loadreg(1,p.oper[0]^.reg);
+               p.loadreg(0,next1.oper[0]^.reg);
+               p.loadconst(2,31);
+               // the mfcr and the rlwinm are no longer needed
+               asml.remove(next1);
+               next1.free;
+               asml.remove(next2);
+               next2.free;
+             end
+           else
+             result := false;
+         end;
+{
+    needs two registers to work with
+        2:
+         begin
+           // greater or less/equal to zero
+         end;
+}
+        3:
+         begin
+           // equal/not equal to zero (the xori remains in the latter case;
+           // there's a more optimal sequence without it, but needs extra
+           // register)
+
+           // p becomes "cntlzw rD, rS"
+           p.opcode := A_CNTLZW;
+           p.loadreg(1,p.oper[0]^.reg);
+           p.loadreg(0,next1.oper[0]^.reg);
+           // the mfcr is recycled as "srwi rD, rD, 5"
+           next1.ops := 3;
+           next1.opcode := A_SRWI;
+           next1.loadreg(1,next1.oper[0]^.reg);
+           next1.loadconst(2,5);
+           // only the rlwinm has to go
+           asml.remove(next2);
+           next2.free;
+         end;
+        else
+          result := false;
+      end;
+    end;
+
+
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     var
       next1, next2: tai;
@@ -103,6 +165,23 @@ const
         ait_instruction:
           begin
             case taicpu(p).opcode of
+              A_CMPWI,
+              A_CMPLWI:
+                begin
+                  if (taicpu(p).oper[1]^.typ = top_const) and
+                     (taicpu(p).oper[1]^.val = 0) and 
+                     getnextinstruction(p,next1) and
+                     (next1.typ = ait_instruction) and
+                     (taicpu(next1).opcode = A_MFCR) and
+                     getnextinstruction(next1,next2) and
+                     (taicpu(next2).opcode = A_RLWINM) and
+                     (taicpu(next2).oper[0]^.reg = taicpu(next2).oper[1]^.reg) and
+                     (taicpu(next2).oper[0]^.reg = taicpu(next1).oper[0]^.reg) and
+                     (taicpu(next2).oper[3]^.val = 31) and
+                     (taicpu(next2).oper[4]^.val = 31) and
+                     cmpi_mfcr_opt(taicpu(p),taicpu(next1),taicpu(next2)) then
+                    result := true;
+                end;
 { seems the register allocator doesn't generate superfluous fmr's }
 {              A_FMR, }
               A_MR: