2
0
Эх сурвалжийг харах

* fixed and enhanced rlwinm optimizations somewhat

git-svn-id: trunk@3555 -
Jonas Maebe 19 жил өмнө
parent
commit
67dc63048c

+ 56 - 39
compiler/powerpc/aoptcpu.pas

@@ -44,7 +44,7 @@ Type
 Implementation
 
   uses
-    cutils, cgbase;
+    cutils, cgbase, cgcpu, cgobj;
 
 const
   calculation_target_op0: array[tasmop] of tasmop = (a_none,
@@ -155,6 +155,18 @@ const
     end;
 
 
+  function rlwinm2mask(l1,l2: longint): longint;
+    begin
+       // 1 shl 32 = 1 instead of 0 on x86
+      if (l1 <> 0) then
+        result :=  (1 shl (32 - l1) - 1) xor (1 shl (31 - l2) - 1)
+      else
+        result := not(1 shl (31 - l2) - 1);
+      if (l1 > l2) then
+        result := not(result);        
+    end;
+
+
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     var
       next1, next2: tai;
@@ -277,39 +289,46 @@ const
                      (taicpu(next1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
                      (taicpu(next1).oper[2]^.val = 0) then
                     begin
-                      l1 := taicpu(p).oper[4]^.val;
-                      if (l1 < taicpu(p).oper[3]^.val) then
-                        inc(l1,32);
-                      l2 := taicpu(next1).oper[4]^.val;
-                      if (l2 < taicpu(next1).oper[3]^.val) then
-                        inc(l2,32);
-
-                      if (taicpu(p).oper[3]^.val > l2) or
-                         (taicpu(next1).oper[3]^.val > l1) then
-                        begin
-                          // masks have no bits in common
-                          taicpu(p).opcode := A_LI;
-                          taicpu(p).loadconst(1,0);
-                          taicpu(p).clearop(2);
-                          taicpu(p).clearop(3);
-                          taicpu(p).clearop(4);
-                          taicpu(p).ops := 2;
-                          taicpu(p).opercnt := 2;
-                          asml.remove(next1);
-                          next1.free;
-                        end
-                      else
-                        // some of the cases with l1>32 or l2>32 can be
-                        // optimized, but others can't (like 19,17 and 25,23)
-                        if (l1 < 32) and
-                           (l2 < 32) then
-                        begin
-                          taicpu(p).oper[3]^.val := max(taicpu(p).oper[3]^.val,taicpu(next1).oper[3]^.val);
-                          taicpu(p).oper[4]^.val := min(taicpu(p).oper[4]^.val,taicpu(next1).oper[4]^.val);
-                          asml.remove(next1);
-                          next1.free;
-                          result := true;
-                        end;
+                      l1 := rlwinm2mask(taicpu(p).oper[3]^.val,taicpu(p).oper[4]^.val);
+                      l2 := rlwinm2mask(taicpu(next1).oper[3]^.val,taicpu(next1).oper[4]^.val);
+                      l1 := l1 and l2;
+                      case l1 of
+                        -1:
+                          begin
+                            asml.remove(next1);
+                            next1.free;
+                            if (taicpu(p).oper[2]^.val = 0) then
+                              begin
+                                next1 := tai(p.next);
+                                asml.remove(p);
+                                p.free;
+                                p := next1;
+                                result := true;
+                              end;
+                          end;
+                        0:
+                          begin
+                            // masks have no bits in common
+                            taicpu(p).opcode := A_LI;
+                            taicpu(p).loadconst(1,0);
+                            taicpu(p).clearop(2);
+                            taicpu(p).clearop(3);
+                            taicpu(p).clearop(4);
+                            taicpu(p).ops := 2;
+                            taicpu(p).opercnt := 2;
+                            asml.remove(next1);
+                            next1.free;
+                            result := true;
+                          end
+                        else if tcgppc(cg).get_rlwi_const(l1,l1,l2) then
+                          begin
+                            taicpu(p).oper[3]^.val := l1;
+                            taicpu(p).oper[4]^.val := l2;
+                            asml.remove(next1);
+                            next1.free;
+                            result := true;
+                          end;
+                      end;
                     end;
                 end;
             end;
@@ -395,8 +414,8 @@ const
                       begin
                         taicpu(p).opcode := A_ANDIS_;
                         taicpu(p).oper[2]^.val :=
-                          ((1 shl (16-taicpu(p).oper[3]^.val)) - 1) and
-                          not((1 shl (15-taicpu(p).oper[4]^.val)) - 1);
+                          ((1 shl (16-taicpu(p).oper[3]^.val)) - 1) xor
+                          ((1 shl (15-taicpu(p).oper[4]^.val)) - 1);
                         taicpu(p).clearop(3);
                         taicpu(p).clearop(4);
                         taicpu(p).ops := 3;
@@ -406,9 +425,7 @@ const
                        (taicpu(p).oper[4]^.val >= 16) then
                       begin
                         taicpu(p).opcode := A_ANDI_;
-                        taicpu(p).oper[2]^.val :=
-                          ((1 shl (32-taicpu(p).oper[3]^.val)) - 1) and
-                          not((1 shl (31-taicpu(p).oper[4]^.val)) - 1);
+                        taicpu(p).oper[2]^.val := rlwinm2mask(taicpu(p).oper[3]^.val,taicpu(p).oper[4]^.val);
                         taicpu(p).clearop(3);
                         taicpu(p).clearop(4);
                         taicpu(p).ops := 3;