
* i386: For integer comparisons with zero, emit "test $-1,%reg" instead of "test %reg,%reg". It is more spill-friendly, because it transforms into "test $-1,spilltemp" and does not require a register.
* Improved the peephole optimizer to remove these instructions when they are preceded by a flag-setting instruction operating on the same location and followed by a conditional jump, and to change them back into "test %reg,%reg" otherwise (illustrated in the sketch below).

git-svn-id: trunk@27617 -
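
A rough sketch of the intended transformation (the register, label, and spill-slot operands below are illustrative, not taken from the commit):

    # comparing a value in %eax against zero, previous code generator output:
    testl %eax,%eax          # requires %eax to stay in a register
    jnz   .Lnonzero

    # new initial form emitted on i386:
    testl $-1,%eax           # if %eax is spilled, this can become e.g. "testl $-1,8(%esp)"
    jnz   .Lnonzero

    # the post peephole pass then either removes the test entirely, when a preceding
    # add/sub/and/or/xor/shift/inc/dec/neg on the same location already set the flags,
    # or rewrites it back into "testl %eax,%eax" when the operand stayed in a register.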

sergei 11 years ago
parent commit 2ee0c8de45
2 changed files with 30 additions and 14 deletions
  1. compiler/i386/popt386.pas (+21 −12)
  2. compiler/x86/nx86add.pas (+9 −2)

+ 21 - 12
compiler/i386/popt386.pas

@@ -2292,6 +2292,7 @@ end;
 procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
 var
   p,hp1,hp2: tai;
+  IsTestConstX: boolean;
 begin
   p := BlockStart;
   while (p <> BlockEnd) Do
@@ -2397,22 +2398,22 @@ See test/tgadint64 in the test suite.
               A_TEST, A_OR:
                 {removes the line marked with (x) from the sequence
                  and/or/xor/add/sub/... $x, %y
-                 test/or %y, %y   (x)
+                 test/or %y, %y  | test $-1, %y    (x)
                  j(n)z _Label
-                    as the first instruction already adjusts the ZF}
+                    as the first instruction already adjusts the ZF
+                    %y operand may also be a reference }
                  begin
-                   if OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
-                    if GetLastInstruction(p, hp1) and
+                   IsTestConstX:=(taicpu(p).opcode=A_TEST) and
+                     MatchOperand(taicpu(p).oper[0]^,-1);
+                   if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
+                      GetLastInstruction(p, hp1) and
                       (tai(hp1).typ = ait_instruction) and
                       GetNextInstruction(p,hp2) and
-                      (hp2.typ = ait_instruction) and
-                      ((taicpu(hp2).opcode = A_SETcc) or
-                       (taicpu(hp2).opcode = A_Jcc) or
-                       (taicpu(hp2).opcode = A_CMOVcc)) then
+                      MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
                      case taicpu(hp1).opcode Of
                        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                          begin
-                           if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
+                           if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                              { and in case of carry for A(E)/B(E)/C/NC                  }
                               ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
@@ -2428,7 +2429,7 @@ See test/tgadint64 in the test suite.
                          end;
                        A_SHL, A_SAL, A_SHR, A_SAR:
                          begin
-                           if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
+                           if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                              { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                              { therefore, it's only safe to do this optimization for     }
                              { shifts by a (nonzero) constant                            }
@@ -2447,7 +2448,7 @@ See test/tgadint64 in the test suite.
                          end;
                        A_DEC, A_INC, A_NEG:
                          begin
-                           if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
+                           if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                              { and in case of carry for A(E)/B(E)/C/NC                  }
                              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
@@ -2472,7 +2473,15 @@ See test/tgadint64 in the test suite.
                                continue
                              end;
                          end
-                     end
+                     else
+                       { change "test  $-1,%reg" into "test %reg,%reg" }
+                       if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
+                         taicpu(p).loadoper(0,taicpu(p).oper[1]^);
+                     end { case }
+                   else
+                     { change "test  $-1,%reg" into "test %reg,%reg" }
+                     if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
+                       taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                  end;
             end;
           end;

+ 9 - 2
compiler/x86/nx86add.pas

@@ -66,7 +66,7 @@ unit nx86add;
   implementation
 
     uses
-      globtype,globals,
+      globtype,globals,systems,
       verbose,cutils,
       cpuinfo,
       aasmbase,aasmtai,aasmdata,aasmcpu,
@@ -131,7 +131,14 @@ unit nx86add;
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=0) then
                  begin
-                   emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
+                { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
+                   spilling, while 'test %reg,%reg' still requires loading into register.
+                   If spilling is not necessary, it is changed back into 'test %reg,%reg' by
+                   peephole optimizer (this optimization is currently available only for i386). }
+                   if (target_info.cpu=cpu_i386) then
+                     emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
+                   else  
+                     emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                  end
                else
                  if (op=A_ADD) and