Browse Source

Look ahead more than one instruction in FoldShiftProcess for ARM

Up until now we only checked the next instruction. With the new load
scheduler this is insufficient, as the shift instruction and the next
usage of its result might be farther apart.

The new version uses GetNextInstructionUsingReg. This comes at a price:
we have to check very carefully that none of the registers used by the
shift is changed in between, and that the use of RRX will not break when
we fold and the flags get changed in between.

git-svn-id: trunk@22876 -
masta 12 years ago
parent
commit
3a017f76d0
1 changed files with 63 additions and 46 deletions
  1. 63 46
      compiler/arm/aoptcpu.pas

+ 63 - 46
compiler/arm/aoptcpu.pas

@@ -956,72 +956,89 @@ Implementation
                        (taicpu(p).oper[1]^.typ = top_reg) and
                        (taicpu(p).oper[2]^.typ = top_shifterop) and
                        (taicpu(p).oppostfix = PF_NONE) and
-                       GetNextInstruction(p, hp1) and
+                       GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                        MatchInstruction(hp1, [A_ADD, A_ADC, A_RSB, A_RSC, A_SUB, A_SBC,
                                               A_AND, A_BIC, A_EOR, A_ORR, A_TEQ, A_TST,
                                               A_CMP, A_CMN],
                                         [taicpu(p).condition], [PF_None]) and
-                       (taicpu(hp1).ops >= 2) and {Currently we can't fold into another shifterop}
+                       (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
+                         regLoadedWithNewValue(taicpu(p).oper[0]^.reg, hp1)) and
+                       (taicpu(hp1).ops >= 2) and
+                       {Currently we can't fold into another shifterop}
                        (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
+                       {Folding rrx is problematic because of the C-Flag, as we currently can't check
+                        NR_DEFAULTFLAGS for modification}
+                       (
+                         {Everything is fine if we don't use RRX}
+                         (taicpu(p).oper[2]^.shifterop^.shiftmode <> SM_RRX) or
+                         (
+                           {If it is RRX, then check if we're just accessing the next instruction}
+                           GetNextInstruction(p, hp2) and
+                           (hp1 = hp2)
+                         )
+                       ) and
+                       { reg1 might not be modified inbetween }
+                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
+                       { The shifterop can contain a register, might not be modified}
+                       (
+                         (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) or
+                         not(RegModifiedBetween(taicpu(p).oper[2]^.shifterop^.rs, p, hp1))
+                       ) and
                        (
                          {Only ONE of the two src operands is allowed to match}
                          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
                          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
                        ) then
                       begin
-                        CopyUsedRegs(TmpUsedRegs);
-                        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                         if taicpu(hp1).opcode in [A_TST, A_TEQ, A_CMN] then
                           I2:=0
                         else
                           I2:=1;
-                        if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp1,TmpUsedRegs)) then
-                          for I:=I2 to taicpu(hp1).ops-1 do
-                            if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
-                              begin
-                                { If the parameter matched on the second op from the RIGHT
-                                  we have to switch the parameters, this will not happen for CMP
-                                  were we're only evaluating the most right parameter
-                                }
-                                if I <> taicpu(hp1).ops-1 then
-                                  begin
-                                    {The SUB operators need to be changed when we swap parameters}
-                                    case taicpu(hp1).opcode of
-                                      A_SUB: tempop:=A_RSB;
-                                      A_SBC: tempop:=A_RSC;
-                                      A_RSB: tempop:=A_SUB;
-                                      A_RSC: tempop:=A_SBC;
-                                      else tempop:=taicpu(hp1).opcode;
-                                    end;
-                                    if taicpu(hp1).ops = 3 then
-                                      hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
-                                           taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
-                                           taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
-                                    else
-                                      hp2:=taicpu.op_reg_reg_shifterop(tempop,
-                                           taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
-                                           taicpu(p).oper[2]^.shifterop^);
-                                  end
-                                else
+                        for I:=I2 to taicpu(hp1).ops-1 do
+                          if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
+                            begin
+                              { If the parameter matched on the second op from the RIGHT
+                                we have to switch the parameters, this will not happen for CMP
+                                were we're only evaluating the most right parameter
+                              }
+                              if I <> taicpu(hp1).ops-1 then
+                                begin
+                                  {The SUB operators need to be changed when we swap parameters}
+                                  case taicpu(hp1).opcode of
+                                    A_SUB: tempop:=A_RSB;
+                                    A_SBC: tempop:=A_RSC;
+                                    A_RSB: tempop:=A_SUB;
+                                    A_RSC: tempop:=A_SBC;
+                                    else tempop:=taicpu(hp1).opcode;
+                                  end;
                                   if taicpu(hp1).ops = 3 then
-                                    hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
-                                         taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
+                                    hp2:=taicpu.op_reg_reg_reg_shifterop(tempop,
+                                         taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
                                          taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
                                   else
-                                    hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
+                                    hp2:=taicpu.op_reg_reg_shifterop(tempop,
                                          taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
                                          taicpu(p).oper[2]^.shifterop^);
-                                asml.insertbefore(hp2, p);
-                                asml.remove(p);
-                                asml.remove(hp1);
-                                p.free;
-                                hp1.free;
-                                p:=hp2;
-                                GetNextInstruction(p,hp1);
-                                DebugMsg('Peephole FoldShiftProcess done', p);
-                                break;
-                              end;
-                        ReleaseUsedRegs(TmpUsedRegs);
+                                end
+                              else
+                                if taicpu(hp1).ops = 3 then
+                                  hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
+                                       taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
+                                       taicpu(p).oper[1]^.reg, taicpu(p).oper[2]^.shifterop^)
+                                else
+                                  hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
+                                       taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
+                                       taicpu(p).oper[2]^.shifterop^);
+                              asml.insertbefore(hp2, hp1);
+                              asml.remove(p);
+                              asml.remove(hp1);
+                              p.free;
+                              hp1.free;
+                              p:=hp2;
+                              GetNextInstruction(p,hp1);
+                              DebugMsg('Peephole FoldShiftProcess done', p);
+                              break;
+                            end;
                       end;
 
                     {