Browse Source

Apply patch proposed by J. Gareth Moreton in bug report #0038527

  The patch reworks the LeaLea2Lea optimisation and hopefully fixes the bug (admittedly by adding a brand new optimisation!).

git-svn-id: trunk@48792 -
pierre 4 years ago
parent
commit
2cd6951205
3 changed files with 166 additions and 53 deletions
  1. 1 0
      .gitattributes
  2. 150 53
      compiler/x86/aoptx86.pas
  3. 15 0
      tests/webtbs/tw38527.pp

+ 1 - 0
.gitattributes

@@ -18675,6 +18675,7 @@ tests/webtbs/tw38412.pp svneol=native#text/pascal
 tests/webtbs/tw38413.pp svneol=native#text/pascal
 tests/webtbs/tw38429.pp svneol=native#text/pascal
 tests/webtbs/tw38497.pp svneol=native#text/pascal
+tests/webtbs/tw38527.pp svneol=native#text/plain
 tests/webtbs/tw3863.pp svneol=native#text/plain
 tests/webtbs/tw3864.pp svneol=native#text/plain
 tests/webtbs/tw3865.pp svneol=native#text/plain

+ 150 - 53
compiler/x86/aoptx86.pas

@@ -3360,67 +3360,164 @@ unit aoptx86;
         if (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
           GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) then
           begin
-            { changes
-                lea offset1(regX), reg1
-                lea offset2(reg1), reg1
-                to
-                lea offset1+offset2(regX), reg1 }
-
+            { Check common LEA/LEA conditions }
             if MatchInstruction(hp1,A_LEA,[taicpu(p).opsize]) and
-              MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
-              (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
-              (taicpu(p).oper[0]^.ref^.segment=NR_NO) and
-              (taicpu(p).oper[0]^.ref^.symbol=nil) and
-              (((taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg) and
-                (taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
-                (taicpu(p).oper[0]^.ref^.index=NR_NO) and
-                (taicpu(p).oper[0]^.ref^.index=taicpu(hp1).oper[0]^.ref^.index) and
-                (taicpu(p).oper[0]^.ref^.scalefactor=taicpu(hp1).oper[0]^.ref^.scalefactor)
-               ) or
-               ((taicpu(hp1).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg) and
-                (taicpu(p).oper[0]^.ref^.index=NR_NO)
-               ) or
-               ((taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg) and
-                (taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
-                ((taicpu(p).oper[0]^.ref^.base=NR_NO) or
-                 ((taicpu(p).oper[0]^.ref^.base=taicpu(p).oper[0]^.ref^.base) and
-                  (taicpu(p).oper[0]^.ref^.index=NR_NO)
-                 )
-                ) and
-                not(RegUsedBetween(taicpu(p).oper[0]^.ref^.index,p,hp1)))
-              ) and
-              not(RegUsedBetween(taicpu(p).oper[0]^.ref^.base,p,hp1)) and
-              (taicpu(p).oper[0]^.ref^.relsymbol=taicpu(hp1).oper[0]^.ref^.relsymbol) and
-              (taicpu(p).oper[0]^.ref^.segment=taicpu(hp1).oper[0]^.ref^.segment) and
-              (taicpu(p).oper[0]^.ref^.symbol=taicpu(hp1).oper[0]^.ref^.symbol) then
+              (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) and
+              (taicpu(p).oper[0]^.ref^.relsymbol = nil) and
+              (taicpu(p).oper[0]^.ref^.segment = NR_NO) and
+              (taicpu(p).oper[0]^.ref^.symbol = nil) and
+              (taicpu(hp1).oper[0]^.ref^.relsymbol = nil) and
+              (taicpu(hp1).oper[0]^.ref^.segment = NR_NO) and
+              (taicpu(hp1).oper[0]^.ref^.symbol = nil) and
+              (
+                (taicpu(p).oper[0]^.ref^.base = NR_NO) or { Don't call RegUsedBetween unnecessarily }
+                not(RegUsedBetween(taicpu(p).oper[0]^.ref^.base,p,hp1))
+              ) then
               begin
-                DebugMsg(SPeepholeOptimization + 'LeaLea2Lea done',p);
-                if taicpu(hp1).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
+                { changes
+                    lea (regX,scale), reg1
+                    lea offset(reg1,reg1), reg1
+                    to
+                    lea offset(regX,scale*2), reg1
+
+                  and
+                    lea (regX,scale1), reg1
+                    lea offset(reg1,scale2), reg1
+                    to
+                    lea offset(regX,scale1*scale2), reg1
+
+                  ... so long as the final scale does not exceed 8
+
+                  (Similarly, allow the first instruction to be "lea (regX,regX),reg1")
+                  }
+                if (taicpu(p).oper[0]^.ref^.offset = 0) and
+                  (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
+                  (
+                    (
+                      (taicpu(p).oper[0]^.ref^.base = NR_NO)
+                    ) or (
+                      (taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
+                      (
+                        (taicpu(p).oper[0]^.ref^.base = taicpu(p).oper[0]^.ref^.index) and
+                        not(RegUsedBetween(taicpu(p).oper[0]^.ref^.index, p, hp1))
+                      )
+                    )
+                  ) and (
+                    (
+                      { lea (reg1,scale2), reg1 variant }
+                      (taicpu(hp1).oper[0]^.ref^.base = NR_NO) and
+                      (
+                        (
+                          (taicpu(p).oper[0]^.ref^.base = NR_NO) and
+                          (taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor <= 8)
+                        ) or (
+                          { lea (regX,regX), reg1 variant }
+                          (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
+                          (taicpu(hp1).oper[0]^.ref^.scalefactor <= 4)
+                        )
+                      )
+                    ) or (
+                      { lea (reg1,reg1), reg1 variant }
+                      (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
+                      (taicpu(hp1).oper[0]^.ref^.scalefactor <= 1)
+                    )
+                  ) then
                   begin
-                    taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.base;
-                    inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
-                    { if the register is used as index and base, we have to increase for base as well
-                      and adapt base }
-                    if taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg then
+                    DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 2 done',p);
+
+                    { Make everything homogeneous to make calculations easier }
+                    if (taicpu(p).oper[0]^.ref^.base <> NR_NO) then
                       begin
-                        taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
-                        inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
+                        if taicpu(p).oper[0]^.ref^.index <> NR_NO then
+                          { Convert lea (regX,regX),reg1 to lea (regX,2),reg1 }
+                          taicpu(p).oper[0]^.ref^.scalefactor := 2
+                        else
+                          taicpu(p).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.base;
+
+                        taicpu(p).oper[0]^.ref^.base := NR_NO;
+                      end;
+
+                    if (taicpu(hp1).oper[0]^.ref^.base = NR_NO) then
+                      begin
+                        { Just to prevent miscalculations }
+                        if (taicpu(hp1).oper[0]^.ref^.scalefactor = 0) then
+                          taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor
+                        else
+                          taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(hp1).oper[0]^.ref^.scalefactor * taicpu(p).oper[0]^.ref^.scalefactor;
+                      end
+                    else
+                      begin
+                        taicpu(hp1).oper[0]^.ref^.base := NR_NO;
+                        taicpu(hp1).oper[0]^.ref^.scalefactor := taicpu(p).oper[0]^.ref^.scalefactor * 2;
                       end;
+
+                    taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.ref^.index;
+                    RemoveCurrentP(p);
+                    result:=true;
+                    exit;
                   end
-                else
-                  begin
-                    inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
-                    taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
-                  end;
-                if taicpu(p).oper[0]^.ref^.index<>NR_NO then
+
+                { changes
+                    lea offset1(regX), reg1
+                    lea offset2(reg1), reg1
+                    to
+                    lea offset1+offset2(regX), reg1 }
+                else if
+                  (
+                    (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) and
+                    (taicpu(p).oper[0]^.ref^.index = NR_NO)
+                  ) or (
+                    (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) and
+                    (taicpu(hp1).oper[0]^.ref^.scalefactor <= 1) and
+                    (
+                      (
+                        (taicpu(p).oper[0]^.ref^.index = NR_NO) or
+                        (taicpu(p).oper[0]^.ref^.base = NR_NO)
+                      ) or (
+                        (taicpu(p).oper[0]^.ref^.scalefactor <= 1) and
+                        (
+                          (taicpu(p).oper[0]^.ref^.index = NR_NO) or
+                          (
+                            (taicpu(p).oper[0]^.ref^.index = taicpu(p).oper[0]^.ref^.base) and
+                            (
+                              (taicpu(hp1).oper[0]^.ref^.index = NR_NO) or
+                              (taicpu(hp1).oper[0]^.ref^.base = NR_NO)
+                            )
+                          )
+                        )
+                      )
+                    )
+                  ) then
                   begin
-                    taicpu(hp1).oper[0]^.ref^.base:=taicpu(hp1).oper[0]^.ref^.index;
-                    taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
-                    taicpu(hp1).oper[0]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
+                    DebugMsg(SPeepholeOptimization + 'LeaLea2Lea 1 done',p);
+
+                    if taicpu(hp1).oper[0]^.ref^.index=taicpu(p).oper[1]^.reg then
+                      begin
+                        taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.base;
+                        inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset*max(taicpu(hp1).oper[0]^.ref^.scalefactor,1));
+                        { if the register is used as index and base, we have to increase for base as well
+                          and adapt base }
+                        if taicpu(hp1).oper[0]^.ref^.base=taicpu(p).oper[1]^.reg then
+                          begin
+                            taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
+                            inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
+                          end;
+                      end
+                    else
+                      begin
+                        inc(taicpu(hp1).oper[0]^.ref^.offset,taicpu(p).oper[0]^.ref^.offset);
+                        taicpu(hp1).oper[0]^.ref^.base:=taicpu(p).oper[0]^.ref^.base;
+                      end;
+                    if taicpu(p).oper[0]^.ref^.index<>NR_NO then
+                      begin
+                        taicpu(hp1).oper[0]^.ref^.base:=taicpu(hp1).oper[0]^.ref^.index;
+                        taicpu(hp1).oper[0]^.ref^.index:=taicpu(p).oper[0]^.ref^.index;
+                        taicpu(hp1).oper[0]^.ref^.scalefactor:=taicpu(p).oper[0]^.ref^.scalefactor;
+                      end;
+                    RemoveCurrentP(p);
+                    result:=true;
+                    exit;
                   end;
-                RemoveCurrentP(p);
-                result:=true;
-                exit;
               end;
 
             { Change:

+ 15 - 0
tests/webtbs/tw38527.pp

@@ -0,0 +1,15 @@
+{%OPT=-O2}
+
+{$mode objfpc}
+
+function F(n: SizeUint): SizeUint;
+begin
+    result := 4 * n + 4 * n;
+end;
+
+begin
+    writeln('Reference F(5): ', 4 * 5 + 4 * 5);
+    writeln(' Actual F(5): ', F(5));
+    if (F(5) <> 40) then
+      halt(1);
+end.