2
0
Эх сурвалжийг харах

+ register renaming ("fixes" bug1088)
* changed command line options meanings for optimizer:
O2 now means peephole optimizations, CSE and register renaming in 1 pass
O3 is the same, but repeated until no further optimizations are
possible or until 5 passes have been done (to avoid endless loops)
* changed aopt386 so it does this looping
* added some procedures from csopt386 to the interface because they're
used by rropt386 as well
* some changes to csopt386 and daopt386 so that instructions newly added
by the CSE get optimizer info (they were simply skipped previously);
this fixes some bugs

Jonas Maebe 25 жил өмнө
parent
commit
a4fde73649

+ 1 - 1
compiler/errore.msg

@@ -1870,7 +1870,7 @@ option_help_pages=11025_[
 3*2Ou_enable uncertain optimizations (see docs)
 3*2Ou_enable uncertain optimizations (see docs)
 3*2O1_level 1 optimizations (quick optimizations)
 3*2O1_level 1 optimizations (quick optimizations)
 3*2O2_level 2 optimizations (-O1 + slower optimizations)
 3*2O2_level 2 optimizations (-O1 + slower optimizations)
-3*2O3_level 3 optimizations (same as -O2u)
+3*2O3_level 3 optimizations (-O2 repeatedly, max 5 times)
 3*2Op<x>_target processor:
 3*2Op<x>_target processor:
 3*3Op1_set target processor to 386/486
 3*3Op1_set target processor to 386/486
 3*3Op2_set target processor to Pentium/PentiumMMX (tm)
 3*3Op2_set target processor to Pentium/PentiumMMX (tm)

+ 37 - 14
compiler/i386/aopt386.pas

@@ -43,15 +43,20 @@ Uses
 
 
 Procedure Optimize(AsmL: PAasmOutput);
 Procedure Optimize(AsmL: PAasmOutput);
 Var
 Var
-  count, max: longint;
   BlockStart, BlockEnd, HP: Pai;
   BlockStart, BlockEnd, HP: Pai;
+  pass: longint;
+  slowopt, changed, lastLoop: boolean;
 Begin
 Begin
-  if (cs_slowoptimize in aktglobalswitches) then
-   { Optimize twice }
-    max := 2
-  else max := 1;
-  for count := 1 to max do
-    begin
+  slowopt := (cs_slowoptimize in aktglobalswitches);
+  pass := 0;
+  changed := false;
+  repeat
+     lastLoop :=
+       not(slowopt) or
+       (not changed and (pass > 2)) or
+      { prevent endless loops }
+       (pass = 4);
+     changed := false;
    { Setup labeltable, always necessary }
    { Setup labeltable, always necessary }
      BlockStart := Pai(AsmL^.First);
      BlockStart := Pai(AsmL^.First);
      BlockEnd := DFAPass1(AsmL, BlockStart);
      BlockEnd := DFAPass1(AsmL, BlockStart);
@@ -59,13 +64,15 @@ Begin
    { or nil                                                                }
    { or nil                                                                }
      While Assigned(BlockStart) Do
      While Assigned(BlockStart) Do
        Begin
        Begin
+         if pass = 0 then
+           PrePeepHoleOpts(AsmL, BlockStart, BlockEnd);
         { Peephole optimizations }
         { Peephole optimizations }
          PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
          PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
         { Only perform them twice in the first pass }
         { Only perform them twice in the first pass }
-         if count = 1 then
+         if pass = 0 then
            PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
            PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
         { Data flow analyzer }
         { Data flow analyzer }
-         If (cs_slowoptimize in aktglobalswitches) Then
+         If (cs_fastoptimize in aktglobalswitches) Then
            Begin
            Begin
              If DFAPass2(
              If DFAPass2(
 {$ifdef statedebug}
 {$ifdef statedebug}
@@ -73,10 +80,12 @@ Begin
 {$endif statedebug}
 {$endif statedebug}
                                BlockStart, BlockEnd) Then
                                BlockStart, BlockEnd) Then
               { common subexpression elimination }
               { common subexpression elimination }
-               CSE(AsmL, BlockStart, BlockEnd);
+               changed := CSE(asmL, blockStart, blockEnd, pass) or changed;
            End;
            End;
         { More peephole optimizations }
         { More peephole optimizations }
          PeepHoleOptPass2(AsmL, BlockStart, BlockEnd);
          PeepHoleOptPass2(AsmL, BlockStart, BlockEnd);
+         if lastLoop then
+           PostPeepHoleOpts(AsmL, BlockStart, BlockEnd);
         { Dispose labeltabel }
         { Dispose labeltabel }
          ShutDownDFA;
          ShutDownDFA;
         { Continue where we left off, BlockEnd is either the start of an }
         { Continue where we left off, BlockEnd is either the start of an }
@@ -100,15 +109,29 @@ Begin
                BlockEnd := DFAPass1(AsmL, BlockStart)
                BlockEnd := DFAPass1(AsmL, BlockStart)
              { Otherwise, skip the next assembler block }
              { Otherwise, skip the next assembler block }
              Else BlockStart := HP;
              Else BlockStart := HP;
-           End
-      End;
-   end;
+           End;
+       End;
+     inc(pass);
+  until lastLoop;
 End;
 End;
 
 
 End.
 End.
 {
 {
   $Log$
   $Log$
-  Revision 1.1  2000-10-15 09:47:42  peter
+  Revision 1.2  2000-10-24 10:40:53  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peepholopts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.1  2000/10/15 09:47:42  peter
     * moved to i386/
     * moved to i386/
 
 
   Revision 1.5  2000/09/24 15:06:11  peter
   Revision 1.5  2000/09/24 15:06:11  peter

+ 210 - 154
compiler/i386/csopt386.pas

@@ -27,16 +27,21 @@ Unit CSOpt386;
 
 
 Interface
 Interface
 
 
-Uses aasm;
+Uses aasm, cpubase, cpuasm;
 
 
-{Procedure CSOpt386(First, Last: Pai);}
-Procedure CSE(AsmL: PAasmOutput; First, Last: Pai);
+function CSE(asmL: paasmoutput; first, last: pai; pass: longint): boolean;
+
+function doReplaceReg(hp: paicpu; newReg, orgReg: tregister): boolean;
+function changeOp(var o: toper; newReg, orgReg: tregister): boolean;
+function storeBack(p1: pai; orgReg, newReg: tregister): boolean;
+function NoHardCodedRegs(p: paicpu; orgReg, newReg: TRegister): boolean;
+function RegSizesOK(oldReg,newReg: TRegister; p: paicpu): boolean;
 
 
 Implementation
 Implementation
 
 
 Uses
 Uses
   {$ifdef replaceregdebug}cutils,{$endif}
   {$ifdef replaceregdebug}cutils,{$endif}
-  verbose, hcodegen, globals,cpubase,cpuasm,DAOpt386, tgeni386;
+  globtype, verbose, hcodegen, globals, daopt386, tgeni386, rropt386;
 
 
 {
 {
 Function PaiInSequence(P: Pai; Const Seq: TContent): Boolean;
 Function PaiInSequence(P: Pai; Const Seq: TContent): Boolean;
@@ -83,7 +88,7 @@ begin
               end;
               end;
           end
           end
        else
        else
-        if is_reg_var[reg32(p^.oper[1].reg)] then
+{         if is_reg_var[reg32(p^.oper[1].reg)] then }
           for regCounter := R_EAX to R_EDI do
           for regCounter := R_EAX to R_EDI do
             begin
             begin
               if writeDestroysContents(p^.oper[1],regCounter,c[regCounter]) then
               if writeDestroysContents(p^.oper[1],regCounter,c[regCounter]) then
@@ -130,9 +135,9 @@ begin
       for opCount := 1 to MaxCh do
       for opCount := 1 to MaxCh do
         case InsProp[p^.opcode].Ch[opCount] of
         case InsProp[p^.opcode].Ch[opCount] of
           Ch_MOp1,CH_WOp1,CH_RWOp1:
           Ch_MOp1,CH_WOp1,CH_RWOp1:
-            if (p^.oper[0].typ = top_ref) or
-               ((p^.oper[0].typ = top_reg) and
-                is_reg_var[reg32(p^.oper[0].reg)]) then
+{             if (p^.oper[0].typ = top_ref) or }
+{                ((p^.oper[0].typ = top_reg) and }
+{                 is_reg_var[reg32(p^.oper[0].reg)]) then }
               for regCounter := R_EAX to R_EDI do
               for regCounter := R_EAX to R_EDI do
                 if writeDestroysContents(p^.oper[0],regCounter,c[regCounter]) then
                 if writeDestroysContents(p^.oper[0],regCounter,c[regCounter]) then
                   begin
                   begin
@@ -140,9 +145,9 @@ begin
                     modifiesConflictingMemLocation := not(reg in regsStillValid);
                     modifiesConflictingMemLocation := not(reg in regsStillValid);
                   end;
                   end;
           Ch_MOp2,CH_WOp2,CH_RWOp2:
           Ch_MOp2,CH_WOp2,CH_RWOp2:
-            if (p^.oper[1].typ = top_ref) or
-               ((p^.oper[1].typ = top_reg) and
-                is_reg_var[reg32(p^.oper[1].reg)]) then
+{             if (p^.oper[1].typ = top_ref) or }
+{                ((p^.oper[1].typ = top_reg) and }
+{                 is_reg_var[reg32(p^.oper[1].reg)]) then }
               for regCounter := R_EAX to R_EDI do
               for regCounter := R_EAX to R_EDI do
                 if writeDestroysContents(p^.oper[1],regCounter,c[regCounter]) then
                 if writeDestroysContents(p^.oper[1],regCounter,c[regCounter]) then
                   begin
                   begin
@@ -150,9 +155,9 @@ begin
                     modifiesConflictingMemLocation := not(reg in regsStillValid);
                     modifiesConflictingMemLocation := not(reg in regsStillValid);
                   end;
                   end;
           Ch_MOp3,CH_WOp3,CH_RWOp3:
           Ch_MOp3,CH_WOp3,CH_RWOp3:
-            if (p^.oper[2].typ = top_ref) or
-               ((p^.oper[2].typ = top_reg) and
-                is_reg_var[reg32(p^.oper[2].reg)]) then
+{             if (p^.oper[2].typ = top_ref) or }
+{                ((p^.oper[2].typ = top_reg) and }
+{                 is_reg_var[reg32(p^.oper[2].reg)]) then }
               for regCounter := R_EAX to R_EDI do
               for regCounter := R_EAX to R_EDI do
                 if writeDestroysContents(p^.oper[2],regCounter,c[regCounter]) then
                 if writeDestroysContents(p^.oper[2],regCounter,c[regCounter]) then
                   begin
                   begin
@@ -290,7 +295,7 @@ end;
  Found holds the number of instructions between StartMod and EndMod and false
  Found holds the number of instructions between StartMod and EndMod and false
  is returned}
  is returned}
 Function CheckSequence(p: Pai; var prev: pai; Reg: TRegister; Var Found: Longint;
 Function CheckSequence(p: Pai; var prev: pai; Reg: TRegister; Var Found: Longint;
-           Var RegInfo: TRegInfo): Boolean;
+           Var RegInfo: TRegInfo; findPrevSeqs: boolean): Boolean;
 
 
 const
 const
   checkingPrevSequences: boolean = false;
   checkingPrevSequences: boolean = false;
@@ -310,7 +315,8 @@ var
                  in [con_ref,con_noRemoveRef]);
                  in [con_ref,con_noRemoveRef]);
         if currentReg > R_EDI then
         if currentReg > R_EDI then
           begin
           begin
-            if isSimpleMemLoc(paicpu(p)^.oper[0].ref^) then
+            if (paicpu(p)^.oper[0].typ <> top_ref) or
+               isSimpleMemLoc(paicpu(p)^.oper[0].ref^) then
               begin
               begin
                 checkingPrevSequences := true;
                 checkingPrevSequences := true;
                 passedJump := false;
                 passedJump := false;
@@ -321,15 +327,19 @@ var
         else getNextRegToTest := currentReg;
         else getNextRegToTest := currentReg;
       end;
       end;
     if checkingPrevSequences then
     if checkingPrevSequences then
-      getNextRegToTest :=
-        getPrevSequence(p,reg,prev,prev,passedJump,regsNotRead,RegsStillValid);
+      if findPrevSeqs then
+        getNextRegToTest :=
+          getPrevSequence(p,reg,prev,prev,passedJump,regsNotRead,RegsStillValid)
+      else
+        getNextRegToTest := R_NO;
   end;
   end;
 
 
 Var hp2, hp3{, EndMod},highPrev, orgPrev: Pai;
 Var hp2, hp3{, EndMod},highPrev, orgPrev: Pai;
     {Cnt,} OldNrOfMods: Longint;
     {Cnt,} OldNrOfMods: Longint;
     startRegInfo, OrgRegInfo, HighRegInfo: TRegInfo;
     startRegInfo, OrgRegInfo, HighRegInfo: TRegInfo;
+    regModified: array[R_NO..R_EDI] of boolean;
     HighFound, OrgRegFound: Byte;
     HighFound, OrgRegFound: Byte;
-    RegCounter, regCounter2: TRegister;
+    RegCounter, regCounter2, tmpreg: TRegister;
     OrgRegResult: Boolean;
     OrgRegResult: Boolean;
     TmpResult: Boolean;
     TmpResult: Boolean;
     {TmpState: Byte;}
     {TmpState: Byte;}
@@ -356,6 +366,7 @@ Begin {CheckSequence}
   regCounter := getNextRegToTest(prev,R_NO);
   regCounter := getNextRegToTest(prev,R_NO);
   While (RegCounter <> R_NO) Do
   While (RegCounter <> R_NO) Do
     Begin
     Begin
+      fillchar(regModified,sizeof(regModified),0);
       regInfo := startRegInfo;
       regInfo := startRegInfo;
       Found := 0;
       Found := 0;
       hp2 := PPaiProp(prev^.OptInfo)^.Regs[RegCounter].StartMod;
       hp2 := PPaiProp(prev^.OptInfo)^.Regs[RegCounter].StartMod;
@@ -371,13 +382,34 @@ Begin {CheckSequence}
              ((paicpu(hp3)^.opcode = A_MOV) or
              ((paicpu(hp3)^.opcode = A_MOV) or
               (paicpu(hp3)^.opcode = A_MOVZX) or
               (paicpu(hp3)^.opcode = A_MOVZX) or
               (paicpu(hp3)^.opcode = A_MOVSX)) and
               (paicpu(hp3)^.opcode = A_MOVSX)) and
-             (paicpu(hp3)^.oper[0].typ in
-               [top_const,top_ref,top_symbol]) and
              (paicpu(hp3)^.oper[1].typ = top_reg) and
              (paicpu(hp3)^.oper[1].typ = top_reg) and
-             not(regInRef(reg32(paicpu(hp3)^.oper[1].reg),
-                   paicpu(hp3)^.oper[0].ref^)) then
-            regInfo.lastReload
-              [reg32(paicpu(hp3)^.oper[1].reg)] := hp3;
+             not(regInOp(paicpu(hp3)^.oper[1].reg,
+                   paicpu(hp3)^.oper[0])) then
+            begin
+              tmpreg := reg32(paicpu(hp3)^.oper[1].reg);
+              regInfo.lastReload[tmpreg] := hp3;
+              case paicpu(hp3)^.oper[0].typ of
+                top_ref:
+                  begin
+                  if regModified[reg32(paicpu(hp3)^.oper[0].ref^.base)] then
+                    with ppaiprop(hp3^.optinfo)^.regs[tmpreg] do
+                      if nrOfMods > (oldNrOfMods - found) then
+                        oldNrOfMods := found + nrOfMods;
+                  if regModified[reg32(paicpu(hp3)^.oper[0].ref^.index)] then
+                    with ppaiprop(hp3^.optinfo)^.regs[tmpreg] do
+                      if nrOfMods > (oldNrOfMods - found) then
+                        oldNrOfMods := found + nrOfMods;
+                  end;
+                top_reg:
+                  if regModified[reg32(paicpu(hp3)^.oper[0].reg)] then
+                    with ppaiprop(hp3^.optinfo)^.regs[tmpreg] do
+                      if nrOfMods > (oldNrOfMods - found) then
+                        oldNrOfMods := found + nrOfMods;
+              end;
+            end;
+          for regCounter2 := R_EAX to R_EDI do
+            regModified[regCounter2] := regModified[regCounter2] or
+              regModifiedByInstruction(regCounter2,hp3);
           GetNextInstruction(hp2, hp2);
           GetNextInstruction(hp2, hp2);
           GetNextInstruction(hp3, hp3);
           GetNextInstruction(hp3, hp3);
           Inc(Found)
           Inc(Found)
@@ -674,40 +706,6 @@ begin
 end;
 end;
 
 
 
 
-function FindRegDealloc(reg: tregister; p: pai): boolean;
-{ assumes reg is a 32bit register }
-var
-  hp: pai;
-  first: boolean;
-begin
-  findregdealloc := false;
-  first := true;
-  while assigned(p^.previous) and
-        ((Pai(p^.previous)^.typ in (skipinstr+[ait_align])) or
-         ((Pai(p^.previous)^.typ = ait_label) and
-          labelCanBeSkipped(pai_label(p^.previous)))) do
-    begin
-      p := pai(p^.previous);
-      if (p^.typ = ait_regalloc) and
-         (pairegalloc(p)^.reg = reg) then
-        if not(pairegalloc(p)^.allocation) then
-          if first then
-            begin
-              findregdealloc := true;
-              break;
-            end
-          else
-            begin
-              findRegDealloc :=
-                getNextInstruction(p,hp) and
-                 regLoadedWithNewValue(reg,false,hp);
-              break
-            end
-        else
-          first := false;
-    end
-end;
-
 Procedure ClearRegContentsFrom(reg: TRegister; p, endP: pai);
 Procedure ClearRegContentsFrom(reg: TRegister; p, endP: pai);
 { first clears the contents of reg from p till endP. Then the contents are }
 { first clears the contents of reg from p till endP. Then the contents are }
 { cleared until the first instruction that changes reg                     }
 { cleared until the first instruction that changes reg                     }
@@ -753,7 +751,7 @@ begin
 {$endif replaceregdebug}
 {$endif replaceregdebug}
 end;
 end;
 
 
-function NoHardCodedRegs(p: paicpu; orgReg, newReg: tRegister): boolean;
+function NoHardCodedRegs(p: paicpu; orgReg, newReg: TRegister): boolean;
 var chCount: byte;
 var chCount: byte;
 begin
 begin
   case p^.opcode of
   case p^.opcode of
@@ -775,7 +773,7 @@ begin
   end;
   end;
 end;
 end;
 
 
-function ChangeReg(var Reg: TRegister; orgReg, newReg: TRegister): boolean;
+function ChangeReg(var Reg: TRegister; newReg, orgReg: TRegister): boolean;
 begin
 begin
   changeReg := true;
   changeReg := true;
   if reg = newReg then
   if reg = newReg then
@@ -787,15 +785,15 @@ begin
   else changeReg := false;
   else changeReg := false;
 end;
 end;
 
 
-function changeOp(var o: toper; orgReg, newReg: tregister): boolean;
+function changeOp(var o: toper; newReg, orgReg: tregister): boolean;
 begin
 begin
   case o.typ of
   case o.typ of
-    top_reg: changeOp := changeReg(o.reg,orgReg,newReg);
+    top_reg: changeOp := changeReg(o.reg,newReg,orgReg);
     top_ref:
     top_ref:
       begin
       begin
         changeOp :=
         changeOp :=
-          changeReg(o.ref^.base,orgReg,newReg) or
-          changeReg(o.ref^.index,orgReg,newReg);
+          changeReg(o.ref^.base,newReg,orgReg) or
+          changeReg(o.ref^.index,newReg,orgReg);
       end;
       end;
   end;
   end;
 end;
 end;
@@ -829,14 +827,14 @@ begin
     end;
     end;
 end;
 end;
 
 
-function doReplaceReg(orgReg,newReg: tregister; hp: paicpu): boolean;
+function doReplaceReg(hp: paicpu; newReg, orgReg: tregister): boolean;
 var
 var
-  opCount: byte;
+  opCount: longint;
   tmpResult: boolean;
   tmpResult: boolean;
 begin
 begin
-  for opCount := 0 to 2 do
+  for opCount := 0 to hp^.ops-1 do
     tmpResult :=
     tmpResult :=
-      changeOp(hp^.oper[opCount],orgReg,newReg) or tmpResult;
+      changeOp(hp^.oper[opCount],newReg,orgReg) or tmpResult;
   doReplaceReg := tmpResult;
   doReplaceReg := tmpResult;
 end;
 end;
 
 
@@ -858,7 +856,7 @@ begin
     end;
     end;
 end;
 end;
 
 
-function doReplaceReadReg(orgReg,newReg: tregister; p: paicpu): boolean;
+function doReplaceReadReg(p: paicpu; newReg,orgReg: tregister): boolean;
 var opCount: byte;
 var opCount: byte;
 begin
 begin
   doReplaceReadReg := false;
   doReplaceReadReg := false;
@@ -870,13 +868,13 @@ begin
           1: internalerror(1301001);
           1: internalerror(1301001);
           2,3:
           2,3:
             begin
             begin
-              if changeOp(p^.oper[0],orgReg,newReg) then
+              if changeOp(p^.oper[0],newReg,orgReg) then
                 begin
                 begin
 {                  updateStates(orgReg,newReg,p,false);}
 {                  updateStates(orgReg,newReg,p,false);}
                   doReplaceReadReg := true;
                   doReplaceReadReg := true;
                 end;
                 end;
              if p^.ops = 3 then
              if p^.ops = 3 then
-                if changeOp(p^.oper[1],orgReg,newReg) then
+                if changeOp(p^.oper[1],newReg,orgReg) then
                   begin
                   begin
 {                    updateStates(orgReg,newReg,p,false);}
 {                    updateStates(orgReg,newReg,p,false);}
                     doReplaceReadReg := true;
                     doReplaceReadReg := true;
@@ -889,7 +887,7 @@ begin
       begin
       begin
         for opCount := 0 to 2 do
         for opCount := 0 to 2 do
           if p^.oper[opCount].typ = top_ref then
           if p^.oper[opCount].typ = top_ref then
-            if changeOp(p^.oper[opCount],orgReg,newReg) then
+            if changeOp(p^.oper[opCount],newReg,orgReg) then
               begin
               begin
 {                updateStates(orgReg,newReg,p,false);}
 {                updateStates(orgReg,newReg,p,false);}
                 doReplaceReadReg := true;
                 doReplaceReadReg := true;
@@ -898,21 +896,21 @@ begin
           case InsProp[p^.opcode].Ch[opCount] of
           case InsProp[p^.opcode].Ch[opCount] of
             Ch_ROp1:
             Ch_ROp1:
               if p^.oper[0].typ = top_reg then
               if p^.oper[0].typ = top_reg then
-                if changeReg(p^.oper[0].reg,orgReg,newReg) then
+                if changeReg(p^.oper[0].reg,newReg,orgReg) then
                   begin
                   begin
 {                    updateStates(orgReg,newReg,p,false);}
 {                    updateStates(orgReg,newReg,p,false);}
                     doReplaceReadReg := true;
                     doReplaceReadReg := true;
                   end;
                   end;
             Ch_ROp2:
             Ch_ROp2:
               if p^.oper[1].typ = top_reg then
               if p^.oper[1].typ = top_reg then
-                if changeReg(p^.oper[1].reg,orgReg,newReg) then
+                if changeReg(p^.oper[1].reg,newReg,orgReg) then
                   begin
                   begin
 {                    updateStates(orgReg,newReg,p,false);}
 {                    updateStates(orgReg,newReg,p,false);}
                     doReplaceReadReg := true;
                     doReplaceReadReg := true;
                   end;
                   end;
             Ch_ROp3:
             Ch_ROp3:
               if p^.oper[2].typ = top_reg then
               if p^.oper[2].typ = top_reg then
-                if changeReg(p^.oper[2].reg,orgReg,newReg) then
+                if changeReg(p^.oper[2].reg,newReg,orgReg) then
                   begin
                   begin
 {                    updateStates(orgReg,newReg,p,false);}
 {                    updateStates(orgReg,newReg,p,false);}
                     doReplaceReadReg := true;
                     doReplaceReadReg := true;
@@ -998,6 +996,20 @@ begin
 end;
 end;
 
 
 
 
+function storeBack(p1: pai; orgReg, newReg: tregister): boolean;
+{ returns true if p1 contains an instruction that stores the contents }
+{ of newReg back to orgReg                                            }
+begin
+  storeBack :=
+    (p1^.typ = ait_instruction) and
+    (paicpu(p1)^.opcode = A_MOV) and
+    (paicpu(p1)^.oper[0].typ = top_reg) and
+    (paicpu(p1)^.oper[0].reg = newReg) and
+    (paicpu(p1)^.oper[1].typ = top_reg) and
+    (paicpu(p1)^.oper[1].reg = orgReg);
+end;
+
+
 function ReplaceReg(asmL: PaasmOutput; orgReg, newReg: TRegister; p: pai;
 function ReplaceReg(asmL: PaasmOutput; orgReg, newReg: TRegister; p: pai;
            const c: TContent; orgRegCanBeModified: Boolean;
            const c: TContent; orgRegCanBeModified: Boolean;
            var returnEndP: pai): Boolean;
            var returnEndP: pai): Boolean;
@@ -1012,18 +1024,6 @@ var endP, hp: Pai;
     removeLast, sequenceEnd, tmpResult, newRegModified, orgRegRead,
     removeLast, sequenceEnd, tmpResult, newRegModified, orgRegRead,
       stateChanged, readStateChanged: Boolean;
       stateChanged, readStateChanged: Boolean;
 
 
-  function storeBack(p1: pai): boolean;
-  { returns true if p1 contains an instruction that stores the contents }
-  { of newReg back to orgReg                                            }
-  begin
-    storeBack :=
-      (p1^.typ = ait_instruction) and
-      (paicpu(p1)^.opcode = A_MOV) and
-      (paicpu(p1)^.oper[0].typ = top_reg) and
-      (paicpu(p1)^.oper[0].reg = newReg) and
-      (paicpu(p1)^.oper[1].typ = top_reg) and
-      (paicpu(p1)^.oper[1].reg = orgReg);
-  end;
 
 
 begin
 begin
   ReplaceReg := false;
   ReplaceReg := false;
@@ -1055,7 +1055,7 @@ begin
           { if the newReg gets stored back to the oldReg, we can change }
           { if the newReg gets stored back to the oldReg, we can change }
           { "mov %oldReg,%newReg; <operations on %newReg>; mov %newReg, }
           { "mov %oldReg,%newReg; <operations on %newReg>; mov %newReg, }
           { %oldReg" to "<operations on %oldReg>"                       }
           { %oldReg" to "<operations on %oldReg>"                       }
-          removeLast := storeBack(endP);
+          removeLast := storeBack(endP, orgReg, newReg);
           sequenceEnd :=
           sequenceEnd :=
             { no support for (i)div, mul and imul with hardcoded operands }
             { no support for (i)div, mul and imul with hardcoded operands }
             (noHardCodedRegs(paicpu(endP),orgReg,newReg) and
             (noHardCodedRegs(paicpu(endP),orgReg,newReg) and
@@ -1144,14 +1144,14 @@ begin
           if {not(PPaiProp(hp^.optInfo)^.canBeRemoved) and }
           if {not(PPaiProp(hp^.optInfo)^.canBeRemoved) and }
              (hp^.typ = ait_instruction) then
              (hp^.typ = ait_instruction) then
             stateChanged :=
             stateChanged :=
-              doReplaceReg(orgReg,newReg,paicpu(hp)) or stateChanged;
+              doReplaceReg(paicpu(hp),newReg,orgReg) or stateChanged;
             if stateChanged then
             if stateChanged then
               updateStates(orgReg,newReg,hp,true);
               updateStates(orgReg,newReg,hp,true);
           getNextInstruction(hp,hp)
           getNextInstruction(hp,hp)
         end;
         end;
       if assigned(endp) and (endp^.typ = ait_instruction) then
       if assigned(endp) and (endp^.typ = ait_instruction) then
         readStateChanged :=
         readStateChanged :=
-          DoReplaceReadReg(orgReg,newReg,paicpu(endP));
+          DoReplaceReadReg(paicpu(endP),newReg,orgReg);
       if stateChanged or readStateChanged then
       if stateChanged or readStateChanged then
         updateStates(orgReg,newReg,endP,stateChanged);
         updateStates(orgReg,newReg,endP,stateChanged);
 
 
@@ -1271,11 +1271,11 @@ begin
 end;
 end;
 
 
 
 
-Procedure DoCSE(AsmL: PAasmOutput; First, Last: Pai);
+procedure DoCSE(AsmL: PAasmOutput; First, Last: Pai; findPrevSeqs, doSubOpts: boolean);
 {marks the instructions that can be removed by RemoveInstructs. They're not
 {marks the instructions that can be removed by RemoveInstructs. They're not
  removed immediately because sometimes an instruction needs to be checked in
  removed immediately because sometimes an instruction needs to be checked in
  two different sequences}
  two different sequences}
-var cnt, cnt2, cnt3: longint;
+var cnt, cnt2, orgNrOfMods: longint;
     p, hp1, hp2, prevSeq, prevSeq_next: Pai;
     p, hp1, hp2, prevSeq, prevSeq_next: Pai;
     hp3, hp4: pai;
     hp3, hp4: pai;
     hp5 : pai;
     hp5 : pai;
@@ -1284,7 +1284,6 @@ var cnt, cnt2, cnt3: longint;
 Begin
 Begin
   p := First;
   p := First;
   SkipHead(p);
   SkipHead(p);
-  First := p;
   While (p <> Last) Do
   While (p <> Last) Do
     Begin
     Begin
       Case p^.typ Of
       Case p^.typ Of
@@ -1302,11 +1301,19 @@ Begin
                        PPaiProp(Pai(p)^.OptInfo)^.CanBeRemoved := True;
                        PPaiProp(Pai(p)^.OptInfo)^.CanBeRemoved := True;
               A_MOV, A_MOVZX, A_MOVSX:
               A_MOV, A_MOVZX, A_MOVSX:
                 Begin
                 Begin
+                  hp2 := p;
                   Case Paicpu(p)^.oper[0].typ Of
                   Case Paicpu(p)^.oper[0].typ Of
-                    Top_Ref:
-                      Begin {destination is always a register in this case}
+                    top_ref, top_reg:
+                     if (paicpu(p)^.oper[1].typ = top_reg) then
+                       Begin
                         With PPaiProp(p^.OptInfo)^.Regs[Reg32(Paicpu(p)^.oper[1].reg)] Do
                         With PPaiProp(p^.OptInfo)^.Regs[Reg32(Paicpu(p)^.oper[1].reg)] Do
                           Begin
                           Begin
+                            if assigned(startmod) and
+                               (startmod = p)then
+                              orgNrOfMods := ppaiprop(startmod^.optinfo)^.
+                                regs[reg32(paicpu(p)^.oper[1].reg)].nrOfMods
+                            else
+                              orgNrOfMods := 0;
                             If (p = StartMod) And
                             If (p = StartMod) And
                                GetLastInstruction (p, hp1) And
                                GetLastInstruction (p, hp1) And
                                (hp1^.typ <> ait_marker) Then
                                (hp1^.typ <> ait_marker) Then
@@ -1317,7 +1324,7 @@ Begin
                                  'cse checking '+att_reg2str[Reg32(Paicpu(p)^.oper[1].reg)])));
                                  'cse checking '+att_reg2str[Reg32(Paicpu(p)^.oper[1].reg)])));
                                insertLLItem(asml,p,p^.next,hp5);
                                insertLLItem(asml,p,p^.next,hp5);
 {$endif csdebug}
 {$endif csdebug}
-                               If CheckSequence(p,prevSeq,Paicpu(p)^.oper[1].reg, Cnt, RegInfo) And
+                               If CheckSequence(p,prevSeq,Paicpu(p)^.oper[1].reg, Cnt, RegInfo, findPrevSeqs) And
                                   (Cnt > 0) Then
                                   (Cnt > 0) Then
                                  Begin
                                  Begin
                                    hp1 := nil;
                                    hp1 := nil;
@@ -1336,19 +1343,17 @@ Begin
 {   movl 16(%ebp), %eax                                                     }
 {   movl 16(%ebp), %eax                                                     }
 {   movl 8(%edx), %edx                                                      }
 {   movl 8(%edx), %edx                                                      }
 {   movl 4(%eax), eax                                                       }
 {   movl 4(%eax), eax                                                       }
-                                   hp2 := p;
                                    Cnt2 := 1;
                                    Cnt2 := 1;
                                    While Cnt2 <= Cnt Do
                                    While Cnt2 <= Cnt Do
                                      Begin
                                      Begin
-                                       If Not(RegInInstruction(Paicpu(hp2)^.oper[1].reg, p)) then
+(*                                       If not(regInInstruction(Paicpu(hp2)^.oper[1].reg, p)) and
+                                          not(ppaiprop(p^.optinfo)^.canBeRemoved) then
                                          begin
                                          begin
-                                           if ((p^.typ = ait_instruction) And
-                                               ((paicpu(p)^.OpCode = A_MOV)  or
-                                                (paicpu(p)^.opcode = A_MOVZX) or
-                                                (paicpu(p)^.opcode = A_MOVSX)) And
-                                               (paicpu(p)^.Oper[0].typ in
-                                                 [top_const,top_ref,top_symbol])) and
-                                               (paicpu(p)^.oper[1].typ = top_reg) then
+                                           if (p^.typ = ait_instruction) And
+                                              ((paicpu(p)^.OpCode = A_MOV)  or
+                                               (paicpu(p)^.opcode = A_MOVZX) or
+                                               (paicpu(p)^.opcode = A_MOVSX)) And
+                                              (paicpu(p)^.oper[1].typ = top_reg) then
                                              begin
                                              begin
                                                regCounter := reg32(paicpu(p)^.oper[1].reg);
                                                regCounter := reg32(paicpu(p)^.oper[1].reg);
                                                if (regCounter in reginfo.regsStillUsedAfterSeq) then
                                                if (regCounter in reginfo.regsStillUsedAfterSeq) then
@@ -1369,9 +1374,9 @@ Begin
                                                  end
                                                  end
 {$endif noremove}
 {$endif noremove}
                                              end
                                              end
-                                         end
+                                         end *)
 {$ifndef noremove}
 {$ifndef noremove}
-                                       else
+{                                       else }
                                          PPaiProp(p^.OptInfo)^.CanBeRemoved := True
                                          PPaiProp(p^.OptInfo)^.CanBeRemoved := True
 {$endif noremove}
 {$endif noremove}
                                        ; Inc(Cnt2);
                                        ; Inc(Cnt2);
@@ -1397,15 +1402,18 @@ Begin
                                         (RegInfo.New2OldReg[RegCounter] <> R_NO) Then
                                         (RegInfo.New2OldReg[RegCounter] <> R_NO) Then
                                        Begin
                                        Begin
                                          AllocRegBetween(AsmL,RegInfo.New2OldReg[RegCounter],
                                          AllocRegBetween(AsmL,RegInfo.New2OldReg[RegCounter],
-                                           PPaiProp(prevSeq^.OptInfo)^.Regs[RegInfo.New2OldReg[RegCounter]].StartMod,prevSeq_next);
+                                           PPaiProp(prevSeq^.OptInfo)^.Regs[RegInfo.New2OldReg[RegCounter]].StartMod,hp2);
                                          if hp4 <> prevSeq then
                                          if hp4 <> prevSeq then
                                            begin
                                            begin
                                              if assigned(reginfo.lastReload[regCounter]) then
                                              if assigned(reginfo.lastReload[regCounter]) then
                                                getLastInstruction(reginfo.lastReload[regCounter],hp3)
                                                getLastInstruction(reginfo.lastReload[regCounter],hp3)
+                                             else if assigned(reginfo.lastReload[regInfo.New2OldReg[regCounter]]) then
+                                               getLastInstruction(reginfo.lastReload[regInfo.new2OldReg[regCounter]],hp3)
                                              else hp3 := hp4;
                                              else hp3 := hp4;
-                                             if prevSeq <> hp3 then
+                                             if prevSeq_next <> hp3 then
                                                clearRegContentsFrom(regCounter,prevSeq_next,
                                                clearRegContentsFrom(regCounter,prevSeq_next,
                                                  hp3);
                                                  hp3);
+                                             getnextInstruction(hp3,hp3);
                                              allocRegBetween(asmL,regCounter,prevSeq,hp3);
                                              allocRegBetween(asmL,regCounter,prevSeq,hp3);
                                            end;
                                            end;
                                          If Not(RegCounter In RegInfo.RegsLoadedForRef) And
                                          If Not(RegCounter In RegInfo.RegsLoadedForRef) And
@@ -1421,20 +1429,23 @@ Begin
                                                begin
                                                begin
                                                  hp3 := New(Pai_Marker,Init(NoPropInfoEnd));
                                                  hp3 := New(Pai_Marker,Init(NoPropInfoEnd));
                                                  InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
                                                  InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
-                                                 hp3 := New(Paicpu,Op_Reg_Reg(A_MOV, S_L,
+                                                 hp5 := New(Paicpu,Op_Reg_Reg(A_MOV, S_L,
                                                                          {old reg          new reg}
                                                                          {old reg          new reg}
                                                        RegInfo.New2OldReg[RegCounter], RegCounter));
                                                        RegInfo.New2OldReg[RegCounter], RegCounter));
-                                                 InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
+                                                 new(ppaiprop(hp5^.optinfo));
+                                                 ppaiprop(hp5^.optinfo)^ := ppaiprop(prevSeq_next^.optinfo)^;
+                                                 ppaiprop(hp5^.optinfo)^.canBeRemoved := false;
+                                                 InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp5);
                                                  hp3 := New(Pai_Marker,Init(NoPropInfoStart));
                                                  hp3 := New(Pai_Marker,Init(NoPropInfoStart));
                                                  InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
                                                  InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
                                                  { adjusts states in previous instruction so that it will  }
                                                  { adjusts states in previous instruction so that it will  }
                                                  { definitely be different from the previous or next state }
                                                  { definitely be different from the previous or next state }
-                                                 incstate(ppaiprop(prevSeq_next^.optinfo)^.
+                                                 incstate(ppaiprop(hp5^.optinfo)^.
                                                    regs[RegInfo.New2OldReg[RegCounter]].rstate,20);
                                                    regs[RegInfo.New2OldReg[RegCounter]].rstate,20);
-                                                 incstate(ppaiprop(prevSeq_next^.optinfo)^.
+                                                 incstate(ppaiprop(hp5^.optinfo)^.
                                                    regs[regCounter].wstate,20);
                                                    regs[regCounter].wstate,20);
                                                  updateState(RegInfo.New2OldReg[RegCounter],
                                                  updateState(RegInfo.New2OldReg[RegCounter],
-                                                   prevSeq_next);
+                                                   hp5);
                                                end
                                                end
                                            End
                                            End
                                          Else
                                          Else
@@ -1479,7 +1490,7 @@ Begin
                                     (PPaiProp(p^.OptInfo)^.CanBeRemoved) Then
                                     (PPaiProp(p^.OptInfo)^.CanBeRemoved) Then
                                    if (cnt > 0) then
                                    if (cnt > 0) then
                                      begin
                                      begin
-                                       hp2 := p;
+                                       p := hp2;
                                        Cnt2 := 1;
                                        Cnt2 := 1;
                                        While Cnt2 <= Cnt Do
                                        While Cnt2 <= Cnt Do
                                          Begin
                                          Begin
@@ -1509,38 +1520,38 @@ Begin
                                      end;
                                      end;
                               End;
                               End;
                           End;
                           End;
-                        if not ppaiprop(p^.optinfo)^.canBeRemoved and
-                           not regInRef(reg32(paicpu(p)^.oper[1].reg),
-                                        paicpu(p)^.oper[0].ref^) then
-                          removePrevNotUsedLoad(p,reg32(paicpu(p)^.oper[1].reg),false);
-                      End;
-                    top_Reg:
                       { try to replace the new reg with the old reg }
                       { try to replace the new reg with the old reg }
-                      if not(PPaiProp(p^.optInfo)^.canBeRemoved) and
-                         { only remove if we're not storing something in a regvar }
-                         (paicpu(p)^.oper[1].reg in (usableregs+[R_EDI])) and
-                         (paicpu(p)^.opcode = A_MOV) and
-                         getLastInstruction(p,hp4) then
-                        begin
-                          case paicpu(p)^.oper[1].typ of
-                            top_Reg:
-                              { we only have to start replacing from the instruction after the mov, }
-                              { but replacereg only starts with getnextinstruction(p,p)             }
-                              if ReplaceReg(asmL,paicpu(p)^.oper[0].reg,
-                                   paicpu(p)^.oper[1].reg,p,
-                                   PPaiProp(hp4^.optInfo)^.Regs[paicpu(p)^.oper[1].reg],false,hp1) then
-                                begin
-                                    PPaiProp(p^.optInfo)^.canBeRemoved := true;
-                                    allocRegBetween(asmL,paicpu(p)^.oper[0].reg,
-                                    PPaiProp(p^.optInfo)^.regs[paicpu(p)^.oper[0].reg].startMod,
-                                    hp1);
-                                end
-                              else
-                                if reg32(paicpu(p)^.oper[0].reg) <> reg32(paicpu(p)^.oper[1].reg) then
-                                  removePrevNotUsedLoad(p,reg32(paicpu(p)^.oper[1].reg),false);
-
+                      if not(PPaiProp(p^.optInfo)^.canBeRemoved) then
+                        if (paicpu(p)^.oper[0].typ = top_reg) and
+                           (paicpu(p)^.oper[1].typ = top_reg) and
+                           { only remove if we're not storing something in a regvar }
+                           (paicpu(p)^.oper[1].reg in (usableregs+[R_EDI])) and
+                           (paicpu(p)^.opcode = A_MOV) and
+                           getLastInstruction(p,hp4) and
+                          { we only have to start replacing from the instruction after the mov, }
+                          { but replacereg only starts with getnextinstruction(p,p)             }
+                            replaceReg(asmL,paicpu(p)^.oper[0].reg,
+                              paicpu(p)^.oper[1].reg,p,
+                              ppaiprop(hp4^.optInfo)^.regs[paicpu(p)^.oper[1].reg],false,hp1) then
+                          begin
+                            ppaiprop(p^.optInfo)^.canBeRemoved := true;
+                            allocRegBetween(asmL,paicpu(p)^.oper[0].reg,
+                              ppaiProp(p^.optInfo)^.regs[paicpu(p)^.oper[0].reg].startMod,hp1);
                           end
                           end
-                        end;
+                        else
+                          if (paicpu(p)^.oper[1].typ = top_reg) and
+                             not regInOp(paicpu(p)^.oper[1].reg,paicpu(p)^.oper[0]) then
+                           removePrevNotUsedLoad(p,reg32(paicpu(p)^.oper[1].reg),false);
+                        { at first, only try optimizations of large blocks, because doing }
+                        { smaller ones may prevent bigger ones from completing in the     }
+                        { next pass                                                       }
+                        if not doSubOpts and (orgNrOfMods <> 0) then
+                          begin
+                            p := hp2;
+                            for cnt := 1 to pred(orgNrOfMods) do
+                              getNextInstruction(p,p);
+                          end;
+                      End;
                     top_symbol,Top_Const:
                     top_symbol,Top_Const:
                       Begin
                       Begin
                         Case Paicpu(p)^.oper[1].typ Of
                         Case Paicpu(p)^.oper[1].typ Of
@@ -1584,21 +1595,48 @@ Begin
     End;
     End;
 End;
 End;
 
 
-Procedure RemoveInstructs(AsmL: PAasmOutput; First, Last: Pai);
+function removeInstructs(asmL: paasmoutput; first, last: pai): boolean;
 { Removes the marked instructions and disposes the PPaiProps of the other }
 { Removes the marked instructions and disposes the PPaiProps of the other }
 { instructions                                                            }
 { instructions                                                            }
 Var p, hp1: Pai;
 Var p, hp1: Pai;
 begin
 begin
+  removeInstructs := false;
   p := First;
   p := First;
   While (p <> Last) Do
   While (p <> Last) Do
     Begin
     Begin
       If (p^.typ = ait_marker) and
       If (p^.typ = ait_marker) and
-         (pai_marker(p)^.kind in [noPropInfoStart,noPropInfoEnd]) then
+         (pai_marker(p)^.kind = noPropInfoStart) then
         begin
         begin
           hp1 := pai(p^.next);
           hp1 := pai(p^.next);
           asmL^.remove(p);
           asmL^.remove(p);
           dispose(p,done);
           dispose(p,done);
-          p := hp1
+          while not((hp1^.typ = ait_marker) and
+                    (pai_marker(hp1)^.kind = noPropInfoEnd)) do { test hp1: p was disposed just above and must not be dereferenced }
+            begin
+              p := pai(hp1^.next); { remember the successor before hp1 may be freed }
+{$ifndef noinstremove}
+              { allocregbetween can insert new ait_regalloc objects }
+              { without optinfo                                     }
+              if assigned(hp1^.optinfo) then
+                if ppaiprop(hp1^.optinfo)^.canBeRemoved then
+                  begin
+                    dispose(ppaiprop(hp1^.optinfo)); { free the optinfo record, then the marked entry itself }
+                    hp1^.optinfo := nil;
+                    asmL^.remove(hp1);
+                    dispose(hp1,done);
+                    hp1 := p;
+                  end
+                else
+{$endif noinstremove}
+                  begin
+                    dispose(ppaiprop(hp1^.optinfo)); { entry is kept: only its optinfo record is released }
+                    hp1^.optinfo := nil;
+                  end;
+              hp1 := p; { advance to the saved successor }
+            end;
+          p := pai(hp1^.next); { hp1 is the noPropInfoEnd marker here: resume the outer scan after it }
+          asmL^.remove(hp1);
+          dispose(hp1,done);
         end
         end
       else
       else
 {$ifndef noinstremove}
 {$ifndef noinstremove}
@@ -1609,6 +1647,7 @@ begin
             AsmL^.Remove(p);
             AsmL^.Remove(p);
             Dispose(p, Done);
             Dispose(p, Done);
             p := hp1;
             p := hp1;
+            removeInstructs := true;
           End
           End
         Else
         Else
 {$endif noinstremove}
 {$endif noinstremove}
@@ -1617,20 +1656,37 @@ begin
             p := pai(p^.next);;
             p := pai(p^.next);;
           End;
           End;
     End;
     End;
-    FreeMem(PaiPropBlock, NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4))
+    FreeMem(PaiPropBlock, NrOfPaiObjs*SizeOf(TPaiProp))
 End;
 End;
 
 
-Procedure CSE(AsmL: PAasmOutput; First, Last: Pai);
+function CSE(AsmL: PAasmOutput; First, Last: Pai; pass: longint): boolean;
 Begin
 Begin
-  DoCSE(AsmL, First, Last);
-  RemoveInstructs(AsmL, First, Last);
+  DoCSE(AsmL, First, Last, not(cs_slowoptimize in aktglobalswitches) or (pass >= 2), { both extra flags are always true without cs_slowoptimize; }
+        not(cs_slowoptimize in aktglobalswitches) or (pass >= 1)); { with it, the first needs pass >= 2 and the second pass >= 1 }
+ { register renaming: always done without cs_slowoptimize, otherwise only when pass > 0 }
+  if not(cs_slowoptimize in aktglobalswitches) or (pass > 0) then
+    doRenaming(asmL, first, last);
+  cse := removeInstructs(asmL, first, last); { the function result is whatever removeInstructs returns }
 End;
 End;
 
 
 End.
 End.
 
 
 {
 {
   $Log$
   $Log$
-  Revision 1.1  2000-10-15 09:47:43  peter
+  Revision 1.2  2000-10-24 10:40:53  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peepholopts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.1  2000/10/15 09:47:43  peter
     * moved to i386/
     * moved to i386/
 
 
   Revision 1.14  2000/09/30 13:07:23  jonas
   Revision 1.14  2000/09/30 13:07:23  jonas

+ 113 - 75
compiler/i386/daopt386.pas

@@ -183,6 +183,7 @@ Function regLoadedWithNewValue(reg: tregister; canDependOnPrevValue: boolean;
            hp: pai): boolean;
            hp: pai): boolean;
 Procedure UpdateUsedRegs(Var UsedRegs: TRegSet; p: Pai);
 Procedure UpdateUsedRegs(Var UsedRegs: TRegSet; p: Pai);
 Procedure AllocRegBetween(AsmL: PAasmOutput; Reg: TRegister; p1, p2: Pai);
 Procedure AllocRegBetween(AsmL: PAasmOutput; Reg: TRegister; p1, p2: Pai);
+function FindRegDealloc(reg: tregister; p: pai): boolean;
 
 
 Function RegsEquivalent(OldReg, NewReg: TRegister; Var RegInfo: TRegInfo; OpAct: TopAction): Boolean;
 Function RegsEquivalent(OldReg, NewReg: TRegister; Var RegInfo: TRegInfo; OpAct: TopAction): Boolean;
 Function InstructionsEquivalent(p1, p2: Pai; Var RegInfo: TRegInfo): Boolean;
 Function InstructionsEquivalent(p1, p2: Pai; Var RegInfo: TRegInfo): Boolean;
@@ -720,15 +721,14 @@ Begin
               End
               End
             Else Regsequivalent := False
             Else Regsequivalent := False
         Else
         Else
-          If Not(Reg32(NewReg) in NewRegsEncountered) and
-             ((OpAct = OpAct_Write) or
-              ((newReg = oldReg) and
-               not(newReg in usableregs + [R_EDI]))) Then
-            Begin
-              AddReg2RegInfo(OldReg, NewReg, RegInfo);
-              RegsEquivalent := True
-            End
-          Else RegsEquivalent := False
+           If Not(Reg32(NewReg) in NewRegsEncountered) and
+              ((OpAct = OpAct_Write) or
+               (newReg = oldReg)) Then
+             Begin
+               AddReg2RegInfo(OldReg, NewReg, RegInfo);
+               RegsEquivalent := True
+             End
+           Else RegsEquivalent := False 
     Else RegsEquivalent := False
     Else RegsEquivalent := False
   Else RegsEquivalent := OldReg = NewReg
   Else RegsEquivalent := OldReg = NewReg
 End;
 End;
@@ -973,7 +973,7 @@ Begin
            ((current^.typ = ait_label) and
            ((current^.typ = ait_label) and
             labelCanBeSkipped(pai_label(current)))) do
             labelCanBeSkipped(pai_label(current)))) do
       Current := Pai(Current^.Next);
       Current := Pai(Current^.Next);
-    If Assigned(Current) And
+{    If Assigned(Current) And
        (Current^.typ = ait_Marker) And
        (Current^.typ = ait_Marker) And
        (Pai_Marker(Current)^.Kind = NoPropInfoStart) Then
        (Pai_Marker(Current)^.Kind = NoPropInfoStart) Then
       Begin
       Begin
@@ -981,10 +981,10 @@ Begin
               ((Current^.typ <> ait_Marker) Or
               ((Current^.typ <> ait_Marker) Or
                (Pai_Marker(Current)^.Kind <> NoPropInfoEnd)) Do
                (Pai_Marker(Current)^.Kind <> NoPropInfoEnd)) Do
           Current := Pai(Current^.Next);
           Current := Pai(Current^.Next);
-      End;
+      End;}
   Until Not(Assigned(Current)) Or
   Until Not(Assigned(Current)) Or
         (Current^.typ <> ait_Marker) Or
         (Current^.typ <> ait_Marker) Or
-        (Pai_Marker(Current)^.Kind <> NoPropInfoEnd);
+        not(Pai_Marker(Current)^.Kind in [NoPropInfoStart,NoPropInfoEnd]);
   Next := Current;
   Next := Current;
   If Assigned(Current) And
   If Assigned(Current) And
      Not((Current^.typ In SkipInstr) or
      Not((Current^.typ In SkipInstr) or
@@ -1009,12 +1009,12 @@ Begin
     Current := Pai(Current^.previous);
     Current := Pai(Current^.previous);
     While Assigned(Current) And
     While Assigned(Current) And
           (((Current^.typ = ait_Marker) And
           (((Current^.typ = ait_Marker) And
-            Not(Pai_Marker(Current)^.Kind in [AsmBlockEnd,NoPropInfoEnd])) or
+            Not(Pai_Marker(Current)^.Kind in [AsmBlockEnd{,NoPropInfoEnd}])) or
            (Current^.typ In SkipInstr) or
            (Current^.typ In SkipInstr) or
            ((Current^.typ = ait_label) And
            ((Current^.typ = ait_label) And
             labelCanBeSkipped(pai_label(current)))) Do
             labelCanBeSkipped(pai_label(current)))) Do
       Current := Pai(Current^.previous);
       Current := Pai(Current^.previous);
-    If Assigned(Current) And
+{    If Assigned(Current) And
        (Current^.typ = ait_Marker) And
        (Current^.typ = ait_Marker) And
        (Pai_Marker(Current)^.Kind = NoPropInfoEnd) Then
        (Pai_Marker(Current)^.Kind = NoPropInfoEnd) Then
       Begin
       Begin
@@ -1022,10 +1022,10 @@ Begin
               ((Current^.typ <> ait_Marker) Or
               ((Current^.typ <> ait_Marker) Or
                (Pai_Marker(Current)^.Kind <> NoPropInfoStart)) Do
                (Pai_Marker(Current)^.Kind <> NoPropInfoStart)) Do
           Current := Pai(Current^.previous);
           Current := Pai(Current^.previous);
-      End;
+      End;}
   Until Not(Assigned(Current)) Or
   Until Not(Assigned(Current)) Or
         (Current^.typ <> ait_Marker) Or
         (Current^.typ <> ait_Marker) Or
-        (Pai_Marker(Current)^.Kind <> NoPropInfoStart);
+        not(Pai_Marker(Current)^.Kind in [NoPropInfoStart,NoPropInfoEnd]);
   If Not(Assigned(Current)) or
   If Not(Assigned(Current)) or
      (Current^.typ In SkipInstr) or
      (Current^.typ In SkipInstr) or
      ((Current^.typ = ait_label) And
      ((Current^.typ = ait_label) And
@@ -1177,6 +1177,41 @@ Begin
    end;
    end;
 End;
 End;
 
 
+function FindRegDealloc(reg: tregister; p: pai): boolean;
+{ assumes reg is a 32bit register; scans backwards from p over skippable entries for an ait_regalloc of reg }
+var
+  hp: pai;
+  first: boolean;
+begin
+  findregdealloc := false; { default result: no matching deallocation was found }
+  first := true; { stays true until an allocation of reg has been passed }
+  while assigned(p^.previous) and { keep going while the preceding entry is in skipinstr+[ait_align] or is a label accepted by labelCanBeSkipped }
+        ((Pai(p^.previous)^.typ in (skipinstr+[ait_align])) or
+         ((Pai(p^.previous)^.typ = ait_label) and
+          labelCanBeSkipped(pai_label(p^.previous)))) do
+    begin
+      p := pai(p^.previous); { step back one entry }
+      if (p^.typ = ait_regalloc) and
+         (pairegalloc(p)^.reg = reg) then
+        if not(pairegalloc(p)^.allocation) then { this entry deallocates reg }
+          if first then
+            begin
+              findregdealloc := true; { no allocation of reg was passed on the way: report the dealloc }
+              break;
+            end
+          else
+            begin
+              findRegDealloc := { an allocation was passed first: succeed only if an instruction hp follows this entry }
+                getNextInstruction(p,hp) and
+                 regLoadedWithNewValue(reg,false,hp); { and regLoadedWithNewValue accepts it - presumably meaning hp overwrites reg; confirm against its definition }
+              break
+            end
+        else
+          first := false; { this entry allocates reg (pairs with the "if not allocation" test above) }
+    end
+end;
+
+
 
 
 Procedure IncState(Var S: Byte; amount: longint);
 Procedure IncState(Var S: Byte; amount: longint);
 {Increases S by 1, wraps around at $ffff to 0 (so we won't get overflow
 {Increases S by 1, wraps around at $ffff to 0 (so we won't get overflow
@@ -1233,7 +1268,7 @@ Begin
   sequenceDependsonReg := TmpResult
   sequenceDependsonReg := TmpResult
 End;
 End;
 
 
-procedure invalidateDepedingRegs(p1: ppaiProp; reg: tregister);
+procedure invalidateDependingRegs(p1: ppaiProp; reg: tregister);
 var
 var
   counter: tregister;
   counter: tregister;
 begin
 begin
@@ -1277,7 +1312,7 @@ Begin
             { con_invalid and con_noRemoveRef = con_unknown }
             { con_invalid and con_noRemoveRef = con_unknown }
             else typ := con_unknown;
             else typ := con_unknown;
         end;
         end;
-      invalidateDepedingRegs(p1,reg);
+      invalidateDependingRegs(p1,reg);
     end;
     end;
 End;
 End;
 
 
@@ -1644,7 +1679,7 @@ function writeToRegDestroysContents(destReg: tregister; reg: tregister;
 { modified                                                           }
 { modified                                                           }
 begin
 begin
   writeToRegDestroysContents :=
   writeToRegDestroysContents :=
-    (c.typ <> con_unknown) and
+    (c.typ in [con_ref,con_noRemoveRef,con_invalid]) and
     sequenceDependsOnReg(c,reg,reg32(destReg));
     sequenceDependsOnReg(c,reg,reg32(destReg));
 end;
 end;
 
 
@@ -2033,73 +2068,63 @@ Begin
               A_MOV, A_MOVZX, A_MOVSX:
               A_MOV, A_MOVZX, A_MOVSX:
                 Begin
                 Begin
                   Case Paicpu(p)^.oper[0].typ Of
                   Case Paicpu(p)^.oper[0].typ Of
-                    Top_Reg:
-                      Case Paicpu(p)^.oper[1].typ Of
-                        Top_Reg:
+                    top_ref, top_reg:
+                      case paicpu(p)^.oper[1].typ Of
+                        top_reg:
                           Begin
                           Begin
 {$ifdef statedebug}
 {$ifdef statedebug}
                             hp := new(pai_asm_comment,init(strpnew('destroying '+
                             hp := new(pai_asm_comment,init(strpnew('destroying '+
                               att_reg2str[Paicpu(p)^.oper[1].reg])));
                               att_reg2str[Paicpu(p)^.oper[1].reg])));
                             insertllitem(asml,p,p^.next,hp);
                             insertllitem(asml,p,p^.next,hp);
 {$endif statedebug}
 {$endif statedebug}
-                            DestroyReg(CurProp, Paicpu(p)^.oper[1].reg, true);
-                            ReadReg(CurProp, Paicpu(p)^.oper[0].reg);
-{                            CurProp^.Regs[Paicpu(p)^.oper[1].reg] :=
-                              CurProp^.Regs[Paicpu(p)^.oper[0].reg];
-                            If (CurProp^.Regs[Paicpu(p)^.oper[1].reg].ModReg = R_NO) Then
-                              CurProp^.Regs[Paicpu(p)^.oper[1].reg].ModReg :=
-                                Paicpu(p)^.oper[0].reg;}
-                          End;
-                        Top_Ref:
-                          Begin
-                            ReadReg(CurProp, Paicpu(p)^.oper[0].reg);
-                            ReadRef(CurProp, Paicpu(p)^.oper[1].ref);
-                            DestroyRefs(p, Paicpu(p)^.oper[1].ref^, Paicpu(p)^.oper[0].reg);
-                          End;
-                      End;
-                    Top_Ref:
-                      Begin {destination is always a register in this case}
-                        ReadRef(CurProp, Paicpu(p)^.oper[0].ref);
-                        TmpReg := Reg32(Paicpu(p)^.oper[1].reg);
-                        If RegInRef(TmpReg, Paicpu(p)^.oper[0].ref^) And
-                           (curProp^.regs[tmpReg].typ in [con_ref,con_noRemoveRef])
-                          Then
-                            Begin
-                              With CurProp^.Regs[TmpReg] Do
-                                Begin
-                                  incState(wstate,1);
- {also store how many instructions are part of the sequence in the first
-  instructions PPaiProp, so it can be easily accessed from within
-  CheckSequence}
-                                  Inc(NrOfMods, NrOfInstrSinceLastMod[TmpReg]);
-                                  PPaiProp(Pai(StartMod)^.OptInfo)^.Regs[TmpReg].NrOfMods := NrOfMods;
-                                  NrOfInstrSinceLastMod[TmpReg] := 0;
-                                  { Destroy the contents of the registers  }
-                                  { that depended on the previous value of }
-                                  { this register                          }
-                                  invalidateDepedingRegs(curProp,tmpReg);
-                                End;
-                            End
-                          Else
-                            Begin
+
+                            readOp(curprop, paicpu(p)^.oper[0]);
+                            tmpreg := reg32(paicpu(p)^.oper[1].reg);
+                            if regInOp(tmpreg, paicpu(p)^.oper[0]) and
+                               (curProp^.regs[tmpReg].typ in [con_ref,con_noRemoveRef]) then
+                              begin
+                                with curprop^.regs[tmpreg] Do
+                                  begin
+                                    incState(wstate,1);
+ { also store how many instructions are part of the sequence in the first }
+ { instruction's PPaiProp, so it can be easily accessed from within       }
+ { CheckSequence                                                          }
+                                    inc(nrOfMods, nrOfInstrSinceLastMod[tmpreg]);
+                                    ppaiprop(startmod^.optinfo)^.regs[tmpreg].nrOfMods := nrOfMods;
+                                    nrOfInstrSinceLastMod[tmpreg] := 0;
+                                   { Destroy the contents of the registers  }
+                                   { that depended on the previous value of }
+                                   { this register                          }
+                                    invalidateDependingRegs(curprop,tmpreg);
+                                end;
+                            end
+                          else
+                            begin
 {$ifdef statedebug}
 {$ifdef statedebug}
                               hp := new(pai_asm_comment,init(strpnew('destroying & initing '+att_reg2str[tmpreg])));
                               hp := new(pai_asm_comment,init(strpnew('destroying & initing '+att_reg2str[tmpreg])));
                               insertllitem(asml,p,p^.next,hp);
                               insertllitem(asml,p,p^.next,hp);
 {$endif statedebug}
 {$endif statedebug}
-                              DestroyReg(CurProp, TmpReg, true);
-                              If Not(RegInRef(TmpReg, Paicpu(p)^.oper[0].ref^)) Then
-                                With CurProp^.Regs[TmpReg] Do
-                                  Begin
-                                    Typ := Con_Ref;
-                                    StartMod := p;
-                                    NrOfMods := 1;
-                                  End
-                            End;
+                              destroyReg(curprop, tmpreg, true);
+                              if not(reginop(tmpreg, paicpu(p)^.oper[0])) then
+                                with curprop^.regs[tmpreg] Do
+                                  begin
+                                    typ := con_ref;
+                                    startmod := p;
+                                    nrOfMods := 1;
+                                  end
+                            end;
 {$ifdef StateDebug}
 {$ifdef StateDebug}
                   hp := new(pai_asm_comment,init(strpnew(att_reg2str[TmpReg]+': '+tostr(CurProp^.Regs[TmpReg].WState))));
                   hp := new(pai_asm_comment,init(strpnew(att_reg2str[TmpReg]+': '+tostr(CurProp^.Regs[TmpReg].WState))));
                   InsertLLItem(AsmL, p, p^.next, hp);
                   InsertLLItem(AsmL, p, p^.next, hp);
 {$endif StateDebug}
 {$endif StateDebug}
-
+                          End;
+                        Top_Ref:
+                          { can only be if oper[0] = top_reg }
+                          Begin
+                            ReadReg(CurProp, Paicpu(p)^.oper[0].reg);
+                            ReadRef(CurProp, Paicpu(p)^.oper[1].ref);
+                            DestroyRefs(p, Paicpu(p)^.oper[1].ref^, Paicpu(p)^.oper[0].reg);
+                          End;
                       End;
                       End;
                     top_symbol,Top_Const:
                     top_symbol,Top_Const:
                       Begin
                       Begin
@@ -2317,12 +2342,12 @@ Begin
       GetNextInstruction(p, p);
       GetNextInstruction(p, p);
     End;
     End;
 {Uncomment the next line to see how much memory the reloading optimizer needs}
 {Uncomment the next line to see how much memory the reloading optimizer needs}
-{  Writeln((NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4)));}
+{  Writeln(NrOfPaiObjs*SizeOf(TPaiProp));}
 {no need to check mem/maxavail, we've got as much virtual memory as we want}
 {no need to check mem/maxavail, we've got as much virtual memory as we want}
   If NrOfPaiObjs <> 0 Then
   If NrOfPaiObjs <> 0 Then
     Begin
     Begin
       InitDFAPass2 := True;
       InitDFAPass2 := True;
-      GetMem(PaiPropBlock, NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4));
+      GetMem(PaiPropBlock, NrOfPaiObjs*SizeOf(TPaiProp));
       p := BlockStart;
       p := BlockStart;
       SkipHead(p);
       SkipHead(p);
       For Count := 1 To NrOfPaiObjs Do
       For Count := 1 To NrOfPaiObjs Do
@@ -2362,7 +2387,20 @@ End.
 
 
 {
 {
   $Log$
   $Log$
-  Revision 1.2  2000-10-19 15:59:40  jonas
+  Revision 1.3  2000-10-24 10:40:53  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peepholopts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.2  2000/10/19 15:59:40  jonas
     * fixed bug in allocregbetween (the register wasn't added to the
     * fixed bug in allocregbetween (the register wasn't added to the
       usedregs set of the last instruction of the chain) ("merged")
       usedregs set of the last instruction of the chain) ("merged")
 
 

+ 456 - 390
compiler/i386/popt386.pas

@@ -28,14 +28,16 @@ Interface
 
 
 Uses Aasm;
 Uses Aasm;
 
 
+Procedure PrePeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 Procedure PeepHoleOptPass1(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 Procedure PeepHoleOptPass1(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
+Procedure PostPeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 
 
 Implementation
 Implementation
 
 
 Uses
 Uses
   globtype,systems,
   globtype,systems,
-  globals,verbose,hcodegen,
+  globals,hcodegen,
 {$ifdef finaldestdebug}
 {$ifdef finaldestdebug}
   cobjects,
   cobjects,
 {$endif finaldestdebug}
 {$endif finaldestdebug}
@@ -97,6 +99,308 @@ begin
     end;
     end;
 end;
 end;
 
 
+Procedure PrePeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
+{Preliminary peephole pass over the items from BlockStart up to, but not
+ including, BlockEnd (the list is followed through p^.next).
+ Judging by its name and the A_XOR comment below it is meant to run before
+ the CSE - confirm the call order in aopt386.
+ Rewrites done on instructions in AsmL:
+   - "imul const, reg[, reg2]" with a 32 bit operand size: a multiplication
+     by 1 is removed or turned into a mov; multiplications by 3, 5 and 9
+     become a single lea; multiplications by 6, 10 and 12 become a two
+     instruction lea/add sequence, but only when aktoptprocessor <= Class386
+   - "shr/sar const1, x" followed by "shl const2, x" on the same operand is
+     turned into a shift plus an and, or just an and
+   - "xor reg, reg" is turned into a mov of the constant 0 to reg
+ NOTE(review): the three operand sequences for 6 and 12 write reg2 before
+ reading reg1 a second time, so they assume reg1 <> reg2 - confirm that the
+ code generator never emits such an imul with identical registers.}
+var
+  p,hp1: pai;
+  l: longint;
+  tmpRef: treference;
+Begin
+  P := BlockStart;
+  While (P <> BlockEnd) Do
+    Begin
+      Case P^.Typ Of
+        Ait_Instruction:
+          Begin
+            Case Paicpu(p)^.opcode Of
+              A_IMUL:
+                {changes certain "imul const, %reg"'s to lea sequences}
+                Begin
+                  If (Paicpu(p)^.oper[0].typ = Top_Const) And
+                     (Paicpu(p)^.oper[1].typ = Top_Reg) And
+                     (Paicpu(p)^.opsize = S_L) Then
+                    If (Paicpu(p)^.oper[0].val = 1) Then
+                      If (Paicpu(p)^.oper[2].typ = Top_None) Then
+                       {remove "imul $1, reg"}
+                        Begin
+                          hp1 := Pai(p^.Next);
+                          AsmL^.Remove(p);
+                          Dispose(p, Done);
+                          p := hp1;
+                          Continue;
+                        End
+                      Else
+                       {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
+                        Begin
+                          hp1 := New(Paicpu, Op_Reg_Reg(A_MOV, S_L, Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[2].reg));
+                          InsertLLItem(AsmL, p^.previous, p^.next, hp1);
+                          Dispose(p, Done);
+                          p := hp1;
+                        End
+                    Else If
+                     ((Paicpu(p)^.oper[2].typ = Top_Reg) or
+                      (Paicpu(p)^.oper[2].typ = Top_None)) And
+                     (aktoptprocessor < ClassP6) And
+                     (Paicpu(p)^.oper[0].val <= 12) And
+                     Not(CS_LittleSize in aktglobalswitches) And
+                     { skip the rewrite when a jo/jno follows, presumably }
+                     { because lea does not set the overflow flag         }
+                     (Not(GetNextInstruction(p, hp1)) Or
+                       {GetNextInstruction(p, hp1) And}
+                       Not((Pai(hp1)^.typ = ait_instruction) And
+                           ((paicpu(hp1)^.opcode=A_Jcc) and
+                            (paicpu(hp1)^.condition in [C_O,C_NO]))))
+                    Then
+                      Begin
+                        Reset_reference(tmpref);
+                        Case Paicpu(p)^.oper[0].val Of
+                          3: Begin
+                             {imul 3, reg1, reg2 to
+                                lea (reg1,reg1,2), reg2
+                              imul 3, reg1 to
+                                lea (reg1,reg1,2), reg1}
+                               TmpRef.base := Paicpu(p)^.oper[1].reg;
+                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                               TmpRef.ScaleFactor := 2;
+                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
+                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
+                               Else
+                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                               Dispose(p, Done);
+                               p := hp1;
+                            End;
+                         5: Begin
+                            {imul 5, reg1, reg2 to
+                               lea (reg1,reg1,4), reg2
+                             imul 5, reg1 to
+                               lea (reg1,reg1,4), reg1}
+                              TmpRef.base := Paicpu(p)^.oper[1].reg;
+                              TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                              TmpRef.ScaleFactor := 4;
+                              If (Paicpu(p)^.oper[2].typ = Top_None) Then
+                                hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
+                              Else
+                                hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                              InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                              Dispose(p, Done);
+                              p := hp1;
+                            End;
+                         6: Begin
+                            {imul 6, reg1, reg2 to
+                               lea (,reg1,2), reg2
+                               lea (reg2,reg1,4), reg2
+                             imul 6, reg1 to
+                               lea (reg1,reg1,2), reg1
+                               add reg1, reg1}
+                              If (aktoptprocessor <= Class386)
+                                Then
+                                  Begin
+                                    { the second instruction of the pair is built }
+                                    { first and inserted after p                  }
+                                    TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                    If (Paicpu(p)^.oper[2].typ = Top_Reg)
+                                      Then
+                                        Begin
+                                          TmpRef.base := Paicpu(p)^.oper[2].reg;
+                                          TmpRef.ScaleFactor := 4;
+                                          { the result has to end up in reg2 (oper[2]); writing it }
+                                          { to reg1 left reg2 = 2*reg1 and destroyed reg1          }
+                                          hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                                        End
+                                      Else
+                                        Begin
+                                          hp1 :=  New(Paicpu, op_reg_reg(A_ADD, S_L,
+                                            Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
+                                        End;
+                                    InsertLLItem(AsmL,p, p^.next, hp1);
+                                    { now build the first instruction, which replaces p }
+                                    Reset_reference(tmpref);
+                                    TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                    TmpRef.ScaleFactor := 2;
+                                    If (Paicpu(p)^.oper[2].typ = Top_Reg)
+                                      Then
+                                        Begin
+                                          TmpRef.base := R_NO;
+                                          hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef),
+                                            Paicpu(p)^.oper[2].reg));
+                                        End
+                                      Else
+                                        Begin
+                                          TmpRef.base := Paicpu(p)^.oper[1].reg;
+                                          hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
+                                        End;
+                                    InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                                    Dispose(p, Done);
+                                    { continue after the second instruction of the pair }
+                                    p := Pai(hp1^.next);
+                                  End
+                            End;
+                          9: Begin
+                             {imul 9, reg1, reg2 to
+                                lea (reg1,reg1,8), reg2
+                              imul 9, reg1 to
+                                lea (reg1,reg1,8), reg1}
+                               TmpRef.base := Paicpu(p)^.oper[1].reg;
+                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                               TmpRef.ScaleFactor := 8;
+                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
+                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
+                               Else
+                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                               Dispose(p, Done);
+                               p := hp1;
+                             End;
+                         10: Begin
+                            {imul 10, reg1, reg2 to
+                               lea (reg1,reg1,4), reg2
+                               add reg2, reg2
+                             imul 10, reg1 to
+                               lea (reg1,reg1,4), reg1
+                               add reg1, reg1}
+                               If (aktoptprocessor <= Class386) Then
+                                 Begin
+                                   If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
+                                     hp1 :=  New(Paicpu, op_reg_reg(A_ADD, S_L,
+                                       Paicpu(p)^.oper[2].reg,Paicpu(p)^.oper[2].reg))
+                                   Else
+                                     hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
+                                       Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
+                                   InsertLLItem(AsmL,p, p^.next, hp1);
+                                   TmpRef.base := Paicpu(p)^.oper[1].reg;
+                                   TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                   TmpRef.ScaleFactor := 4;
+                                   If (Paicpu(p)^.oper[2].typ = Top_Reg)
+                                     Then
+                                       hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg))
+                                     Else
+                                       hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
+                                   InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                                   Dispose(p, Done);
+                                   p := Pai(hp1^.next);
+                                 End
+                             End;
+                         12: Begin
+                            {imul 12, reg1, reg2 to
+                               lea (,reg1,4), reg2
+                               lea (reg2,reg1,8), reg2
+                             imul 12, reg1 to
+                               lea (reg1,reg1,2), reg1
+                               lea (,reg1,4), reg1}
+                               If (aktoptprocessor <= Class386)
+                                 Then
+                                   Begin
+                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                     If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
+                                       Begin
+                                         TmpRef.base := Paicpu(p)^.oper[2].reg;
+                                         TmpRef.ScaleFactor := 8;
+                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                                       End
+                                     Else
+                                       Begin
+                                         TmpRef.base := R_NO;
+                                         TmpRef.ScaleFactor := 4;
+                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
+                                       End;
+                                     InsertLLItem(AsmL,p, p^.next, hp1);
+                                     Reset_reference(tmpref);
+                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                     If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
+                                       Begin
+                                         TmpRef.base := R_NO;
+                                         TmpRef.ScaleFactor := 4;
+                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                                       End
+                                     Else
+                                       Begin
+                                         TmpRef.base := Paicpu(p)^.oper[1].reg;
+                                         TmpRef.ScaleFactor := 2;
+                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
+                                       End;
+                                     InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                                     Dispose(p, Done);
+                                     p := Pai(hp1^.next);
+                                   End
+                             End
+                        End;
+                      End;
+                End;
+              A_SAR, A_SHR:
+                  {changes the code sequence
+                   shr/sar const1, x
+                   shl     const2, x
+                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
+                Begin
+                  If GetNextInstruction(p, hp1) And
+                     (pai(hp1)^.typ = ait_instruction) and
+                     (Paicpu(hp1)^.opcode = A_SHL) and
+                     (Paicpu(p)^.oper[0].typ = top_const) and
+                     (Paicpu(hp1)^.oper[0].typ = top_const) and
+                     (Paicpu(hp1)^.opsize = Paicpu(p)^.opsize) And
+                     (Paicpu(hp1)^.oper[1].typ = Paicpu(p)^.oper[1].typ) And
+                     OpsEqual(Paicpu(hp1)^.oper[1], Paicpu(p)^.oper[1])
+                    Then
+                      If (Paicpu(p)^.oper[0].val > Paicpu(hp1)^.oper[0].val) And
+                         Not(CS_LittleSize In aktglobalswitches)
+                        Then
+                   { shr/sar const1, %reg
+                     shl     const2, %reg
+                      with const1 > const2 }
+                          Begin
+                            Paicpu(p)^.LoadConst(0,Paicpu(p)^.oper[0].val-Paicpu(hp1)^.oper[0].val);
+                            Paicpu(hp1)^.opcode := A_AND;
+                            { l has the low const2 bits set; the and mask is its complement }
+                            l := (1 shl (Paicpu(hp1)^.oper[0].val)) - 1;
+                            Case Paicpu(p)^.opsize Of
+                              S_L: Paicpu(hp1)^.LoadConst(0,l Xor longint(-1));
+                              S_B: Paicpu(hp1)^.LoadConst(0,l Xor $ff);
+                              S_W: Paicpu(hp1)^.LoadConst(0,l Xor $ffff);
+                            End;
+                          End
+                        Else
+                          If (Paicpu(p)^.oper[0].val<Paicpu(hp1)^.oper[0].val) And
+                             Not(CS_LittleSize In aktglobalswitches)
+                            Then
+                   { shr/sar const1, %reg
+                     shl     const2, %reg
+                      with const1 < const2 }
+                              Begin
+                                Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val-Paicpu(p)^.oper[0].val);
+                                Paicpu(p)^.opcode := A_AND;
+                                l := (1 shl (Paicpu(p)^.oper[0].val))-1;
+                                Case Paicpu(p)^.opsize Of
+                                  { longint(-1) keeps the mask within longint range, }
+                                  { same as in the const1 > const2 branch            }
+                                  S_L: Paicpu(p)^.LoadConst(0,l Xor longint(-1));
+                                  S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
+                                  S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
+                                End;
+                              End
+                            Else
+                   { shr/sar const1, %reg
+                     shl     const2, %reg
+                      with const1 = const2 }
+                              if (Paicpu(p)^.oper[0].val = Paicpu(hp1)^.oper[0].val) then
+                                Begin
+                                  Paicpu(p)^.opcode := A_AND;
+                                  l := (1 shl (Paicpu(p)^.oper[0].val))-1;
+                                  Case Paicpu(p)^.opsize Of
+                                    S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
+                                    S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
+                                    S_L: Paicpu(p)^.LoadConst(0,l Xor longint(-1));
+                                  End;
+                                  { the shl is no longer needed, the and does all the work }
+                                  AsmL^.remove(hp1);
+                                  dispose(hp1, done);
+                                End;
+                End;
+              A_XOR:
+                If (Paicpu(p)^.oper[0].typ = top_reg) And
+                   (Paicpu(p)^.oper[1].typ = top_reg) And
+                   (Paicpu(p)^.oper[0].reg = Paicpu(p)^.oper[1].reg) then
+                 { temporarily change this to 'mov reg,0' to make it easier }
+                 { for the CSE. Will be changed back in pass 2              }
+                  begin
+                    paicpu(p)^.opcode := A_MOV;
+                    paicpu(p)^.loadconst(0,0);
+                  end;
+            End;
+          End;
+      End;
+      p := Pai(p^.next)
+    End;
+End;
+
+
 
 
 Procedure PeepHoleOptPass1(Asml: PAasmOutput; BlockStart, BlockEnd: Pai);
 Procedure PeepHoleOptPass1(Asml: PAasmOutput; BlockStart, BlockEnd: Pai);
 {First pass of peepholeoptimizations}
 {First pass of peepholeoptimizations}
@@ -279,9 +583,7 @@ Begin
                If (paicpu(p)^.opcode = A_JMP) Then
                If (paicpu(p)^.opcode = A_JMP) Then
                  Begin
                  Begin
                    While GetNextInstruction(p, hp1) and
                    While GetNextInstruction(p, hp1) and
-                         ((hp1^.typ <> ait_label) or
-                   { skip unused labels, they're not referenced anywhere }
-                          labelCanBeSkipped(pai_label(hp1))) Do
+                         (hp1^.typ <> ait_label) do
                      If not(hp1^.typ in ([ait_label,ait_align]+skipinstr)) Then
                      If not(hp1^.typ in ([ait_label,ait_align]+skipinstr)) Then
                        Begin
                        Begin
                          AsmL^.Remove(hp1);
                          AsmL^.Remove(hp1);
@@ -289,6 +591,7 @@ Begin
                        End
                        End
                      else break;
                      else break;
                   End;
                   End;
+               { remove jumps to a label coming right after them }
                If GetNextInstruction(p, hp1) then
                If GetNextInstruction(p, hp1) then
                  Begin
                  Begin
                    if FindLabel(pasmlabel(paicpu(p)^.oper[0].sym), hp1) then
                    if FindLabel(pasmlabel(paicpu(p)^.oper[0].sym), hp1) then
@@ -481,259 +784,53 @@ Begin
                       { change                      to
                       { change                      to
                           fld/fst   mem1  (hp1)       fld/fst   mem1
                           fld/fst   mem1  (hp1)       fld/fst   mem1
                           fld       mem1  (p)         fadd/
                           fld       mem1  (p)         fadd/
-                          faddp/                       fmul     st, st
-                           fmulp  st, st1 (hp2) }
-                        Begin
-                          AsmL^.Remove(p);
-                          Dispose(p, Done);
-                          p := hp1;
-                          If (Paicpu(hp2)^.opcode = A_FADDP) Then
-                            Paicpu(hp2)^.opcode := A_FADD
-                          Else
-                            Paicpu(hp2)^.opcode := A_FMUL;
-                          Paicpu(hp2)^.oper[1].reg := R_ST;
-                        End
-                      Else
-                      { change              to
-                          fld/fst mem1 (hp1)   fld/fst mem1
-                          fld     mem1 (p)     fld      st}
-                        Begin
-                          Paicpu(p)^.changeopsize(S_FL);
-                          Paicpu(p)^.loadreg(0,R_ST);
-                        End
-                    Else
-                      Begin
-                        Case Paicpu(hp2)^.opcode Of
-                          A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
-                     { change                        to
-                         fld/fst  mem1    (hp1)      fld/fst    mem1
-                         fld      mem2    (p)        fxxx       mem2
-                         fxxxp    st, st1 (hp2)                      }
-
-                            Begin
-                              Case Paicpu(hp2)^.opcode Of
-                                A_FADDP: Paicpu(p)^.opcode := A_FADD;
-                                A_FMULP: Paicpu(p)^.opcode := A_FMUL;
-                                A_FSUBP: Paicpu(p)^.opcode := A_FSUBR;
-                                A_FSUBRP: Paicpu(p)^.opcode := A_FSUB;
-                                A_FDIVP: Paicpu(p)^.opcode := A_FDIVR;
-                                A_FDIVRP: Paicpu(p)^.opcode := A_FDIV;
-                              End;
-                              AsmL^.Remove(hp2);
-                              Dispose(hp2, Done)
-                            End
-                        End
-                      End
-                End;
-              A_FSTP,A_FISTP:
-                if doFpuLoadStoreOpt(asmL,p) then
-                  continue;
-              A_IMUL:
-                {changes certain "imul const, %reg"'s to lea sequences}
-                Begin
-                  If (Paicpu(p)^.oper[0].typ = Top_Const) And
-                     (Paicpu(p)^.oper[1].typ = Top_Reg) And
-                     (Paicpu(p)^.opsize = S_L) Then
-                    If (Paicpu(p)^.oper[0].val = 1) Then
-                      If (Paicpu(p)^.oper[2].typ = Top_None) Then
-                       {remove "imul $1, reg"}
-                        Begin
-                          hp1 := Pai(p^.Next);
-                          AsmL^.Remove(p);
-                          Dispose(p, Done);
-                          p := hp1;
-                          Continue;
-                        End
-                      Else
-                       {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
-                        Begin
-                          hp1 := New(Paicpu, Op_Reg_Reg(A_MOV, S_L, Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[2].reg));
-                          InsertLLItem(AsmL, p^.previous, p^.next, hp1);
-                          Dispose(p, Done);
-                          p := hp1;
-                        End
-                    Else If
-                     ((Paicpu(p)^.oper[2].typ = Top_Reg) or
-                      (Paicpu(p)^.oper[2].typ = Top_None)) And
-                     (aktoptprocessor < ClassP6) And
-                     (Paicpu(p)^.oper[0].val <= 12) And
-                     Not(CS_LittleSize in aktglobalswitches) And
-                     (Not(GetNextInstruction(p, hp1)) Or
-                       {GetNextInstruction(p, hp1) And}
-                       Not((Pai(hp1)^.typ = ait_instruction) And
-                           ((paicpu(hp1)^.opcode=A_Jcc) and
-                            (paicpu(hp1)^.condition in [C_O,C_NO]))))
-                    Then
-                      Begin
-                        Reset_reference(tmpref);
-                        Case Paicpu(p)^.oper[0].val Of
-                          3: Begin
-                             {imul 3, reg1, reg2 to
-                                lea (reg1,reg1,2), reg2
-                              imul 3, reg1 to
-                                lea (reg1,reg1,2), reg1}
-                               TmpRef.base := Paicpu(p)^.oper[1].reg;
-                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                               TmpRef.ScaleFactor := 2;
-                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
-                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
-                               Else
-                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                               Dispose(p, Done);
-                               p := hp1;
-                            End;
-                         5: Begin
-                            {imul 5, reg1, reg2 to
-                               lea (reg1,reg1,4), reg2
-                             imul 5, reg1 to
-                               lea (reg1,reg1,4), reg1}
-                              TmpRef.base := Paicpu(p)^.oper[1].reg;
-                              TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                              TmpRef.ScaleFactor := 4;
-                              If (Paicpu(p)^.oper[2].typ = Top_None) Then
-                                hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
-                              Else
-                                hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                              InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                              Dispose(p, Done);
-                              p := hp1;
-                            End;
-                         6: Begin
-                            {imul 6, reg1, reg2 to
-                               lea (,reg1,2), reg2
-                               lea (reg2,reg1,4), reg2
-                             imul 6, reg1 to
-                               lea (reg1,reg1,2), reg1
-                               add reg1, reg1}
-                              If (aktoptprocessor <= Class386)
-                                Then
-                                  Begin
-                                    TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                    If (Paicpu(p)^.oper[2].typ = Top_Reg)
-                                      Then
-                                        Begin
-                                          TmpRef.base := Paicpu(p)^.oper[2].reg;
-                                          TmpRef.ScaleFactor := 4;
-                                          hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                        End
-                                      Else
-                                        Begin
-                                          hp1 :=  New(Paicpu, op_reg_reg(A_ADD, S_L,
-                                            Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
-                                        End;
-                                    InsertLLItem(AsmL,p, p^.next, hp1);
-                                    Reset_reference(tmpref);
-                                    TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                    TmpRef.ScaleFactor := 2;
-                                    If (Paicpu(p)^.oper[2].typ = Top_Reg)
-                                      Then
-                                        Begin
-                                          TmpRef.base := R_NO;
-                                          hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef),
-                                            Paicpu(p)^.oper[2].reg));
-                                        End
-                                      Else
-                                        Begin
-                                          TmpRef.base := Paicpu(p)^.oper[1].reg;
-                                          hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                        End;
-                                    InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                                    Dispose(p, Done);
-                                    p := Pai(hp1^.next);
-                                  End
-                            End;
-                          9: Begin
-                             {imul 9, reg1, reg2 to
-                                lea (reg1,reg1,8), reg2
-                              imul 9, reg1 to
-                                lea (reg1,reg1,8), reg1}
-                               TmpRef.base := Paicpu(p)^.oper[1].reg;
-                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                               TmpRef.ScaleFactor := 8;
-                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
-                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
-                               Else
-                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                               Dispose(p, Done);
-                               p := hp1;
-                             End;
-                         10: Begin
-                            {imul 10, reg1, reg2 to
-                               lea (reg1,reg1,4), reg2
-                               add reg2, reg2
-                             imul 10, reg1 to
-                               lea (reg1,reg1,4), reg1
-                               add reg1, reg1}
-                               If (aktoptprocessor <= Class386) Then
-                                 Begin
-                                   If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
-                                     hp1 :=  New(Paicpu, op_reg_reg(A_ADD, S_L,
-                                       Paicpu(p)^.oper[2].reg,Paicpu(p)^.oper[2].reg))
-                                   Else
-                                     hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
-                                       Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
-                                   InsertLLItem(AsmL,p, p^.next, hp1);
-                                   TmpRef.base := Paicpu(p)^.oper[1].reg;
-                                   TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                   TmpRef.ScaleFactor := 4;
-                                   If (Paicpu(p)^.oper[2].typ = Top_Reg)
-                                     Then
-                                       hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg))
-                                     Else
-                                       hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                   InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                                   Dispose(p, Done);
-                                   p := Pai(hp1^.next);
-                                 End
-                             End;
-                         12: Begin
-                            {imul 12, reg1, reg2 to
-                               lea (,reg1,4), reg2
-                               lea (,reg1,8) reg2
-                             imul 12, reg1 to
-                               lea (reg1,reg1,2), reg1
-                               lea (,reg1,4), reg1}
-                               If (aktoptprocessor <= Class386)
-                                 Then
-                                   Begin
-                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                     If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
-                                       Begin
-                                         TmpRef.base := Paicpu(p)^.oper[2].reg;
-                                         TmpRef.ScaleFactor := 8;
-                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                                       End
-                                     Else
-                                       Begin
-                                         TmpRef.base := R_NO;
-                                         TmpRef.ScaleFactor := 4;
-                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                       End;
-                                     InsertLLItem(AsmL,p, p^.next, hp1);
-                                     Reset_reference(tmpref);
-                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                     If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
-                                       Begin
-                                         TmpRef.base := R_NO;
-                                         TmpRef.ScaleFactor := 4;
-                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                                       End
-                                     Else
-                                       Begin
-                                         TmpRef.base := Paicpu(p)^.oper[1].reg;
-                                         TmpRef.ScaleFactor := 2;
-                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                       End;
-                                     InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                                     Dispose(p, Done);
-                                     p := Pai(hp1^.next);
-                                   End
-                             End
-                        End;
-                      End;
+                          faddp/                       fmul     st, st
+                           fmulp  st, st1 (hp2) }
+                        Begin
+                          AsmL^.Remove(p);
+                          Dispose(p, Done);
+                          p := hp1;
+                          If (Paicpu(hp2)^.opcode = A_FADDP) Then
+                            Paicpu(hp2)^.opcode := A_FADD
+                          Else
+                            Paicpu(hp2)^.opcode := A_FMUL;
+                          Paicpu(hp2)^.oper[1].reg := R_ST;
+                        End
+                      Else
+                      { change              to
+                          fld/fst mem1 (hp1)   fld/fst mem1
+                          fld     mem1 (p)     fld      st}
+                        Begin
+                          Paicpu(p)^.changeopsize(S_FL);
+                          Paicpu(p)^.loadreg(0,R_ST);
+                        End
+                    Else
+                      Begin
+                        Case Paicpu(hp2)^.opcode Of
+                          A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
+                     { change                        to
+                         fld/fst  mem1    (hp1)      fld/fst    mem1
+                         fld      mem2    (p)        fxxx       mem2
+                         fxxxp    st, st1 (hp2)                      }
+
+                            Begin
+                              Case Paicpu(hp2)^.opcode Of
+                                A_FADDP: Paicpu(p)^.opcode := A_FADD;
+                                A_FMULP: Paicpu(p)^.opcode := A_FMUL;
+                                A_FSUBP: Paicpu(p)^.opcode := A_FSUBR;
+                                A_FSUBRP: Paicpu(p)^.opcode := A_FSUB;
+                                A_FDIVP: Paicpu(p)^.opcode := A_FDIVR;
+                                A_FDIVRP: Paicpu(p)^.opcode := A_FDIV;
+                              End;
+                              AsmL^.Remove(hp2);
+                              Dispose(hp2, Done)
+                            End
+                        End
+                      End
                 End;
                 End;
+              A_FSTP,A_FISTP:
+                if doFpuLoadStoreOpt(asmL,p) then
+                  continue;
               A_LEA:
               A_LEA:
                 Begin
                 Begin
                 {removes seg register prefixes from LEA operations, as they
                 {removes seg register prefixes from LEA operations, as they
@@ -784,7 +881,6 @@ Begin
                                   end;
                                   end;
                               end;
                               end;
                             end;
                             end;
-
                 End;
                 End;
               A_MOV:
               A_MOV:
                 Begin
                 Begin
@@ -1420,71 +1516,6 @@ Begin
                                    p := hp1;
                                    p := hp1;
                                  End
                                  End
                 End;
                 End;
-              A_SAR, A_SHR:
-                  {changes the code sequence
-                   shr/sar const1, x
-                   shl     const2, x
-                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
-                Begin
-                  If GetNextInstruction(p, hp1) And
-                     (pai(hp1)^.typ = ait_instruction) and
-                     (Paicpu(hp1)^.opcode = A_SHL) and
-                     (Paicpu(p)^.oper[0].typ = top_const) and
-                     (Paicpu(hp1)^.oper[0].typ = top_const) and
-                     (Paicpu(hp1)^.opsize = Paicpu(p)^.opsize) And
-                     (Paicpu(hp1)^.oper[1].typ = Paicpu(p)^.oper[1].typ) And
-                     OpsEqual(Paicpu(hp1)^.oper[1], Paicpu(p)^.oper[1])
-                    Then
-                      If (Paicpu(p)^.oper[0].val > Paicpu(hp1)^.oper[0].val) And
-                         Not(CS_LittleSize In aktglobalswitches)
-                        Then
-                   { shr/sar const1, %reg
-                     shl     const2, %reg
-                      with const1 > const2 }
-                          Begin
-                            Paicpu(p)^.LoadConst(0,Paicpu(p)^.oper[0].val-Paicpu(hp1)^.oper[0].val);
-                            Paicpu(hp1)^.opcode := A_AND;
-                            l := (1 shl (Paicpu(hp1)^.oper[0].val)) - 1;
-                            Case Paicpu(p)^.opsize Of
-                              S_L: Paicpu(hp1)^.LoadConst(0,l Xor longint(-1));
-                              S_B: Paicpu(hp1)^.LoadConst(0,l Xor $ff);
-                              S_W: Paicpu(hp1)^.LoadConst(0,l Xor $ffff);
-                            End;
-                          End
-                        Else
-                          If (Paicpu(p)^.oper[0].val<Paicpu(hp1)^.oper[0].val) And
-                             Not(CS_LittleSize In aktglobalswitches)
-                            Then
-                   { shr/sar const1, %reg
-                     shl     const2, %reg
-                      with const1 < const2 }
-                              Begin
-                                Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val-Paicpu(p)^.oper[0].val);
-                                Paicpu(p)^.opcode := A_AND;
-                                l := (1 shl (Paicpu(p)^.oper[0].val))-1;
-                                Case Paicpu(p)^.opsize Of
-                                  S_L: Paicpu(p)^.LoadConst(0,l Xor $ffffffff);
-                                  S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
-                                  S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
-                                End;
-                              End
-                            Else
-                   { shr/sar const1, %reg
-                     shl     const2, %reg
-                      with const1 = const2 }
-                              if (Paicpu(p)^.oper[0].val = Paicpu(hp1)^.oper[0].val) then
-                                Begin
-                                  Paicpu(p)^.opcode := A_AND;
-                                  l := (1 shl (Paicpu(p)^.oper[0].val))-1;
-                                  Case Paicpu(p)^.opsize Of
-                                    S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
-                                    S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
-                                    S_L: Paicpu(p)^.LoadConst(0,l Xor $ffffffff);
-                                  End;
-                                  AsmL^.remove(hp1);
-                                  dispose(hp1, done);
-                                End;
-                End;
               A_SETcc :
               A_SETcc :
                 { changes
                 { changes
                     setcc (funcres)             setcc reg
                     setcc (funcres)             setcc reg
@@ -1604,6 +1635,7 @@ end;
 
 
 Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 
 
+{$ifdef USECMOV}
   function CanBeCMOV(p : pai) : boolean;
   function CanBeCMOV(p : pai) : boolean;
 
 
     begin
     begin
@@ -1613,6 +1645,7 @@ Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
          (paicpu(p)^.oper[0].typ in [top_reg,top_ref]) and
          (paicpu(p)^.oper[0].typ in [top_reg,top_ref]) and
          (paicpu(p)^.oper[1].typ in [top_reg,top_ref]);
          (paicpu(p)^.oper[1].typ in [top_reg,top_ref]);
     end;
     end;
+{$endif USECMOV}
 
 
 var
 var
   p,hp1,hp2: pai;
   p,hp1,hp2: pai;
@@ -1633,20 +1666,6 @@ Begin
         Ait_Instruction:
         Ait_Instruction:
           Begin
           Begin
             Case Paicpu(p)^.opcode Of
             Case Paicpu(p)^.opcode Of
-              A_CALL:
-                If (AktOptProcessor < ClassP6) And
-                   GetNextInstruction(p, hp1) And
-                   (hp1^.typ = ait_instruction) And
-                   (paicpu(hp1)^.opcode = A_JMP) Then
-                  Begin
-                    Inc(paicpu(hp1)^.oper[0].sym^.refs);
-                    hp2 := New(Paicpu,op_sym(A_PUSH,S_L,paicpu(hp1)^.oper[0].sym));
-                    InsertLLItem(AsmL, p^.previous, p, hp2);
-                    Paicpu(p)^.opcode := A_JMP;
-                    AsmL^.Remove(hp1);
-                    Dispose(hp1, Done)
-                  End;
-
 {$ifdef USECMOV}
 {$ifdef USECMOV}
               A_Jcc:
               A_Jcc:
                 if (aktspecificoptprocessor=ClassP6) then
                 if (aktspecificoptprocessor=ClassP6) then
@@ -1833,59 +1852,6 @@ Begin
                        p := hp1
                        p := hp1
                      End;
                      End;
                    End
                    End
-                  else if (Paicpu(p)^.oper[0].typ = Top_Const) And
-                     (Paicpu(p)^.oper[0].val = 0) And
-                     (Paicpu(p)^.oper[1].typ = Top_Reg) Then
-                    { change "mov $0, %reg" into "xor %reg, %reg" }
-                    Begin
-                      Paicpu(p)^.opcode := A_XOR;
-                      Paicpu(p)^.LoadReg(0,Paicpu(p)^.oper[1].reg);
-                    End
-                End;
-              A_MOVZX:
-                Begin
-                  If (Paicpu(p)^.oper[1].typ = top_reg) Then
-                    If (Paicpu(p)^.oper[0].typ = top_reg)
-                      Then
-                        Case Paicpu(p)^.opsize of
-                          S_BL:
-                            Begin
-                              If IsGP32Reg(Paicpu(p)^.oper[1].reg) And
-                                 Not(CS_LittleSize in aktglobalswitches) And
-                                 (aktoptprocessor = ClassP5)
-                                Then
-                                  {Change "movzbl %reg1, %reg2" to
-                                   "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
-                                   PentiumMMX}
-                                  Begin
-                                    hp1 := New(Paicpu, op_reg_reg(A_XOR, S_L,
-                                               Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[1].reg));
-                                    InsertLLItem(AsmL,p^.previous, p, hp1);
-                                    Paicpu(p)^.opcode := A_MOV;
-                                    Paicpu(p)^.changeopsize(S_B);
-                                    Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
-                                  End;
-                            End;
-                        End
-                      Else
-                        If (Paicpu(p)^.oper[0].typ = top_ref) And
-                           (Paicpu(p)^.oper[0].ref^.base <> Paicpu(p)^.oper[1].reg) And
-                           (Paicpu(p)^.oper[0].ref^.index <> Paicpu(p)^.oper[1].reg) And
-                           Not(CS_LittleSize in aktglobalswitches) And
-                           IsGP32Reg(Paicpu(p)^.oper[1].reg) And
-                           (aktoptprocessor = ClassP5) And
-                           (Paicpu(p)^.opsize = S_BL)
-                          Then
-                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
-                             Pentium and PentiumMMX}
-                            Begin
-                              hp1 := New(Paicpu,op_reg_reg(A_XOR, S_L, Paicpu(p)^.oper[1].reg,
-                                         Paicpu(p)^.oper[1].reg));
-                              Paicpu(p)^.opcode := A_MOV;
-                              Paicpu(p)^.changeopsize(S_B);
-                              Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
-                              InsertLLItem(AsmL,p^.previous, p, hp1);
-                            End;
                 End;
                 End;
               A_TEST, A_OR:
               A_TEST, A_OR:
                 {removes the line marked with (x) from the sequence
                 {removes the line marked with (x) from the sequence
@@ -1942,11 +1908,111 @@ Begin
     End;
     End;
 End;
 End;
 
 
+{ Peephole rewrites applied to every instruction from BlockStart up to (but  }
+{ not including) BlockEnd:                                                   }
+{   - "call x; jmp y"       -> "push y; jmp x"        (processors below P6)  }
+{   - "mov $0,%reg"         -> "xor %reg,%reg"                               }
+{   - "movzbl src,%reg32"   -> "xorl %reg32,%reg32; movb src,%reg8"          }
+{                              (Pentium only, not when optimizing for size)  }
+{ AsmL is the list that owns the instructions; new instructions are linked   }
+{ into it and removed ones are unlinked from it and disposed.                 }
+{ NOTE(review): judging by its name this is meant to run after the other     }
+{ optimizer passes - confirm against the caller in aopt386.                   }
+Procedure PostPeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
+var
+  p,hp1,hp2: pai;
+Begin
+  P := BlockStart;
+  While (P <> BlockEnd) Do
+    Begin
+      Case P^.Typ Of
+        Ait_Instruction:
+          Begin
+            Case Paicpu(p)^.opcode Of
+              A_CALL:
+                { the jmp target is pushed as return address, so it has to }
+                { be a symbol: an indirect jmp (register or memory operand) }
+                { has no valid oper[0].sym and must be left untouched       }
+                If (AktOptProcessor < ClassP6) And
+                   GetNextInstruction(p, hp1) And
+                   (hp1^.typ = ait_instruction) And
+                   (paicpu(hp1)^.opcode = A_JMP) And
+                   (paicpu(hp1)^.oper[0].typ = top_symbol) Then
+                  Begin
+                    { the symbol gets an extra user: the new push }
+                    Inc(paicpu(hp1)^.oper[0].sym^.refs);
+                    hp2 := New(Paicpu,op_sym(A_PUSH,S_L,paicpu(hp1)^.oper[0].sym));
+                    InsertLLItem(AsmL, p^.previous, p, hp2);
+                    Paicpu(p)^.opcode := A_JMP;
+                    AsmL^.Remove(hp1);
+                    Dispose(hp1, Done)
+                  End;
+              A_MOV:
+                if (Paicpu(p)^.oper[0].typ = Top_Const) And
+                   (Paicpu(p)^.oper[0].val = 0) And
+                   (Paicpu(p)^.oper[1].typ = Top_Reg) Then
+                  { change "mov $0, %reg" into "xor %reg, %reg" }
+                  { NOTE(review): unlike mov, xor modifies the flags; no   }
+                  { check is made here that the flags are dead - confirm   }
+                  Begin
+                    Paicpu(p)^.opcode := A_XOR;
+                    Paicpu(p)^.LoadReg(0,Paicpu(p)^.oper[1].reg);
+                  End;
+              A_MOVZX:
+                Begin
+                  If (Paicpu(p)^.oper[1].typ = top_reg) Then
+                    If (Paicpu(p)^.oper[0].typ = top_reg)
+                      Then
+                        Case Paicpu(p)^.opsize of
+                          S_BL:
+                            Begin
+                              If IsGP32Reg(Paicpu(p)^.oper[1].reg) And
+                                 Not(CS_LittleSize in aktglobalswitches) And
+                                 (aktoptprocessor = ClassP5)
+                                Then
+                                  {Change "movzbl %reg1, %reg2" to
+                                   "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
+                                   PentiumMMX}
+                                  Begin
+                                    hp1 := New(Paicpu, op_reg_reg(A_XOR, S_L,
+                                               Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[1].reg));
+                                    InsertLLItem(AsmL,p^.previous, p, hp1);
+                                    Paicpu(p)^.opcode := A_MOV;
+                                    Paicpu(p)^.changeopsize(S_B);
+                                    Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
+                                  End;
+                            End;
+                        End
+                      Else
+                        { memory source: the destination may not be part of  }
+                        { the address, since the xor clears it before the load }
+                        If (Paicpu(p)^.oper[0].typ = top_ref) And
+                           (Paicpu(p)^.oper[0].ref^.base <> Paicpu(p)^.oper[1].reg) And
+                           (Paicpu(p)^.oper[0].ref^.index <> Paicpu(p)^.oper[1].reg) And
+                           Not(CS_LittleSize in aktglobalswitches) And
+                           IsGP32Reg(Paicpu(p)^.oper[1].reg) And
+                           (aktoptprocessor = ClassP5) And
+                           (Paicpu(p)^.opsize = S_BL)
+                          Then
+                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
+                             Pentium and PentiumMMX}
+                            Begin
+                              hp1 := New(Paicpu,op_reg_reg(A_XOR, S_L, Paicpu(p)^.oper[1].reg,
+                                         Paicpu(p)^.oper[1].reg));
+                              Paicpu(p)^.opcode := A_MOV;
+                              Paicpu(p)^.changeopsize(S_B);
+                              Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
+                              InsertLLItem(AsmL,p^.previous, p, hp1);
+                            End;
+                End;
+            End;
+          End;
+      End;
+      p := Pai(p^.next)
+    End;
+End;
+
+
+
 End.
 End.
 
 
 {
 {
   $Log$
   $Log$
-  Revision 1.1  2000-10-15 09:47:43  peter
+  Revision 1.2  2000-10-24 10:40:54  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peephole opts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.1  2000/10/15 09:47:43  peter
     * moved to i386/
     * moved to i386/
 
 
   Revision 1.13  2000/10/02 13:01:29  jonas
   Revision 1.13  2000/10/02 13:01:29  jonas

+ 350 - 0
compiler/i386/rropt386.pas

@@ -0,0 +1,350 @@
+{
+    $Id$
+    Copyright (c) 1998-2000 by Jonas Maebe, member of the Free Pascal
+      development team
+
+    This unit contains register renaming functionality
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+Unit rrOpt386;
+
+{$i defines.inc}
+
+Interface
+
+Uses aasm;
+
+procedure doRenaming(asml: paasmoutput; first, last: pai);
+
+Implementation
+
+Uses
+  {$ifdef replaceregdebug}cutils,{$endif}
+  verbose,globals,cpubase,cpuasm,daopt386,csopt386,tgeni386;
+
+function canBeFirstSwitch(p: paicpu; reg: tregister): boolean;
+{ checks whether an operation on reg can be switched to another reg without an }
+{ additional mov, e.g. "addl $4,%reg1" can be changed to "leal 4(%reg1),%reg2" }
+{                                                                              }
+{ p:   the instruction that modifies reg                                       }
+{ reg: the (32 bit) register whose new value should end up in another register }
+{ returns true if doFirstSwitch can rewrite p, false for every other opcode    }
+{ or operand combination                                                       }
+begin
+  canBeFirstSwitch := false;
+  case p^.opcode of
+    A_MOV,A_MOVZX,A_MOVSX,A_LEA:
+      { plain loads: only the destination register has to be replaced }
+      canBeFirstSwitch :=
+        (p^.oper[1].typ = top_reg) and
+        (reg32(p^.oper[1].reg) = reg);
+    A_IMUL:
+      { the result can only be redirected by means of the three operand  }
+      { form "imul $const,r/m,%reg", which requires an immediate as first }
+      { operand; "imul %reg/mem,%reg1" has no such equivalent             }
+      canBeFirstSwitch :=
+        (p^.ops >= 2) and
+        (p^.oper[0].typ = top_const) and
+        (p^.oper[p^.ops-1].typ = top_reg) and
+        (reg32(p^.oper[p^.ops-1].reg) = reg);
+    A_INC,A_DEC,A_SUB,A_ADD:
+      { these become a lea, so only 32 bit register destinations with a  }
+      { non-memory source qualify; a sub can only be expressed with a    }
+      { (negated) constant                                               }
+      { NOTE(review): inc/dec presumably have their only operand in      }
+      { oper[0], in which case the oper[1] test rejects them - confirm   }
+      canBeFirstSwitch :=
+        (p^.oper[1].typ = top_reg) and
+        (p^.opsize = S_L) and
+        (reg32(p^.oper[1].reg) = reg) and
+        (p^.oper[0].typ <> top_ref) and
+        ((p^.opcode <> A_SUB) or
+         (p^.oper[0].typ = top_const));
+    A_SHL:
+      { shifts by 1..3 map onto the lea scale factors 2, 4 and 8 }
+      canBeFirstSwitch :=
+        (p^.opsize = S_L) and
+        (p^.oper[1].typ = top_reg) and
+        (p^.oper[1].reg = reg) and
+        (p^.oper[0].typ = top_const) and
+        (p^.oper[0].val in [1,2,3]);
+  end;
+end;
+
+procedure switchReg(var reg: tregister; reg1, reg2: tregister);
+{ exchanges reg1 and reg2 in reg: if reg is one of them (or the 8 or 16 bit }
+{ register derived from one of them via regtoreg8/regtoreg16), it becomes  }
+{ the corresponding register of the other one; otherwise reg is left alone }
+begin
+  { the registers themselves }
+  if reg = reg1 then
+    begin
+      reg := reg2;
+      exit;
+    end;
+  if reg = reg2 then
+    begin
+      reg := reg1;
+      exit;
+    end;
+  { their 8 bit counterparts }
+  if reg = regtoreg8(reg1) then
+    begin
+      reg := regtoreg8(reg2);
+      exit;
+    end;
+  if reg = regtoreg8(reg2) then
+    begin
+      reg := regtoreg8(reg1);
+      exit;
+    end;
+  { their 16 bit counterparts }
+  if reg = regtoreg16(reg1) then
+    begin
+      reg := regtoreg16(reg2);
+      exit;
+    end;
+  if reg = regtoreg16(reg2) then
+    reg := regtoreg16(reg1);
+end;
+
+
+procedure switchOp(var op: toper; reg1, reg2: tregister);
+{ exchanges reg1 and reg2 in a single operand: in the register itself for a }
+{ register operand, in base and index for a memory reference; operands of   }
+{ any other type are not touched                                            }
+begin
+  if op.typ = top_reg then
+    switchReg(op.reg,reg1,reg2)
+  else if op.typ = top_ref then
+    begin
+      switchReg(op.ref^.base,reg1,reg2);
+      switchReg(op.ref^.index,reg1,reg2);
+    end;
+end;
+
+procedure doSwitchReg(hp: paicpu; reg1,reg2: tregister);
+{ exchanges reg1 and reg2 in all operands of the instruction hp }
+var
+  idx, nrOfOps: longint;
+begin
+  { the operand count is read once, before any operand is changed }
+  nrOfOps := hp^.ops;
+  idx := 0;
+  while idx < nrOfOps do
+    begin
+      switchOp(hp^.oper[idx],reg1,reg2);
+      inc(idx);
+    end;
+end;
+
+
+procedure doFirstSwitch(p: paicpu; reg1, reg2: tregister);
+{ rewrites p, the first instruction that modifies reg1, so that it reads the  }
+{ old value from reg1 but writes its result to reg2 instead, e.g.             }
+{ "addl $4,%reg1" becomes "leal 4(%reg1),%reg2"                               }
+{                                                                             }
+{ p must have been accepted by canBeFirstSwitch(p,reg1); any other opcode or  }
+{ operand type ends in an internalerror                                       }
+var
+  tmpRef: treference;
+begin
+  case p^.opcode of
+    A_MOV,A_MOVZX,A_MOVSX,A_LEA:
+       begin
+         { destination reg1 -> reg2, source operand reg2 -> reg1 }
+         changeOp(p^.oper[1],reg1,reg2);
+         changeOp(p^.oper[0],reg2,reg1);
+       end;
+    A_IMUL:
+      begin
+        { "imul $c,%reg1" first has to be expanded to the three operand }
+        { form; an imul that already has three operands keeps its source }
+        { (which may be a different register or a memory reference), so  }
+        { only its destination is redirected                             }
+        if p^.ops = 2 then
+          begin
+            p^.ops := 3;
+            p^.loadreg(2,p^.oper[1].reg);
+          end;
+        changeOp(p^.oper[2],reg1,reg2);
+      end;
+    A_INC,A_DEC:
+      begin
+        { inc/dec %reg1 -> leal 1(%reg1),%reg2 / leal -1(%reg1),%reg2 }
+        reset_reference(tmpref);
+        tmpref.base := reg1;
+        case p^.opcode of
+          A_INC:
+            tmpref.offset := 1;
+          A_DEC:
+            tmpref.offset := -1;
+        end;
+        p^.ops := 2;
+        p^.opcode := A_LEA;
+        p^.loadreg(1,reg2);
+        p^.loadref(0,newreference(tmpref));
+      end;
+    A_SUB,A_ADD:
+      begin
+        { add/sub src,%reg1 -> leal src(%reg1),%reg2 }
+        reset_reference(tmpref);
+        tmpref.base := reg1;
+        case p^.oper[0].typ of
+          top_const:
+            begin
+              tmpref.offset := p^.oper[0].val;
+              if p^.opcode = A_SUB then
+                tmpref.offset := - tmpRef.offset;
+            end;
+          top_symbol:
+            tmpref.symbol := p^.oper[0].sym;
+          top_reg:
+            begin
+              tmpref.index := p^.oper[0].reg;
+              { up to this instruction reg2 was only a copy of reg1 and }
+              { that copy no longer exists after the renaming, so a     }
+              { source reg2 has to be read from reg1 (same as the       }
+              { source operand of the mov-like instructions above)      }
+              if tmpref.index = reg2 then
+                tmpref.index := reg1;
+              tmpref.scalefactor := 1;
+            end;
+          else internalerror(200010031);
+        end;
+        p^.opcode := A_LEA;
+        p^.loadref(0,newreference(tmpref));
+        p^.loadreg(1,reg2);
+      end;
+    A_SHL:
+      begin
+        { shl $n,%reg1 -> leal (,%reg1,2^n),%reg2: the scale factor is  }
+        { only applied to the index register, so reg1 must be the index }
+        { and the base stays empty                                      }
+        reset_reference(tmpref);
+        tmpref.index := reg1;
+        tmpref.scalefactor := 1 shl p^.oper[0].val;
+        p^.opcode := A_LEA;
+        p^.loadref(0,newreference(tmpref));
+        p^.loadreg(1,reg2);
+      end;
+    else internalerror(200010032);
+  end;
+end;
+
+
+function switchRegs(asml: paasmoutput; reg1, reg2: tregister; start: pai): Boolean;
+{ change movl  %reg1,%reg2 ... bla ... to ... bla with reg1 and reg2 switched }
+{                                                                             }
+{ start is the "movl %reg1,%reg2" itself (see the call in doRenaming). The    }
+{ function works in two phases:                                               }
+{  1) scan forward from start to find the end of the sequence (endP) and to   }
+{     check that every instruction in between allows the switch               }
+{  2) if that succeeded, walk over the same instructions again and rewrite    }
+{     their operands                                                          }
+{ Returns true if the instructions were rewritten; the mov itself is not      }
+{ touched here, that is left to the caller.                                   }
+var
+  endP, hp: pai;
+  switchDone, switchLast, tmpResult, sequenceEnd, reg1Modified, reg2Modified: boolean;
+  reg1StillUsed, reg2StillUsed, isInstruction: boolean;
+begin
+  switchRegs := false;
+  tmpResult := true;
+  sequenceEnd := false;
+  reg1Modified := false;
+  reg2Modified := false;
+  endP := start;
+  { phase 1: look for the end of the sequence, give up (tmpResult = false) }
+  { as soon as something is encountered that cannot be handled            }
+  while tmpResult and not sequenceEnd do
+    begin
+      tmpResult :=
+        getNextInstruction(endP,endP);
+      { instructions already marked as removable are skipped }
+      If tmpResult and
+         not ppaiprop(endP^.optinfo)^.canBeRemoved then
+        begin
+          { if the newReg gets stored back to the oldReg, we can change }
+          { "mov %oldReg,%newReg; <operations on %newReg>; mov %newReg, }
+          { %oldReg" to "<operations on %oldReg>"                       }
+          switchLast := storeBack(endP,reg1,reg2);
+          reg1StillUsed := reg1 in ppaiprop(endP^.optinfo)^.usedregs;
+          reg2StillUsed := reg2 in ppaiprop(endP^.optinfo)^.usedregs;
+          isInstruction := endP^.typ = ait_instruction;
+          sequenceEnd :=
+            switchLast or
+            { if both registers are released right before an instruction }
+            { that contains hardcoded regs, it's ok too                  }
+            (not reg1StillUsed and not reg2StillUsed) or
+            { no support for (i)div, mul and imul with hardcoded operands }
+            (((not isInstruction) or
+              noHardCodedRegs(paicpu(endP),reg1,reg2)) and
+             (not reg1StillUsed or
+              (isInstruction and findRegDealloc(reg1,endP) and
+               regLoadedWithNewValue(reg1,false,paicpu(endP)))) and
+             (not reg2StillUsed or
+              (isInstruction and findRegDealloc(reg2,endP) and
+               regLoadedWithNewValue(reg2,false,paicpu(endP)))));
+
+          { we can't switch reg1 and reg2 in something like }
+          {   movl  %reg1,%reg2                             }
+          {   movl  (%reg2),%reg2                           }
+          {   movl  4(%reg1),%reg1                          }
+          if reg2Modified and not(reg1Modified) and
+             regReadByInstruction(reg1,endP) then
+            begin
+              tmpResult := false;
+              break
+            end;
+
+          { the first instruction that modifies reg1 has to be one that }
+          { doFirstSwitch can redirect to reg2, otherwise give up       }
+          if not reg1Modified then
+            begin
+              reg1Modified := regModifiedByInstruction(reg1,endP);
+              if reg1Modified and not canBeFirstSwitch(paicpu(endP),reg1) then
+                begin
+                  tmpResult := false;
+                  break;
+                end;
+            end;
+          if not reg2Modified then
+            reg2Modified := regModifiedByInstruction(reg2,endP);
+
+          { endP itself is not subject to the checks below }
+          if sequenceEnd then
+            break;
+
+          { instructions inside the sequence: no labels, no hardcoded }
+          { registers and RegSizesOk has to agree                     }
+          tmpResult :=
+            (endP^.typ <> ait_label) and
+            ((not isInstruction) or
+             (NoHardCodedRegs(paicpu(endP),reg1,reg2) and
+               RegSizesOk(reg1,reg2,paicpu(endP))));
+        end;
+    end;
+
+  { phase 2: rewrite everything between start and endP (both exclusive) }
+  if tmpResult and sequenceEnd then
+    begin
+      switchRegs := true;
+      reg1Modified := false;
+      reg2Modified := false;
+      getNextInstruction(start,hp);
+      while hp <> endP do
+        begin
+          if (not ppaiprop(hp^.optinfo)^.canberemoved) and
+             (hp^.typ = ait_instruction) then
+            begin
+              switchDone := false;
+              if not reg1Modified then
+                begin
+                  reg1Modified := regModifiedByInstruction(reg1,hp);
+                  if reg1Modified then
+                    begin
+                      doFirstSwitch(paicpu(hp),reg1,reg2);
+                      switchDone := true;
+                    end;
+                end;
+              { before reg1 is modified for the first time, reg2 is     }
+              { replaced by reg1; after it, reg1 and reg2 are exchanged }
+              if not switchDone then
+                if reg1Modified then
+                  doSwitchReg(paicpu(hp),reg1,reg2)
+                else
+                  doReplaceReg(paicpu(hp),reg2,reg1);
+            end;
+          getNextInstruction(hp,hp);
+        end;
+      { hp = endP here: a store-back instruction is switched as well, }
+      { otherwise endP stays as it is and hp steps back in front of it }
+      if switchLast then
+        doSwitchReg(paicpu(hp),reg1,reg2)
+      else getLastInstruction(hp,hp);
+      { NOTE(review): presumably marks both registers as allocated for }
+      { the whole range start..hp - confirm in allocRegBetween         }
+      allocRegBetween(asmL,reg1,start,hp);
+      allocRegBetween(asmL,reg2,start,hp);
+    end;
+end;
+
+procedure doRenaming(asml: paasmoutput; first, last: pai);
+{ walks over the instructions from first (after SkipHead) up to last and, for }
+{ every 32 bit "movl %reg1,%reg2" between usable registers (or edi) that is   }
+{ not yet marked as removable, tries to rename the registers in the code that }
+{ follows by means of switchRegs                                              }
+var
+  p: pai;
+begin
+  p := First;
+  SkipHead(p);
+  while p <> last do
+    begin
+      if p^.typ = ait_instruction then
+        if paicpu(p)^.opcode = A_MOV then
+          begin
+            if not(ppaiprop(p^.optinfo)^.canBeRemoved) and
+               (paicpu(p)^.oper[0].typ = top_reg) and
+               (paicpu(p)^.oper[1].typ = top_reg) and
+               (paicpu(p)^.opsize = S_L) and
+               (paicpu(p)^.oper[0].reg in (usableregs+[R_EDI])) and
+               (paicpu(p)^.oper[1].reg in (usableregs+[R_EDI])) then
+              if switchRegs(asml,paicpu(p)^.oper[0].reg,
+                   paicpu(p)^.oper[1].reg,p) then
+                { the mov has become superfluous; it is not unlinked from }
+                { the list here, only flagged as removable                }
+                ppaiprop(p^.optinfo)^.canBeRemoved := true;
+          end;
+      getNextInstruction(p,p);
+    end;
+end;
+
+
+End.
+
+{
+  $Log$
+  Revision 1.1  2000-10-24 10:40:54  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peephole opts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+
+}

+ 1 - 1
compiler/msgidx.inc

@@ -557,7 +557,7 @@ const
   option_info=11024;
   option_info=11024;
   option_help_pages=11025;
   option_help_pages=11025;
 
 
-  MsgTxtSize = 31210;
+  MsgTxtSize = 31225;
 
 
   MsgIdxMax : array[1..20] of longint=(
   MsgIdxMax : array[1..20] of longint=(
     17,58,165,34,41,41,86,14,35,40,
     17,58,165,34,41,41,86,14,35,40,

+ 10 - 10
compiler/msgtxt.inc

@@ -728,37 +728,37 @@ const msgtxt : array[0..000130,1..240] of char=(
   '3*2Ou_enable uncertain optimizations (see docs)'#010+
   '3*2Ou_enable uncertain optimizations (see docs)'#010+
   '3*2O1_level 1 optimizat','ions (quick optimizations)'#010+
   '3*2O1_level 1 optimizat','ions (quick optimizations)'#010+
   '3*2O2_level 2 optimizations (-O1 + slower optimizations)'#010+
   '3*2O2_level 2 optimizations (-O1 + slower optimizations)'#010+
-  '3*2O3_level 3 optimizations (same as -O2u)'#010+
+  '3*2O3_level 3 optimizations (-O2 repeatedly, max 5 times)'#010+
   '3*2Op<x>_target processor:'#010+
   '3*2Op<x>_target processor:'#010+
   '3*3Op1_set target processor to 386/486'#010+
   '3*3Op1_set target processor to 386/486'#010+
-  '3*3Op2_set target processor to Pentium/PentiumM','MX (tm)'#010+
+  '3*3Op2_set target processor to P','entium/PentiumMMX (tm)'#010+
   '3*3Op3_set target processor to PPro/PII/c6x86/K6 (tm)'#010+
   '3*3Op3_set target processor to PPro/PII/c6x86/K6 (tm)'#010+
   '3*1T<x>_Target operating system:'#010+
   '3*1T<x>_Target operating system:'#010+
   '3*2TGO32V1_version 1 of DJ Delorie DOS extender'#010+
   '3*2TGO32V1_version 1 of DJ Delorie DOS extender'#010+
   '3*2TGO32V2_version 2 of DJ Delorie DOS extender'#010+
   '3*2TGO32V2_version 2 of DJ Delorie DOS extender'#010+
   '3*2TLINUX_Linux'#010+
   '3*2TLINUX_Linux'#010+
-  '3*2Tnetware_Novell Netware Module',' (experimental)'#010+
+  '3*2Tnetware_Novell',' Netware Module (experimental)'#010+
   '3*2TOS2_OS/2 2.x'#010+
   '3*2TOS2_OS/2 2.x'#010+
   '3*2TWin32_Windows 32 Bit'#010+
   '3*2TWin32_Windows 32 Bit'#010+
   '3*1W<x>_Win32 target options'#010+
   '3*1W<x>_Win32 target options'#010+
   '3*2WB<x>_Set Image base to Hexadecimal <x> value'#010+
   '3*2WB<x>_Set Image base to Hexadecimal <x> value'#010+
   '3*2WC_Specify console type application'#010+
   '3*2WC_Specify console type application'#010+
-  '3*2WD_Use DEFFILE to export functions of DLL or EXE'#010+
-  '3*2WG_Specify',' graphic type application'#010+
+  '3*2WD_Use DEFFILE to export functions of DLL or EX','E'#010+
+  '3*2WG_Specify graphic type application'#010+
   '3*2WN_Do not generate relocation code (necessary for debugging)'#010+
   '3*2WN_Do not generate relocation code (necessary for debugging)'#010+
   '3*2WR_Generate relocation code'#010+
   '3*2WR_Generate relocation code'#010+
   '6*1A<x>_output format'#010+
   '6*1A<x>_output format'#010+
   '6*2Aas_Unix o-file using GNU AS'#010+
   '6*2Aas_Unix o-file using GNU AS'#010+
   '6*2Agas_GNU Motorola assembler'#010+
   '6*2Agas_GNU Motorola assembler'#010+
-  '6*2Amit_MIT Syntax (old GAS)'#010+
-  '6*2Am','ot_Standard Motorola assembler'#010+
+  '6*2Amit_MIT Syntax ','(old GAS)'#010+
+  '6*2Amot_Standard Motorola assembler'#010+
   '6*1O_optimizations:'#010+
   '6*1O_optimizations:'#010+
   '6*2Oa_turn on the optimizer'#010+
   '6*2Oa_turn on the optimizer'#010+
   '6*2Og_generate smaller code'#010+
   '6*2Og_generate smaller code'#010+
   '6*2OG_generate faster code (default)'#010+
   '6*2OG_generate faster code (default)'#010+
   '6*2Ox_optimize maximum (still BUGGY!!!)'#010+
   '6*2Ox_optimize maximum (still BUGGY!!!)'#010+
-  '6*2O2_set target processor to a MC68020+'#010+
-  '6*1R<x>_assembl','er reading style:'#010+
+  '6*2O2_set target processor to a MC68020+'#010,
+  '6*1R<x>_assembler reading style:'#010+
   '6*2RMOT_read motorola style assembler'#010+
   '6*2RMOT_read motorola style assembler'#010+
   '6*1T<x>_Target operating system:'#010+
   '6*1T<x>_Target operating system:'#010+
   '6*2TAMIGA_Commodore Amiga'#010+
   '6*2TAMIGA_Commodore Amiga'#010+
@@ -767,5 +767,5 @@ const msgtxt : array[0..000130,1..240] of char=(
   '6*2TLINUX_Linux-68k'#010+
   '6*2TLINUX_Linux-68k'#010+
   '**1*_'#010+
   '**1*_'#010+
   '**1?_shows this help'#010+
   '**1?_shows this help'#010+
-  '**1h_shows this help withou','t waiting'#000
+  '**1h_shows t','his help without waiting'#000
 );
 );

+ 18 - 5
compiler/opts386.pas

@@ -58,10 +58,10 @@ begin
                  'g' : initglobalswitches:=initglobalswitches+[cs_littlesize];
                  'g' : initglobalswitches:=initglobalswitches+[cs_littlesize];
                  'G' : initglobalswitches:=initglobalswitches-[cs_littlesize];
                  'G' : initglobalswitches:=initglobalswitches-[cs_littlesize];
                  'r' : initglobalswitches:=initglobalswitches+[cs_regalloc];
                  'r' : initglobalswitches:=initglobalswitches+[cs_regalloc];
-                 'u' : initglobalswitches:=initglobalswitches+[cs_optimize,cs_uncertainopts];
-                 '1' : initglobalswitches:=initglobalswitches-[cs_slowoptimize,cs_uncertainopts]+[cs_optimize,cs_fastoptimize];
-                 '2' : initglobalswitches:=initglobalswitches-[cs_uncertainopts]+[cs_optimize,cs_fastoptimize,cs_slowoptimize];
-                 '3' : initglobalswitches:=initglobalswitches+[cs_optimize,cs_fastoptimize,cs_slowoptimize,cs_uncertainopts];
+                 'u' : initglobalswitches:=initglobalswitches+[cs_uncertainopts];
+                 '1' : initglobalswitches:=initglobalswitches-[cs_fastoptimize,cs_slowoptimize]+[cs_optimize];
+                 '2' : initglobalswitches:=initglobalswitches-[cs_slowoptimize]+[cs_optimize,cs_fastoptimize];
+                 '3' : initglobalswitches:=initglobalswitches+[cs_optimize,cs_fastoptimize,cs_slowoptimize];
                  'p' :
                  'p' :
                    Begin
                    Begin
                      If j < Length(Opt) Then
                      If j < Length(Opt) Then
@@ -115,7 +115,20 @@ end;
 end.
 end.
 {
 {
   $Log$
   $Log$
-  Revision 1.5  2000-09-24 15:06:20  peter
+  Revision 1.6  2000-10-24 10:40:53  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peepholopts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.5  2000/09/24 15:06:20  peter
     * use defines.inc
     * use defines.inc
 
 
   Revision 1.4  2000/08/27 16:11:51  peter
   Revision 1.4  2000/08/27 16:11:51  peter