Parcourir la source

+ register renaming ("fixes" bug1088)
* changed command line options meanings for optimizer:
O2 now means peephole opts, CSE and register renaming in 1 pass
O3 is the same, but repeated until no further optimizations are
possible or until 5 passes have been done (to avoid endless loops)
* changed aopt386 so it does this looping
* added some procedures from csopt386 to the interface because they're
used by rropt386 as well
* some changes to csopt386 and daopt386 so that newly added instructions
by the CSE get optimizer info (they were simply skipped previously),
this fixes some bugs

Jonas Maebe il y a 25 ans
Parent
commit
a4fde73649

+ 1 - 1
compiler/errore.msg

@@ -1870,7 +1870,7 @@ option_help_pages=11025_[
 3*2Ou_enable uncertain optimizations (see docs)
 3*2O1_level 1 optimizations (quick optimizations)
 3*2O2_level 2 optimizations (-O1 + slower optimizations)
-3*2O3_level 3 optimizations (same as -O2u)
+3*2O3_level 3 optimizations (-O2 repeatedly, max 5 times)
 3*2Op<x>_target processor:
 3*3Op1_set target processor to 386/486
 3*3Op2_set target processor to Pentium/PentiumMMX (tm)

+ 37 - 14
compiler/i386/aopt386.pas

@@ -43,15 +43,20 @@ Uses
 
 Procedure Optimize(AsmL: PAasmOutput);
 Var
-  count, max: longint;
   BlockStart, BlockEnd, HP: Pai;
+  pass: longint;
+  slowopt, changed, lastLoop: boolean;
 Begin
-  if (cs_slowoptimize in aktglobalswitches) then
-   { Optimize twice }
-    max := 2
-  else max := 1;
-  for count := 1 to max do
-    begin
+  slowopt := (cs_slowoptimize in aktglobalswitches);
+  pass := 0;
+  changed := false;
+  repeat
+     lastLoop :=
+       not(slowopt) or
+       (not changed and (pass > 2)) or
+      { prevent endless loops }
+       (pass = 4);
+     changed := false;
    { Setup labeltable, always necessary }
      BlockStart := Pai(AsmL^.First);
      BlockEnd := DFAPass1(AsmL, BlockStart);
@@ -59,13 +64,15 @@ Begin
    { or nil                                                                }
      While Assigned(BlockStart) Do
        Begin
+         if pass = 0 then
+           PrePeepHoleOpts(AsmL, BlockStart, BlockEnd);
         { Peephole optimizations }
          PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
         { Only perform them twice in the first pass }
-         if count = 1 then
+         if pass = 0 then
            PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
         { Data flow analyzer }
-         If (cs_slowoptimize in aktglobalswitches) Then
+         If (cs_fastoptimize in aktglobalswitches) Then
            Begin
              If DFAPass2(
 {$ifdef statedebug}
@@ -73,10 +80,12 @@ Begin
 {$endif statedebug}
                                BlockStart, BlockEnd) Then
               { common subexpression elimination }
-               CSE(AsmL, BlockStart, BlockEnd);
+               changed := CSE(asmL, blockStart, blockEnd, pass) or changed;
            End;
         { More peephole optimizations }
          PeepHoleOptPass2(AsmL, BlockStart, BlockEnd);
+         if lastLoop then
+           PostPeepHoleOpts(AsmL, BlockStart, BlockEnd);
         { Dispose labeltable }
          ShutDownDFA;
         { Continue where we left off, BlockEnd is either the start of an }
@@ -100,15 +109,29 @@ Begin
                BlockEnd := DFAPass1(AsmL, BlockStart)
              { Otherwise, skip the next assembler block }
              Else BlockStart := HP;
-           End
-      End;
-   end;
+           End;
+       End;
+     inc(pass);
+  until lastLoop;
 End;
 
 End.
 {
   $Log$
-  Revision 1.1  2000-10-15 09:47:42  peter
+  Revision 1.2  2000-10-24 10:40:53  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peephole opts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.1  2000/10/15 09:47:42  peter
     * moved to i386/
 
   Revision 1.5  2000/09/24 15:06:11  peter

+ 210 - 154
compiler/i386/csopt386.pas

@@ -27,16 +27,21 @@ Unit CSOpt386;
 
 Interface
 
-Uses aasm;
+Uses aasm, cpubase, cpuasm;
 
-{Procedure CSOpt386(First, Last: Pai);}
-Procedure CSE(AsmL: PAasmOutput; First, Last: Pai);
+function CSE(asmL: paasmoutput; first, last: pai; pass: longint): boolean;
+
+function doReplaceReg(hp: paicpu; newReg, orgReg: tregister): boolean;
+function changeOp(var o: toper; newReg, orgReg: tregister): boolean;
+function storeBack(p1: pai; orgReg, newReg: tregister): boolean;
+function NoHardCodedRegs(p: paicpu; orgReg, newReg: TRegister): boolean;
+function RegSizesOK(oldReg,newReg: TRegister; p: paicpu): boolean;
 
 Implementation
 
 Uses
   {$ifdef replaceregdebug}cutils,{$endif}
-  verbose, hcodegen, globals,cpubase,cpuasm,DAOpt386, tgeni386;
+  globtype, verbose, hcodegen, globals, daopt386, tgeni386, rropt386;
 
 {
 Function PaiInSequence(P: Pai; Const Seq: TContent): Boolean;
@@ -83,7 +88,7 @@ begin
               end;
           end
        else
-        if is_reg_var[reg32(p^.oper[1].reg)] then
+{         if is_reg_var[reg32(p^.oper[1].reg)] then }
           for regCounter := R_EAX to R_EDI do
             begin
               if writeDestroysContents(p^.oper[1],regCounter,c[regCounter]) then
@@ -130,9 +135,9 @@ begin
       for opCount := 1 to MaxCh do
         case InsProp[p^.opcode].Ch[opCount] of
           Ch_MOp1,CH_WOp1,CH_RWOp1:
-            if (p^.oper[0].typ = top_ref) or
-               ((p^.oper[0].typ = top_reg) and
-                is_reg_var[reg32(p^.oper[0].reg)]) then
+{             if (p^.oper[0].typ = top_ref) or }
+{                ((p^.oper[0].typ = top_reg) and }
+{                 is_reg_var[reg32(p^.oper[0].reg)]) then }
               for regCounter := R_EAX to R_EDI do
                 if writeDestroysContents(p^.oper[0],regCounter,c[regCounter]) then
                   begin
@@ -140,9 +145,9 @@ begin
                     modifiesConflictingMemLocation := not(reg in regsStillValid);
                   end;
           Ch_MOp2,CH_WOp2,CH_RWOp2:
-            if (p^.oper[1].typ = top_ref) or
-               ((p^.oper[1].typ = top_reg) and
-                is_reg_var[reg32(p^.oper[1].reg)]) then
+{             if (p^.oper[1].typ = top_ref) or }
+{                ((p^.oper[1].typ = top_reg) and }
+{                 is_reg_var[reg32(p^.oper[1].reg)]) then }
               for regCounter := R_EAX to R_EDI do
                 if writeDestroysContents(p^.oper[1],regCounter,c[regCounter]) then
                   begin
@@ -150,9 +155,9 @@ begin
                     modifiesConflictingMemLocation := not(reg in regsStillValid);
                   end;
           Ch_MOp3,CH_WOp3,CH_RWOp3:
-            if (p^.oper[2].typ = top_ref) or
-               ((p^.oper[2].typ = top_reg) and
-                is_reg_var[reg32(p^.oper[2].reg)]) then
+{             if (p^.oper[2].typ = top_ref) or }
+{                ((p^.oper[2].typ = top_reg) and }
+{                 is_reg_var[reg32(p^.oper[2].reg)]) then }
               for regCounter := R_EAX to R_EDI do
                 if writeDestroysContents(p^.oper[2],regCounter,c[regCounter]) then
                   begin
@@ -290,7 +295,7 @@ end;
  Found holds the number of instructions between StartMod and EndMod and false
  is returned}
 Function CheckSequence(p: Pai; var prev: pai; Reg: TRegister; Var Found: Longint;
-           Var RegInfo: TRegInfo): Boolean;
+           Var RegInfo: TRegInfo; findPrevSeqs: boolean): Boolean;
 
 const
   checkingPrevSequences: boolean = false;
@@ -310,7 +315,8 @@ var
                  in [con_ref,con_noRemoveRef]);
         if currentReg > R_EDI then
           begin
-            if isSimpleMemLoc(paicpu(p)^.oper[0].ref^) then
+            if (paicpu(p)^.oper[0].typ <> top_ref) or
+               isSimpleMemLoc(paicpu(p)^.oper[0].ref^) then
               begin
                 checkingPrevSequences := true;
                 passedJump := false;
@@ -321,15 +327,19 @@ var
         else getNextRegToTest := currentReg;
       end;
     if checkingPrevSequences then
-      getNextRegToTest :=
-        getPrevSequence(p,reg,prev,prev,passedJump,regsNotRead,RegsStillValid);
+      if findPrevSeqs then
+        getNextRegToTest :=
+          getPrevSequence(p,reg,prev,prev,passedJump,regsNotRead,RegsStillValid)
+      else
+        getNextRegToTest := R_NO;
   end;
 
 Var hp2, hp3{, EndMod},highPrev, orgPrev: Pai;
     {Cnt,} OldNrOfMods: Longint;
     startRegInfo, OrgRegInfo, HighRegInfo: TRegInfo;
+    regModified: array[R_NO..R_EDI] of boolean;
     HighFound, OrgRegFound: Byte;
-    RegCounter, regCounter2: TRegister;
+    RegCounter, regCounter2, tmpreg: TRegister;
     OrgRegResult: Boolean;
     TmpResult: Boolean;
     {TmpState: Byte;}
@@ -356,6 +366,7 @@ Begin {CheckSequence}
   regCounter := getNextRegToTest(prev,R_NO);
   While (RegCounter <> R_NO) Do
     Begin
+      fillchar(regModified,sizeof(regModified),0);
       regInfo := startRegInfo;
       Found := 0;
       hp2 := PPaiProp(prev^.OptInfo)^.Regs[RegCounter].StartMod;
@@ -371,13 +382,34 @@ Begin {CheckSequence}
              ((paicpu(hp3)^.opcode = A_MOV) or
               (paicpu(hp3)^.opcode = A_MOVZX) or
               (paicpu(hp3)^.opcode = A_MOVSX)) and
-             (paicpu(hp3)^.oper[0].typ in
-               [top_const,top_ref,top_symbol]) and
              (paicpu(hp3)^.oper[1].typ = top_reg) and
-             not(regInRef(reg32(paicpu(hp3)^.oper[1].reg),
-                   paicpu(hp3)^.oper[0].ref^)) then
-            regInfo.lastReload
-              [reg32(paicpu(hp3)^.oper[1].reg)] := hp3;
+             not(regInOp(paicpu(hp3)^.oper[1].reg,
+                   paicpu(hp3)^.oper[0])) then
+            begin
+              tmpreg := reg32(paicpu(hp3)^.oper[1].reg);
+              regInfo.lastReload[tmpreg] := hp3;
+              case paicpu(hp3)^.oper[0].typ of
+                top_ref:
+                  begin
+                  if regModified[reg32(paicpu(hp3)^.oper[0].ref^.base)] then
+                    with ppaiprop(hp3^.optinfo)^.regs[tmpreg] do
+                      if nrOfMods > (oldNrOfMods - found) then
+                        oldNrOfMods := found + nrOfMods;
+                  if regModified[reg32(paicpu(hp3)^.oper[0].ref^.index)] then
+                    with ppaiprop(hp3^.optinfo)^.regs[tmpreg] do
+                      if nrOfMods > (oldNrOfMods - found) then
+                        oldNrOfMods := found + nrOfMods;
+                  end;
+                top_reg:
+                  if regModified[reg32(paicpu(hp3)^.oper[0].reg)] then
+                    with ppaiprop(hp3^.optinfo)^.regs[tmpreg] do
+                      if nrOfMods > (oldNrOfMods - found) then
+                        oldNrOfMods := found + nrOfMods;
+              end;
+            end;
+          for regCounter2 := R_EAX to R_EDI do
+            regModified[regCounter2] := regModified[regCounter2] or
+              regModifiedByInstruction(regCounter2,hp3);
           GetNextInstruction(hp2, hp2);
           GetNextInstruction(hp3, hp3);
           Inc(Found)
@@ -674,40 +706,6 @@ begin
 end;
 
 
-function FindRegDealloc(reg: tregister; p: pai): boolean;
-{ assumes reg is a 32bit register }
-var
-  hp: pai;
-  first: boolean;
-begin
-  findregdealloc := false;
-  first := true;
-  while assigned(p^.previous) and
-        ((Pai(p^.previous)^.typ in (skipinstr+[ait_align])) or
-         ((Pai(p^.previous)^.typ = ait_label) and
-          labelCanBeSkipped(pai_label(p^.previous)))) do
-    begin
-      p := pai(p^.previous);
-      if (p^.typ = ait_regalloc) and
-         (pairegalloc(p)^.reg = reg) then
-        if not(pairegalloc(p)^.allocation) then
-          if first then
-            begin
-              findregdealloc := true;
-              break;
-            end
-          else
-            begin
-              findRegDealloc :=
-                getNextInstruction(p,hp) and
-                 regLoadedWithNewValue(reg,false,hp);
-              break
-            end
-        else
-          first := false;
-    end
-end;
-
 Procedure ClearRegContentsFrom(reg: TRegister; p, endP: pai);
 { first clears the contents of reg from p till endP. Then the contents are }
 { cleared until the first instruction that changes reg                     }
@@ -753,7 +751,7 @@ begin
 {$endif replaceregdebug}
 end;
 
-function NoHardCodedRegs(p: paicpu; orgReg, newReg: tRegister): boolean;
+function NoHardCodedRegs(p: paicpu; orgReg, newReg: TRegister): boolean;
 var chCount: byte;
 begin
   case p^.opcode of
@@ -775,7 +773,7 @@ begin
   end;
 end;
 
-function ChangeReg(var Reg: TRegister; orgReg, newReg: TRegister): boolean;
+function ChangeReg(var Reg: TRegister; newReg, orgReg: TRegister): boolean;
 begin
   changeReg := true;
   if reg = newReg then
@@ -787,15 +785,15 @@ begin
   else changeReg := false;
 end;
 
-function changeOp(var o: toper; orgReg, newReg: tregister): boolean;
+function changeOp(var o: toper; newReg, orgReg: tregister): boolean;
+{ applies changeReg(..,newReg,orgReg) to every register held by operand o: }
+{ the register itself for top_reg, both base and index for top_ref.        }
+{ Returns true if at least one of those calls returned true, and false for }
+{ every other operand type                                                 }
+var
+  baseChanged: boolean;
 begin
+  { operand types that are not handled below would otherwise leave the }
+  { function result undefined                                          }
+  changeOp := false;
   case o.typ of
-    top_reg: changeOp := changeReg(o.reg,orgReg,newReg);
+    top_reg: changeOp := changeReg(o.reg,newReg,orgReg);
     top_ref:
       begin
-        changeOp :=
-          changeReg(o.ref^.base,orgReg,newReg) or
-          changeReg(o.ref^.index,orgReg,newReg);
+        { call changeReg for base and index separately: with short-circuit }
+        { boolean evaluation "changeReg(base) or changeReg(index)" skips   }
+        { the index as soon as the base has been changed                   }
+        baseChanged := changeReg(o.ref^.base,newReg,orgReg);
+        changeOp := changeReg(o.ref^.index,newReg,orgReg) or baseChanged;
       end;
   end;
 end;
@@ -829,14 +827,14 @@ begin
     end;
 end;
 
-function doReplaceReg(orgReg,newReg: tregister; hp: paicpu): boolean;
+function doReplaceReg(hp: paicpu; newReg, orgReg: tregister): boolean;
+{ calls changeOp(..,newReg,orgReg) on each of the hp^.ops operands of hp }
+{ and returns true if at least one of those calls returned true          }
 var
-  opCount: byte;
+  opCount: longint;
   tmpResult: boolean;
 begin
+  { tmpResult is or'ed with every changeOp result and is also the result }
+  { when hp has no operands, so it has to start out as false             }
+  tmpResult := false;
-  for opCount := 0 to 2 do
+  for opCount := 0 to hp^.ops-1 do
     tmpResult :=
-      changeOp(hp^.oper[opCount],orgReg,newReg) or tmpResult;
+      changeOp(hp^.oper[opCount],newReg,orgReg) or tmpResult;
   doReplaceReg := tmpResult;
 end;
 
@@ -858,7 +856,7 @@ begin
     end;
 end;
 
-function doReplaceReadReg(orgReg,newReg: tregister; p: paicpu): boolean;
+function doReplaceReadReg(p: paicpu; newReg,orgReg: tregister): boolean;
 var opCount: byte;
 begin
   doReplaceReadReg := false;
@@ -870,13 +868,13 @@ begin
           1: internalerror(1301001);
           2,3:
             begin
-              if changeOp(p^.oper[0],orgReg,newReg) then
+              if changeOp(p^.oper[0],newReg,orgReg) then
                 begin
 {                  updateStates(orgReg,newReg,p,false);}
                   doReplaceReadReg := true;
                 end;
              if p^.ops = 3 then
-                if changeOp(p^.oper[1],orgReg,newReg) then
+                if changeOp(p^.oper[1],newReg,orgReg) then
                   begin
 {                    updateStates(orgReg,newReg,p,false);}
                     doReplaceReadReg := true;
@@ -889,7 +887,7 @@ begin
       begin
         for opCount := 0 to 2 do
           if p^.oper[opCount].typ = top_ref then
-            if changeOp(p^.oper[opCount],orgReg,newReg) then
+            if changeOp(p^.oper[opCount],newReg,orgReg) then
               begin
 {                updateStates(orgReg,newReg,p,false);}
                 doReplaceReadReg := true;
@@ -898,21 +896,21 @@ begin
           case InsProp[p^.opcode].Ch[opCount] of
             Ch_ROp1:
               if p^.oper[0].typ = top_reg then
-                if changeReg(p^.oper[0].reg,orgReg,newReg) then
+                if changeReg(p^.oper[0].reg,newReg,orgReg) then
                   begin
 {                    updateStates(orgReg,newReg,p,false);}
                     doReplaceReadReg := true;
                   end;
             Ch_ROp2:
               if p^.oper[1].typ = top_reg then
-                if changeReg(p^.oper[1].reg,orgReg,newReg) then
+                if changeReg(p^.oper[1].reg,newReg,orgReg) then
                   begin
 {                    updateStates(orgReg,newReg,p,false);}
                     doReplaceReadReg := true;
                   end;
             Ch_ROp3:
               if p^.oper[2].typ = top_reg then
-                if changeReg(p^.oper[2].reg,orgReg,newReg) then
+                if changeReg(p^.oper[2].reg,newReg,orgReg) then
                   begin
 {                    updateStates(orgReg,newReg,p,false);}
                     doReplaceReadReg := true;
@@ -998,6 +996,20 @@ begin
 end;
 
 
+function storeBack(p1: pai; orgReg, newReg: tregister): boolean;
+{ true when p1 is a "mov" instruction with two register operands that }
+{ stores the contents of newReg (oper[0]) back to orgReg (oper[1])    }
+var
+  ins: paicpu;
+begin
+  storeBack := false;
+  { anything that is not an instruction can never be a store back }
+  if p1^.typ <> ait_instruction then
+    exit;
+  ins := paicpu(p1);
+  if ins^.opcode <> A_MOV then
+    exit;
+  { first operand has to be the register newReg }
+  if (ins^.oper[0].typ <> top_reg) or
+     (ins^.oper[0].reg <> newReg) then
+    exit;
+  { second operand has to be the register orgReg }
+  storeBack :=
+    (ins^.oper[1].typ = top_reg) and
+    (ins^.oper[1].reg = orgReg);
+end;
+
+
 function ReplaceReg(asmL: PaasmOutput; orgReg, newReg: TRegister; p: pai;
            const c: TContent; orgRegCanBeModified: Boolean;
            var returnEndP: pai): Boolean;
@@ -1012,18 +1024,6 @@ var endP, hp: Pai;
     removeLast, sequenceEnd, tmpResult, newRegModified, orgRegRead,
       stateChanged, readStateChanged: Boolean;
 
-  function storeBack(p1: pai): boolean;
-  { returns true if p1 contains an instruction that stores the contents }
-  { of newReg back to orgReg                                            }
-  begin
-    storeBack :=
-      (p1^.typ = ait_instruction) and
-      (paicpu(p1)^.opcode = A_MOV) and
-      (paicpu(p1)^.oper[0].typ = top_reg) and
-      (paicpu(p1)^.oper[0].reg = newReg) and
-      (paicpu(p1)^.oper[1].typ = top_reg) and
-      (paicpu(p1)^.oper[1].reg = orgReg);
-  end;
 
 begin
   ReplaceReg := false;
@@ -1055,7 +1055,7 @@ begin
           { if the newReg gets stored back to the oldReg, we can change }
           { "mov %oldReg,%newReg; <operations on %newReg>; mov %newReg, }
           { %oldReg" to "<operations on %oldReg>"                       }
-          removeLast := storeBack(endP);
+          removeLast := storeBack(endP, orgReg, newReg);
           sequenceEnd :=
             { no support for (i)div, mul and imul with hardcoded operands }
             (noHardCodedRegs(paicpu(endP),orgReg,newReg) and
@@ -1144,14 +1144,14 @@ begin
           if {not(PPaiProp(hp^.optInfo)^.canBeRemoved) and }
              (hp^.typ = ait_instruction) then
             stateChanged :=
-              doReplaceReg(orgReg,newReg,paicpu(hp)) or stateChanged;
+              doReplaceReg(paicpu(hp),newReg,orgReg) or stateChanged;
             if stateChanged then
               updateStates(orgReg,newReg,hp,true);
           getNextInstruction(hp,hp)
         end;
       if assigned(endp) and (endp^.typ = ait_instruction) then
         readStateChanged :=
-          DoReplaceReadReg(orgReg,newReg,paicpu(endP));
+          DoReplaceReadReg(paicpu(endP),newReg,orgReg);
       if stateChanged or readStateChanged then
         updateStates(orgReg,newReg,endP,stateChanged);
 
@@ -1271,11 +1271,11 @@ begin
 end;
 
 
-Procedure DoCSE(AsmL: PAasmOutput; First, Last: Pai);
+procedure DoCSE(AsmL: PAasmOutput; First, Last: Pai; findPrevSeqs, doSubOpts: boolean);
 {marks the instructions that can be removed by RemoveInstructs. They're not
  removed immediately because sometimes an instruction needs to be checked in
  two different sequences}
-var cnt, cnt2, cnt3: longint;
+var cnt, cnt2, orgNrOfMods: longint;
     p, hp1, hp2, prevSeq, prevSeq_next: Pai;
     hp3, hp4: pai;
     hp5 : pai;
@@ -1284,7 +1284,6 @@ var cnt, cnt2, cnt3: longint;
 Begin
   p := First;
   SkipHead(p);
-  First := p;
   While (p <> Last) Do
     Begin
       Case p^.typ Of
@@ -1302,11 +1301,19 @@ Begin
                        PPaiProp(Pai(p)^.OptInfo)^.CanBeRemoved := True;
               A_MOV, A_MOVZX, A_MOVSX:
                 Begin
+                  hp2 := p;
                   Case Paicpu(p)^.oper[0].typ Of
-                    Top_Ref:
-                      Begin {destination is always a register in this case}
+                    top_ref, top_reg:
+                     if (paicpu(p)^.oper[1].typ = top_reg) then
+                       Begin
                         With PPaiProp(p^.OptInfo)^.Regs[Reg32(Paicpu(p)^.oper[1].reg)] Do
                           Begin
+                            if assigned(startmod) and
+                               (startmod = p)then
+                              orgNrOfMods := ppaiprop(startmod^.optinfo)^.
+                                regs[reg32(paicpu(p)^.oper[1].reg)].nrOfMods
+                            else
+                              orgNrOfMods := 0;
                             If (p = StartMod) And
                                GetLastInstruction (p, hp1) And
                                (hp1^.typ <> ait_marker) Then
@@ -1317,7 +1324,7 @@ Begin
                                  'cse checking '+att_reg2str[Reg32(Paicpu(p)^.oper[1].reg)])));
                                insertLLItem(asml,p,p^.next,hp5);
 {$endif csdebug}
-                               If CheckSequence(p,prevSeq,Paicpu(p)^.oper[1].reg, Cnt, RegInfo) And
+                               If CheckSequence(p,prevSeq,Paicpu(p)^.oper[1].reg, Cnt, RegInfo, findPrevSeqs) And
                                   (Cnt > 0) Then
                                  Begin
                                    hp1 := nil;
@@ -1336,19 +1343,17 @@ Begin
 {   movl 16(%ebp), %eax                                                     }
 {   movl 8(%edx), %edx                                                      }
 {   movl 4(%eax), eax                                                       }
-                                   hp2 := p;
                                    Cnt2 := 1;
                                    While Cnt2 <= Cnt Do
                                      Begin
-                                       If Not(RegInInstruction(Paicpu(hp2)^.oper[1].reg, p)) then
+(*                                       If not(regInInstruction(Paicpu(hp2)^.oper[1].reg, p)) and
+                                          not(ppaiprop(p^.optinfo)^.canBeRemoved) then
                                          begin
-                                           if ((p^.typ = ait_instruction) And
-                                               ((paicpu(p)^.OpCode = A_MOV)  or
-                                                (paicpu(p)^.opcode = A_MOVZX) or
-                                                (paicpu(p)^.opcode = A_MOVSX)) And
-                                               (paicpu(p)^.Oper[0].typ in
-                                                 [top_const,top_ref,top_symbol])) and
-                                               (paicpu(p)^.oper[1].typ = top_reg) then
+                                           if (p^.typ = ait_instruction) And
+                                              ((paicpu(p)^.OpCode = A_MOV)  or
+                                               (paicpu(p)^.opcode = A_MOVZX) or
+                                               (paicpu(p)^.opcode = A_MOVSX)) And
+                                              (paicpu(p)^.oper[1].typ = top_reg) then
                                              begin
                                                regCounter := reg32(paicpu(p)^.oper[1].reg);
                                                if (regCounter in reginfo.regsStillUsedAfterSeq) then
@@ -1369,9 +1374,9 @@ Begin
                                                  end
 {$endif noremove}
                                              end
-                                         end
+                                         end *)
 {$ifndef noremove}
-                                       else
+{                                       else }
                                          PPaiProp(p^.OptInfo)^.CanBeRemoved := True
 {$endif noremove}
                                        ; Inc(Cnt2);
@@ -1397,15 +1402,18 @@ Begin
                                         (RegInfo.New2OldReg[RegCounter] <> R_NO) Then
                                        Begin
                                          AllocRegBetween(AsmL,RegInfo.New2OldReg[RegCounter],
-                                           PPaiProp(prevSeq^.OptInfo)^.Regs[RegInfo.New2OldReg[RegCounter]].StartMod,prevSeq_next);
+                                           PPaiProp(prevSeq^.OptInfo)^.Regs[RegInfo.New2OldReg[RegCounter]].StartMod,hp2);
                                          if hp4 <> prevSeq then
                                            begin
                                              if assigned(reginfo.lastReload[regCounter]) then
                                                getLastInstruction(reginfo.lastReload[regCounter],hp3)
+                                             else if assigned(reginfo.lastReload[regInfo.New2OldReg[regCounter]]) then
+                                               getLastInstruction(reginfo.lastReload[regInfo.new2OldReg[regCounter]],hp3)
                                              else hp3 := hp4;
-                                             if prevSeq <> hp3 then
+                                             if prevSeq_next <> hp3 then
                                                clearRegContentsFrom(regCounter,prevSeq_next,
                                                  hp3);
+                                             getnextInstruction(hp3,hp3);
                                              allocRegBetween(asmL,regCounter,prevSeq,hp3);
                                            end;
                                          If Not(RegCounter In RegInfo.RegsLoadedForRef) And
@@ -1421,20 +1429,23 @@ Begin
                                                begin
                                                  hp3 := New(Pai_Marker,Init(NoPropInfoEnd));
                                                  InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
-                                                 hp3 := New(Paicpu,Op_Reg_Reg(A_MOV, S_L,
+                                                 hp5 := New(Paicpu,Op_Reg_Reg(A_MOV, S_L,
                                                                          {old reg          new reg}
                                                        RegInfo.New2OldReg[RegCounter], RegCounter));
-                                                 InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
+                                                 new(ppaiprop(hp5^.optinfo));
+                                                 ppaiprop(hp5^.optinfo)^ := ppaiprop(prevSeq_next^.optinfo)^;
+                                                 ppaiprop(hp5^.optinfo)^.canBeRemoved := false;
+                                                 InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp5);
                                                  hp3 := New(Pai_Marker,Init(NoPropInfoStart));
                                                  InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
                                                  { adjusts states in previous instruction so that it will  }
                                                  { definitely be different from the previous or next state }
-                                                 incstate(ppaiprop(prevSeq_next^.optinfo)^.
+                                                 incstate(ppaiprop(hp5^.optinfo)^.
                                                    regs[RegInfo.New2OldReg[RegCounter]].rstate,20);
-                                                 incstate(ppaiprop(prevSeq_next^.optinfo)^.
+                                                 incstate(ppaiprop(hp5^.optinfo)^.
                                                    regs[regCounter].wstate,20);
                                                  updateState(RegInfo.New2OldReg[RegCounter],
-                                                   prevSeq_next);
+                                                   hp5);
                                                end
                                            End
                                          Else
@@ -1479,7 +1490,7 @@ Begin
                                     (PPaiProp(p^.OptInfo)^.CanBeRemoved) Then
                                    if (cnt > 0) then
                                      begin
-                                       hp2 := p;
+                                       p := hp2;
                                        Cnt2 := 1;
                                        While Cnt2 <= Cnt Do
                                          Begin
@@ -1509,38 +1520,38 @@ Begin
                                      end;
                               End;
                           End;
-                        if not ppaiprop(p^.optinfo)^.canBeRemoved and
-                           not regInRef(reg32(paicpu(p)^.oper[1].reg),
-                                        paicpu(p)^.oper[0].ref^) then
-                          removePrevNotUsedLoad(p,reg32(paicpu(p)^.oper[1].reg),false);
-                      End;
-                    top_Reg:
                       { try to replace the new reg with the old reg }
-                      if not(PPaiProp(p^.optInfo)^.canBeRemoved) and
-                         { only remove if we're not storing something in a regvar }
-                         (paicpu(p)^.oper[1].reg in (usableregs+[R_EDI])) and
-                         (paicpu(p)^.opcode = A_MOV) and
-                         getLastInstruction(p,hp4) then
-                        begin
-                          case paicpu(p)^.oper[1].typ of
-                            top_Reg:
-                              { we only have to start replacing from the instruction after the mov, }
-                              { but replacereg only starts with getnextinstruction(p,p)             }
-                              if ReplaceReg(asmL,paicpu(p)^.oper[0].reg,
-                                   paicpu(p)^.oper[1].reg,p,
-                                   PPaiProp(hp4^.optInfo)^.Regs[paicpu(p)^.oper[1].reg],false,hp1) then
-                                begin
-                                    PPaiProp(p^.optInfo)^.canBeRemoved := true;
-                                    allocRegBetween(asmL,paicpu(p)^.oper[0].reg,
-                                    PPaiProp(p^.optInfo)^.regs[paicpu(p)^.oper[0].reg].startMod,
-                                    hp1);
-                                end
-                              else
-                                if reg32(paicpu(p)^.oper[0].reg) <> reg32(paicpu(p)^.oper[1].reg) then
-                                  removePrevNotUsedLoad(p,reg32(paicpu(p)^.oper[1].reg),false);
-
+                      if not(PPaiProp(p^.optInfo)^.canBeRemoved) then
+                        if (paicpu(p)^.oper[0].typ = top_reg) and
+                           (paicpu(p)^.oper[1].typ = top_reg) and
+                           { only remove if we're not storing something in a regvar }
+                           (paicpu(p)^.oper[1].reg in (usableregs+[R_EDI])) and
+                           (paicpu(p)^.opcode = A_MOV) and
+                           getLastInstruction(p,hp4) and
+                          { we only have to start replacing from the instruction after the mov, }
+                          { but replacereg only starts with getnextinstruction(p,p)             }
+                            replaceReg(asmL,paicpu(p)^.oper[0].reg,
+                              paicpu(p)^.oper[1].reg,p,
+                              ppaiprop(hp4^.optInfo)^.regs[paicpu(p)^.oper[1].reg],false,hp1) then
+                          begin
+                            ppaiprop(p^.optInfo)^.canBeRemoved := true;
+                            allocRegBetween(asmL,paicpu(p)^.oper[0].reg,
+                              ppaiProp(p^.optInfo)^.regs[paicpu(p)^.oper[0].reg].startMod,hp1);
                           end
-                        end;
+                        else
+                          if (paicpu(p)^.oper[1].typ = top_reg) and
+                             not regInOp(paicpu(p)^.oper[1].reg,paicpu(p)^.oper[0]) then
+                           removePrevNotUsedLoad(p,reg32(paicpu(p)^.oper[1].reg),false);
+                        { at first, only try optimizations of large blocks, because doing }
+                        { smaller ones may prevent bigger ones from completing in the     }
+                        { next pass                                                       }
+                        if not doSubOpts and (orgNrOfMods <> 0) then
+                          begin
+                            p := hp2;
+                            for cnt := 1 to pred(orgNrOfMods) do
+                              getNextInstruction(p,p);
+                          end;
+                      End;
                     top_symbol,Top_Const:
                       Begin
                         Case Paicpu(p)^.oper[1].typ Of
@@ -1584,21 +1595,48 @@ Begin
     End;
 End;
 
-Procedure RemoveInstructs(AsmL: PAasmOutput; First, Last: Pai);
+function removeInstructs(asmL: paasmoutput; first, last: pai): boolean;
 { Removes the marked instructions and disposes the PPaiProps of the other }
 { instructions                                                            }
 Var p, hp1: Pai;
 begin
+  removeInstructs := false;
   p := First;
   While (p <> Last) Do
     Begin
       If (p^.typ = ait_marker) and
-         (pai_marker(p)^.kind in [noPropInfoStart,noPropInfoEnd]) then
+         (pai_marker(p)^.kind = noPropInfoStart) then
         begin
           hp1 := pai(p^.next);
           asmL^.remove(p);
           dispose(p,done);
-          p := hp1
+          { Walk the items between the noPropInfoStart marker (already     }
+          { removed and disposed above) and the matching noPropInfoEnd     }
+          { marker. The marker test has to look at hp1, the current item:  }
+          { p was disposed above and is only re-assigned inside the loop.  }
+          while not((hp1^.typ = ait_marker) and
+                    (pai_marker(hp1)^.kind = noPropInfoEnd)) do
+            begin
+              { remember the successor first, hp1 may be disposed below }
+              p := pai(hp1^.next);
+{$ifndef noinstremove}
+              { allocregbetween can insert new ait_regalloc objects }
+              { without optinfo                                     }
+              if assigned(hp1^.optinfo) then
+                if ppaiprop(hp1^.optinfo)^.canBeRemoved then
+                  begin
+                    { marked for removal: free the optimizer info and }
+                    { the item itself                                 }
+                    dispose(ppaiprop(hp1^.optinfo));
+                    hp1^.optinfo := nil;
+                    asmL^.remove(hp1);
+                    dispose(hp1,done);
+                    hp1 := p;
+                  end
+                else
+{$endif noinstremove}
+                  begin
+                    { item is kept: only free its optimizer info }
+                    dispose(ppaiprop(hp1^.optinfo));
+                    hp1^.optinfo := nil;
+                  end;
+              hp1 := p;
+            end;
+          p := pai(hp1^.next);
+          asmL^.remove(hp1);
+          dispose(hp1,done);
         end
       else
 {$ifndef noinstremove}
@@ -1609,6 +1647,7 @@ begin
             AsmL^.Remove(p);
             Dispose(p, Done);
             p := hp1;
+            removeInstructs := true;
           End
         Else
 {$endif noinstremove}
@@ -1617,20 +1656,37 @@ begin
             p := pai(p^.next);;
           End;
     End;
-    FreeMem(PaiPropBlock, NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4))
+    FreeMem(PaiPropBlock, NrOfPaiObjs*SizeOf(TPaiProp))
 End;
 
-Procedure CSE(AsmL: PAasmOutput; First, Last: Pai);
+function CSE(AsmL: PAasmOutput; First, Last: Pai; pass: longint): boolean;
 Begin
-  DoCSE(AsmL, First, Last);
-  RemoveInstructs(AsmL, First, Last);
+  DoCSE(AsmL, First, Last, not(cs_slowoptimize in aktglobalswitches) or (pass >= 2),
+        not(cs_slowoptimize in aktglobalswitches) or (pass >= 1));
+ { register renaming: always done when cs_slowoptimize is off; with cs_slowoptimize it is skipped on pass 0 (the pass in which both DoCSE flags above are false). The function result is whatever removeInstructs returns. }
+  if not(cs_slowoptimize in aktglobalswitches) or (pass > 0) then
+    doRenaming(asmL, first, last);
+  cse := removeInstructs(asmL, first, last);
 End;
 
 End.
 
 {
   $Log$
-  Revision 1.1  2000-10-15 09:47:43  peter
+  Revision 1.2  2000-10-24 10:40:53  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peepholopts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.1  2000/10/15 09:47:43  peter
     * moved to i386/
 
   Revision 1.14  2000/09/30 13:07:23  jonas

+ 113 - 75
compiler/i386/daopt386.pas

@@ -183,6 +183,7 @@ Function regLoadedWithNewValue(reg: tregister; canDependOnPrevValue: boolean;
            hp: pai): boolean;
 Procedure UpdateUsedRegs(Var UsedRegs: TRegSet; p: Pai);
 Procedure AllocRegBetween(AsmL: PAasmOutput; Reg: TRegister; p1, p2: Pai);
+function FindRegDealloc(reg: tregister; p: pai): boolean;
 
 Function RegsEquivalent(OldReg, NewReg: TRegister; Var RegInfo: TRegInfo; OpAct: TopAction): Boolean;
 Function InstructionsEquivalent(p1, p2: Pai; Var RegInfo: TRegInfo): Boolean;
@@ -720,15 +721,14 @@ Begin
               End
             Else Regsequivalent := False
         Else
-          If Not(Reg32(NewReg) in NewRegsEncountered) and
-             ((OpAct = OpAct_Write) or
-              ((newReg = oldReg) and
-               not(newReg in usableregs + [R_EDI]))) Then
-            Begin
-              AddReg2RegInfo(OldReg, NewReg, RegInfo);
-              RegsEquivalent := True
-            End
-          Else RegsEquivalent := False
+           If Not(Reg32(NewReg) in NewRegsEncountered) and
+              ((OpAct = OpAct_Write) or
+               (newReg = oldReg)) Then
+             Begin
+               AddReg2RegInfo(OldReg, NewReg, RegInfo);
+               RegsEquivalent := True
+             End
+           Else RegsEquivalent := False 
     Else RegsEquivalent := False
   Else RegsEquivalent := OldReg = NewReg
 End;
@@ -973,7 +973,7 @@ Begin
            ((current^.typ = ait_label) and
             labelCanBeSkipped(pai_label(current)))) do
       Current := Pai(Current^.Next);
-    If Assigned(Current) And
+{    If Assigned(Current) And
        (Current^.typ = ait_Marker) And
        (Pai_Marker(Current)^.Kind = NoPropInfoStart) Then
       Begin
@@ -981,10 +981,10 @@ Begin
               ((Current^.typ <> ait_Marker) Or
                (Pai_Marker(Current)^.Kind <> NoPropInfoEnd)) Do
           Current := Pai(Current^.Next);
-      End;
+      End;}
   Until Not(Assigned(Current)) Or
         (Current^.typ <> ait_Marker) Or
-        (Pai_Marker(Current)^.Kind <> NoPropInfoEnd);
+        not(Pai_Marker(Current)^.Kind in [NoPropInfoStart,NoPropInfoEnd]);
   Next := Current;
   If Assigned(Current) And
      Not((Current^.typ In SkipInstr) or
@@ -1009,12 +1009,12 @@ Begin
     Current := Pai(Current^.previous);
     While Assigned(Current) And
           (((Current^.typ = ait_Marker) And
-            Not(Pai_Marker(Current)^.Kind in [AsmBlockEnd,NoPropInfoEnd])) or
+            Not(Pai_Marker(Current)^.Kind in [AsmBlockEnd{,NoPropInfoEnd}])) or
            (Current^.typ In SkipInstr) or
            ((Current^.typ = ait_label) And
             labelCanBeSkipped(pai_label(current)))) Do
       Current := Pai(Current^.previous);
-    If Assigned(Current) And
+{    If Assigned(Current) And
        (Current^.typ = ait_Marker) And
        (Pai_Marker(Current)^.Kind = NoPropInfoEnd) Then
       Begin
@@ -1022,10 +1022,10 @@ Begin
               ((Current^.typ <> ait_Marker) Or
                (Pai_Marker(Current)^.Kind <> NoPropInfoStart)) Do
           Current := Pai(Current^.previous);
-      End;
+      End;}
   Until Not(Assigned(Current)) Or
         (Current^.typ <> ait_Marker) Or
-        (Pai_Marker(Current)^.Kind <> NoPropInfoStart);
+        not(Pai_Marker(Current)^.Kind in [NoPropInfoStart,NoPropInfoEnd]);
   If Not(Assigned(Current)) or
      (Current^.typ In SkipInstr) or
      ((Current^.typ = ait_label) And
@@ -1177,6 +1177,41 @@ Begin
    end;
 End;
 
+function FindRegDealloc(reg: tregister; p: pai): boolean;
+{ Scans backwards from p over the items that directly precede it, for as  }
+{ long as they are in skipinstr, are ait_align, or are labels accepted by }
+{ labelCanBeSkipped, looking for a deallocation of reg.                   }
+{ Returns true if the first alloc/dealloc item found for reg is a         }
+{ deallocation. If an allocation of reg was passed first, the result is   }
+{ true only when the instruction following the deallocation loads reg     }
+{ with a new value (regLoadedWithNewValue). Returns false otherwise.      }
+{ assumes reg is a 32bit register }
+var
+  prev, nextInstr: pai;
+  sawAlloc: boolean;
+begin
+  FindRegDealloc := false;
+  sawAlloc := false;
+  while assigned(p^.previous) do
+    begin
+      prev := pai(p^.previous);
+      { stop at the first item that may not be skipped }
+      if not(prev^.typ in (skipinstr+[ait_align])) and
+         not((prev^.typ = ait_label) and
+             labelCanBeSkipped(pai_label(prev))) then
+        exit;
+      p := prev;
+      { only alloc/dealloc items for the requested register matter }
+      if (p^.typ <> ait_regalloc) or
+         (pairegalloc(p)^.reg <> reg) then
+        continue;
+      if pairegalloc(p)^.allocation then
+        begin
+          { an allocation was passed, keep looking for a deallocation }
+          sawAlloc := true;
+          continue;
+        end;
+      { found a deallocation of reg }
+      if sawAlloc then
+        FindRegDealloc :=
+          getNextInstruction(p,nextInstr) and
+          regLoadedWithNewValue(reg,false,nextInstr)
+      else
+        FindRegDealloc := true;
+      exit;
+    end
+end;
+
+
 
 Procedure IncState(Var S: Byte; amount: longint);
 {Increases S by 1, wraps around at $ffff to 0 (so we won't get overflow
@@ -1233,7 +1268,7 @@ Begin
   sequenceDependsonReg := TmpResult
 End;
 
-procedure invalidateDepedingRegs(p1: ppaiProp; reg: tregister);
+procedure invalidateDependingRegs(p1: ppaiProp; reg: tregister);
 var
   counter: tregister;
 begin
@@ -1277,7 +1312,7 @@ Begin
             { con_invalid and con_noRemoveRef = con_unknown }
             else typ := con_unknown;
         end;
-      invalidateDepedingRegs(p1,reg);
+      invalidateDependingRegs(p1,reg);
     end;
 End;
 
@@ -1644,7 +1679,7 @@ function writeToRegDestroysContents(destReg: tregister; reg: tregister;
 { modified                                                           }
 begin
   writeToRegDestroysContents :=
-    (c.typ <> con_unknown) and
+    (c.typ in [con_ref,con_noRemoveRef,con_invalid]) and
     sequenceDependsOnReg(c,reg,reg32(destReg));
 end;
 
@@ -2033,73 +2068,63 @@ Begin
               A_MOV, A_MOVZX, A_MOVSX:
                 Begin
                   Case Paicpu(p)^.oper[0].typ Of
-                    Top_Reg:
-                      Case Paicpu(p)^.oper[1].typ Of
-                        Top_Reg:
+                    top_ref, top_reg:
+                      case paicpu(p)^.oper[1].typ Of
+                        top_reg:
                           Begin
 {$ifdef statedebug}
                             hp := new(pai_asm_comment,init(strpnew('destroying '+
                               att_reg2str[Paicpu(p)^.oper[1].reg])));
                             insertllitem(asml,p,p^.next,hp);
 {$endif statedebug}
-                            DestroyReg(CurProp, Paicpu(p)^.oper[1].reg, true);
-                            ReadReg(CurProp, Paicpu(p)^.oper[0].reg);
-{                            CurProp^.Regs[Paicpu(p)^.oper[1].reg] :=
-                              CurProp^.Regs[Paicpu(p)^.oper[0].reg];
-                            If (CurProp^.Regs[Paicpu(p)^.oper[1].reg].ModReg = R_NO) Then
-                              CurProp^.Regs[Paicpu(p)^.oper[1].reg].ModReg :=
-                                Paicpu(p)^.oper[0].reg;}
-                          End;
-                        Top_Ref:
-                          Begin
-                            ReadReg(CurProp, Paicpu(p)^.oper[0].reg);
-                            ReadRef(CurProp, Paicpu(p)^.oper[1].ref);
-                            DestroyRefs(p, Paicpu(p)^.oper[1].ref^, Paicpu(p)^.oper[0].reg);
-                          End;
-                      End;
-                    Top_Ref:
-                      Begin {destination is always a register in this case}
-                        ReadRef(CurProp, Paicpu(p)^.oper[0].ref);
-                        TmpReg := Reg32(Paicpu(p)^.oper[1].reg);
-                        If RegInRef(TmpReg, Paicpu(p)^.oper[0].ref^) And
-                           (curProp^.regs[tmpReg].typ in [con_ref,con_noRemoveRef])
-                          Then
-                            Begin
-                              With CurProp^.Regs[TmpReg] Do
-                                Begin
-                                  incState(wstate,1);
- {also store how many instructions are part of the sequence in the first
-  instructions PPaiProp, so it can be easily accessed from within
-  CheckSequence}
-                                  Inc(NrOfMods, NrOfInstrSinceLastMod[TmpReg]);
-                                  PPaiProp(Pai(StartMod)^.OptInfo)^.Regs[TmpReg].NrOfMods := NrOfMods;
-                                  NrOfInstrSinceLastMod[TmpReg] := 0;
-                                  { Destroy the contents of the registers  }
-                                  { that depended on the previous value of }
-                                  { this register                          }
-                                  invalidateDepedingRegs(curProp,tmpReg);
-                                End;
-                            End
-                          Else
-                            Begin
+
+                            readOp(curprop, paicpu(p)^.oper[0]);
+                            tmpreg := reg32(paicpu(p)^.oper[1].reg);
+                            if regInOp(tmpreg, paicpu(p)^.oper[0]) and
+                               (curProp^.regs[tmpReg].typ in [con_ref,con_noRemoveRef]) then
+                              begin
+                                with curprop^.regs[tmpreg] Do
+                                  begin
+                                    incState(wstate,1);
+ { also store how many instructions are part of the sequence in the first }
+ { instruction's PPaiProp, so it can be easily accessed from within       }
+ { CheckSequence                                                          }
+                                    inc(nrOfMods, nrOfInstrSinceLastMod[tmpreg]);
+                                    ppaiprop(startmod^.optinfo)^.regs[tmpreg].nrOfMods := nrOfMods;
+                                    nrOfInstrSinceLastMod[tmpreg] := 0;
+                                   { Destroy the contents of the registers  }
+                                   { that depended on the previous value of }
+                                   { this register                          }
+                                    invalidateDependingRegs(curprop,tmpreg);
+                                end;
+                            end
+                          else
+                            begin
 {$ifdef statedebug}
                               hp := new(pai_asm_comment,init(strpnew('destroying & initing '+att_reg2str[tmpreg])));
                               insertllitem(asml,p,p^.next,hp);
 {$endif statedebug}
-                              DestroyReg(CurProp, TmpReg, true);
-                              If Not(RegInRef(TmpReg, Paicpu(p)^.oper[0].ref^)) Then
-                                With CurProp^.Regs[TmpReg] Do
-                                  Begin
-                                    Typ := Con_Ref;
-                                    StartMod := p;
-                                    NrOfMods := 1;
-                                  End
-                            End;
+                              destroyReg(curprop, tmpreg, true);
+                              if not(reginop(tmpreg, paicpu(p)^.oper[0])) then
+                                with curprop^.regs[tmpreg] Do
+                                  begin
+                                    typ := con_ref;
+                                    startmod := p;
+                                    nrOfMods := 1;
+                                  end
+                            end;
 {$ifdef StateDebug}
                   hp := new(pai_asm_comment,init(strpnew(att_reg2str[TmpReg]+': '+tostr(CurProp^.Regs[TmpReg].WState))));
                   InsertLLItem(AsmL, p, p^.next, hp);
 {$endif StateDebug}
-
+                          End;
+                        Top_Ref:
+                          { can only be if oper[0] = top_reg }
+                          Begin
+                            ReadReg(CurProp, Paicpu(p)^.oper[0].reg);
+                            ReadRef(CurProp, Paicpu(p)^.oper[1].ref);
+                            DestroyRefs(p, Paicpu(p)^.oper[1].ref^, Paicpu(p)^.oper[0].reg);
+                          End;
                       End;
                     top_symbol,Top_Const:
                       Begin
@@ -2317,12 +2342,12 @@ Begin
       GetNextInstruction(p, p);
     End;
 {Uncomment the next line to see how much memory the reloading optimizer needs}
-{  Writeln((NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4)));}
+{  Writeln(NrOfPaiObjs*SizeOf(TPaiProp));}
 {no need to check mem/maxavail, we've got as much virtual memory as we want}
   If NrOfPaiObjs <> 0 Then
     Begin
       InitDFAPass2 := True;
-      GetMem(PaiPropBlock, NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4));
+      GetMem(PaiPropBlock, NrOfPaiObjs*SizeOf(TPaiProp));
       p := BlockStart;
       SkipHead(p);
       For Count := 1 To NrOfPaiObjs Do
@@ -2362,7 +2387,20 @@ End.
 
 {
   $Log$
-  Revision 1.2  2000-10-19 15:59:40  jonas
+  Revision 1.3  2000-10-24 10:40:53  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peepholopts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.2  2000/10/19 15:59:40  jonas
     * fixed bug in allocregbetween (the register wasn't added to the
       usedregs set of the last instruction of the chain) ("merged")
 

+ 456 - 390
compiler/i386/popt386.pas

@@ -28,14 +28,16 @@ Interface
 
 Uses Aasm;
 
+Procedure PrePeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 Procedure PeepHoleOptPass1(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
+Procedure PostPeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 
 Implementation
 
 Uses
   globtype,systems,
-  globals,verbose,hcodegen,
+  globals,hcodegen,
 {$ifdef finaldestdebug}
   cobjects,
 {$endif finaldestdebug}
@@ -97,6 +99,308 @@ begin
     end;
 end;
 
+Procedure PrePeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
+{ Walks the items from BlockStart up to (not including) BlockEnd and     }
+{ rewrites some instructions in place:                                   }
+{   - "imul $1,reg" is removed, "imul $1,reg1,reg2" becomes a mov        }
+{   - "imul $c,reg1[,reg2]" with c in 3,5,6,9,10,12 becomes lea/add      }
+{     (6, 10 and 12 only when aktoptprocessor <= Class386)               }
+{   - "shr/sar $c1,x" followed by "shl $c2,x" becomes a shift plus an    }
+{     and, or just an and                                                }
+{   - "xor reg,reg" becomes "mov $0,reg"                                 }
+var
+  p,hp1: pai;
+  l: longint;
+  tmpRef: treference;
+Begin
+  P := BlockStart;
+  While (P <> BlockEnd) Do
+    Begin
+      Case P^.Typ Of
+        Ait_Instruction:
+          Begin
+            Case Paicpu(p)^.opcode Of
+              A_IMUL:
+                {changes certain "imul const, %reg"'s to lea sequences}
+                Begin
+                  If (Paicpu(p)^.oper[0].typ = Top_Const) And
+                     (Paicpu(p)^.oper[1].typ = Top_Reg) And
+                     (Paicpu(p)^.opsize = S_L) Then
+                    If (Paicpu(p)^.oper[0].val = 1) Then
+                      If (Paicpu(p)^.oper[2].typ = Top_None) Then
+                       {remove "imul $1, reg"}
+                        Begin
+                          hp1 := Pai(p^.Next);
+                          AsmL^.Remove(p);
+                          Dispose(p, Done);
+                          p := hp1;
+                          { p already points to the next item, so skip the }
+                          { advance at the end of the loop                 }
+                          Continue;
+                        End
+                      Else
+                       {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
+                        Begin
+                          hp1 := New(Paicpu, Op_Reg_Reg(A_MOV, S_L, Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[2].reg));
+                          InsertLLItem(AsmL, p^.previous, p^.next, hp1);
+                          Dispose(p, Done);
+                          p := hp1;
+                        End
+                    Else If
+                     ((Paicpu(p)^.oper[2].typ = Top_Reg) or
+                      (Paicpu(p)^.oper[2].typ = Top_None)) And
+                     (aktoptprocessor < ClassP6) And
+                     (Paicpu(p)^.oper[0].val <= 12) And
+                     Not(CS_LittleSize in aktglobalswitches) And
+                     { don't replace the imul if the next instruction is a }
+                     { jump on the overflow condition (jo/jno)             }
+                     (Not(GetNextInstruction(p, hp1)) Or
+                       {GetNextInstruction(p, hp1) And}
+                       Not((Pai(hp1)^.typ = ait_instruction) And
+                           ((paicpu(hp1)^.opcode=A_Jcc) and
+                            (paicpu(hp1)^.condition in [C_O,C_NO]))))
+                    Then
+                      Begin
+                        Reset_reference(tmpref);
+                        Case Paicpu(p)^.oper[0].val Of
+                          3: Begin
+                             {imul 3, reg1, reg2 to
+                                lea (reg1,reg1,2), reg2
+                              imul 3, reg1 to
+                                lea (reg1,reg1,2), reg1}
+                               TmpRef.base := Paicpu(p)^.oper[1].reg;
+                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                               TmpRef.ScaleFactor := 2;
+                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
+                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
+                               Else
+                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                               Dispose(p, Done);
+                               p := hp1;
+                            End;
+                         5: Begin
+                            {imul 5, reg1, reg2 to
+                               lea (reg1,reg1,4), reg2
+                             imul 5, reg1 to
+                               lea (reg1,reg1,4), reg1}
+                              TmpRef.base := Paicpu(p)^.oper[1].reg;
+                              TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                              TmpRef.ScaleFactor := 4;
+                              If (Paicpu(p)^.oper[2].typ = Top_None) Then
+                                hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
+                              Else
+                                hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                              InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                              Dispose(p, Done);
+                              p := hp1;
+                            End;
+                         6: Begin
+                            {imul 6, reg1, reg2 to
+                               lea (,reg1,2), reg2
+                               lea (reg2,reg1,4), reg2
+                             imul 6, reg1 to
+                               lea (reg1,reg1,2), reg1
+                               add reg1, reg1}
+                              If (aktoptprocessor <= Class386)
+                                Then
+                                  Begin
+                                    { the second instruction of the pair is }
+                                    { created first and inserted after p    }
+                                    TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                    If (Paicpu(p)^.oper[2].typ = Top_Reg)
+                                      Then
+                                        Begin
+                                          TmpRef.base := Paicpu(p)^.oper[2].reg;
+                                          TmpRef.ScaleFactor := 4;
+                                          { the destination must be reg2 (oper[2]): writing }
+                                          { to reg1 would clobber the source and leave reg2 }
+                                          { at 2*reg1 instead of 6*reg1                     }
+                                          hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                                        End
+                                      Else
+                                        Begin
+                                          hp1 :=  New(Paicpu, op_reg_reg(A_ADD, S_L,
+                                            Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
+                                        End;
+                                    InsertLLItem(AsmL,p, p^.next, hp1);
+                                    { now build the first instruction, which }
+                                    { replaces the imul itself               }
+                                    Reset_reference(tmpref);
+                                    TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                    TmpRef.ScaleFactor := 2;
+                                    If (Paicpu(p)^.oper[2].typ = Top_Reg)
+                                      Then
+                                        Begin
+                                          TmpRef.base := R_NO;
+                                          hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef),
+                                            Paicpu(p)^.oper[2].reg));
+                                        End
+                                      Else
+                                        Begin
+                                          TmpRef.base := Paicpu(p)^.oper[1].reg;
+                                          hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
+                                        End;
+                                    InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                                    Dispose(p, Done);
+                                    p := Pai(hp1^.next);
+                                  End
+                            End;
+                          9: Begin
+                             {imul 9, reg1, reg2 to
+                                lea (reg1,reg1,8), reg2
+                              imul 9, reg1 to
+                                lea (reg1,reg1,8), reg1}
+                               TmpRef.base := Paicpu(p)^.oper[1].reg;
+                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                               TmpRef.ScaleFactor := 8;
+                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
+                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
+                               Else
+                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                               Dispose(p, Done);
+                               p := hp1;
+                             End;
+                         10: Begin
+                            {imul 10, reg1, reg2 to
+                               lea (reg1,reg1,4), reg2
+                               add reg2, reg2
+                             imul 10, reg1 to
+                               lea (reg1,reg1,4), reg1
+                               add reg1, reg1}
+                               If (aktoptprocessor <= Class386) Then
+                                 Begin
+                                   If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
+                                     hp1 :=  New(Paicpu, op_reg_reg(A_ADD, S_L,
+                                       Paicpu(p)^.oper[2].reg,Paicpu(p)^.oper[2].reg))
+                                   Else
+                                     hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
+                                       Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
+                                   InsertLLItem(AsmL,p, p^.next, hp1);
+                                   TmpRef.base := Paicpu(p)^.oper[1].reg;
+                                   TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                   TmpRef.ScaleFactor := 4;
+                                   If (Paicpu(p)^.oper[2].typ = Top_Reg)
+                                     Then
+                                       hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg))
+                                     Else
+                                       hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
+                                   InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                                   Dispose(p, Done);
+                                   p := Pai(hp1^.next);
+                                 End
+                             End;
+                         12: Begin
+                            {imul 12, reg1, reg2 to
+                               lea (,reg1,4), reg2
+                               lea (reg2,reg1,8), reg2
+                             imul 12, reg1 to
+                               lea (reg1,reg1,2), reg1
+                               lea (,reg1,4), reg1}
+                               If (aktoptprocessor <= Class386)
+                                 Then
+                                   Begin
+                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                     If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
+                                       Begin
+                                         TmpRef.base := Paicpu(p)^.oper[2].reg;
+                                         TmpRef.ScaleFactor := 8;
+                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                                       End
+                                     Else
+                                       Begin
+                                         TmpRef.base := R_NO;
+                                         TmpRef.ScaleFactor := 4;
+                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
+                                       End;
+                                     InsertLLItem(AsmL,p, p^.next, hp1);
+                                     Reset_reference(tmpref);
+                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                     If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
+                                       Begin
+                                         TmpRef.base := R_NO;
+                                         TmpRef.ScaleFactor := 4;
+                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                                       End
+                                     Else
+                                       Begin
+                                         TmpRef.base := Paicpu(p)^.oper[1].reg;
+                                         TmpRef.ScaleFactor := 2;
+                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
+                                       End;
+                                     InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                                     Dispose(p, Done);
+                                     p := Pai(hp1^.next);
+                                   End
+                             End
+                        End;
+                      End;
+                End;
+              A_SAR, A_SHR:
+                  {changes the code sequence
+                   shr/sar const1, x
+                   shl     const2, x
+                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
+                Begin
+                  If GetNextInstruction(p, hp1) And
+                     (pai(hp1)^.typ = ait_instruction) and
+                     (Paicpu(hp1)^.opcode = A_SHL) and
+                     (Paicpu(p)^.oper[0].typ = top_const) and
+                     (Paicpu(hp1)^.oper[0].typ = top_const) and
+                     (Paicpu(hp1)^.opsize = Paicpu(p)^.opsize) And
+                     (Paicpu(hp1)^.oper[1].typ = Paicpu(p)^.oper[1].typ) And
+                     OpsEqual(Paicpu(hp1)^.oper[1], Paicpu(p)^.oper[1])
+                    Then
+                      If (Paicpu(p)^.oper[0].val > Paicpu(hp1)^.oper[0].val) And
+                         Not(CS_LittleSize In aktglobalswitches)
+                        Then
+                   { shr/sar const1, %reg
+                     shl     const2, %reg
+                      with const1 > const2 }
+                          Begin
+                            Paicpu(p)^.LoadConst(0,Paicpu(p)^.oper[0].val-Paicpu(hp1)^.oper[0].val);
+                            Paicpu(hp1)^.opcode := A_AND;
+                            { mask that clears the low const2 bits }
+                            l := (1 shl (Paicpu(hp1)^.oper[0].val)) - 1;
+                            Case Paicpu(p)^.opsize Of
+                              S_L: Paicpu(hp1)^.LoadConst(0,l Xor longint(-1));
+                              S_B: Paicpu(hp1)^.LoadConst(0,l Xor $ff);
+                              S_W: Paicpu(hp1)^.LoadConst(0,l Xor $ffff);
+                            End;
+                          End
+                        Else
+                          If (Paicpu(p)^.oper[0].val<Paicpu(hp1)^.oper[0].val) And
+                             Not(CS_LittleSize In aktglobalswitches)
+                            Then
+                   { shr/sar const1, %reg
+                     shl     const2, %reg
+                      with const1 < const2 }
+                              Begin
+                                Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val-Paicpu(p)^.oper[0].val);
+                                Paicpu(p)^.opcode := A_AND;
+                                { mask that clears the low const1 bits; longint(-1) }
+                                { keeps the constant in longint range, like the     }
+                                { const1 > const2 case above                        }
+                                l := (1 shl (Paicpu(p)^.oper[0].val))-1;
+                                Case Paicpu(p)^.opsize Of
+                                  S_L: Paicpu(p)^.LoadConst(0,l Xor longint(-1));
+                                  S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
+                                  S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
+                                End;
+                              End
+                            Else
+                   { shr/sar const1, %reg
+                     shl     const2, %reg
+                      with const1 = const2 }
+                              if (Paicpu(p)^.oper[0].val = Paicpu(hp1)^.oper[0].val) then
+                                Begin
+                                  Paicpu(p)^.opcode := A_AND;
+                                  l := (1 shl (Paicpu(p)^.oper[0].val))-1;
+                                  Case Paicpu(p)^.opsize Of
+                                    S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
+                                    S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
+                                    S_L: Paicpu(p)^.LoadConst(0,l Xor longint(-1));
+                                  End;
+                                  { the shl is no longer needed }
+                                  AsmL^.remove(hp1);
+                                  dispose(hp1, done);
+                                End;
+                End;
+              A_XOR:
+                If (Paicpu(p)^.oper[0].typ = top_reg) And
+                   (Paicpu(p)^.oper[1].typ = top_reg) And
+                   (Paicpu(p)^.oper[0].reg = Paicpu(p)^.oper[1].reg) then
+                 { temporarily change this to 'mov reg,0' to make it easier }
+                 { for the CSE. Will be changed back in pass 2              }
+                  begin
+                    paicpu(p)^.opcode := A_MOV;
+                    paicpu(p)^.loadconst(0,0);
+                  end;
+            End;
+          End;
+      End;
+      p := Pai(p^.next)
+    End;
+End;
+
+
 
 Procedure PeepHoleOptPass1(Asml: PAasmOutput; BlockStart, BlockEnd: Pai);
 {First pass of peepholeoptimizations}
@@ -279,9 +583,7 @@ Begin
                If (paicpu(p)^.opcode = A_JMP) Then
                  Begin
                    While GetNextInstruction(p, hp1) and
-                         ((hp1^.typ <> ait_label) or
-                   { skip unused labels, they're not referenced anywhere }
-                          labelCanBeSkipped(pai_label(hp1))) Do
+                         (hp1^.typ <> ait_label) do
                      If not(hp1^.typ in ([ait_label,ait_align]+skipinstr)) Then
                        Begin
                          AsmL^.Remove(hp1);
@@ -289,6 +591,7 @@ Begin
                        End
                      else break;
                   End;
+               { remove jumps to a label coming right after them }
                If GetNextInstruction(p, hp1) then
                  Begin
                    if FindLabel(pasmlabel(paicpu(p)^.oper[0].sym), hp1) then
@@ -481,259 +784,53 @@ Begin
                       { change                      to
                           fld/fst   mem1  (hp1)       fld/fst   mem1
                           fld       mem1  (p)         fadd/
-                          faddp/                       fmul     st, st
-                           fmulp  st, st1 (hp2) }
-                        Begin
-                          AsmL^.Remove(p);
-                          Dispose(p, Done);
-                          p := hp1;
-                          If (Paicpu(hp2)^.opcode = A_FADDP) Then
-                            Paicpu(hp2)^.opcode := A_FADD
-                          Else
-                            Paicpu(hp2)^.opcode := A_FMUL;
-                          Paicpu(hp2)^.oper[1].reg := R_ST;
-                        End
-                      Else
-                      { change              to
-                          fld/fst mem1 (hp1)   fld/fst mem1
-                          fld     mem1 (p)     fld      st}
-                        Begin
-                          Paicpu(p)^.changeopsize(S_FL);
-                          Paicpu(p)^.loadreg(0,R_ST);
-                        End
-                    Else
-                      Begin
-                        Case Paicpu(hp2)^.opcode Of
-                          A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
-                     { change                        to
-                         fld/fst  mem1    (hp1)      fld/fst    mem1
-                         fld      mem2    (p)        fxxx       mem2
-                         fxxxp    st, st1 (hp2)                      }
-
-                            Begin
-                              Case Paicpu(hp2)^.opcode Of
-                                A_FADDP: Paicpu(p)^.opcode := A_FADD;
-                                A_FMULP: Paicpu(p)^.opcode := A_FMUL;
-                                A_FSUBP: Paicpu(p)^.opcode := A_FSUBR;
-                                A_FSUBRP: Paicpu(p)^.opcode := A_FSUB;
-                                A_FDIVP: Paicpu(p)^.opcode := A_FDIVR;
-                                A_FDIVRP: Paicpu(p)^.opcode := A_FDIV;
-                              End;
-                              AsmL^.Remove(hp2);
-                              Dispose(hp2, Done)
-                            End
-                        End
-                      End
-                End;
-              A_FSTP,A_FISTP:
-                if doFpuLoadStoreOpt(asmL,p) then
-                  continue;
-              A_IMUL:
-                {changes certain "imul const, %reg"'s to lea sequences}
-                Begin
-                  If (Paicpu(p)^.oper[0].typ = Top_Const) And
-                     (Paicpu(p)^.oper[1].typ = Top_Reg) And
-                     (Paicpu(p)^.opsize = S_L) Then
-                    If (Paicpu(p)^.oper[0].val = 1) Then
-                      If (Paicpu(p)^.oper[2].typ = Top_None) Then
-                       {remove "imul $1, reg"}
-                        Begin
-                          hp1 := Pai(p^.Next);
-                          AsmL^.Remove(p);
-                          Dispose(p, Done);
-                          p := hp1;
-                          Continue;
-                        End
-                      Else
-                       {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
-                        Begin
-                          hp1 := New(Paicpu, Op_Reg_Reg(A_MOV, S_L, Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[2].reg));
-                          InsertLLItem(AsmL, p^.previous, p^.next, hp1);
-                          Dispose(p, Done);
-                          p := hp1;
-                        End
-                    Else If
-                     ((Paicpu(p)^.oper[2].typ = Top_Reg) or
-                      (Paicpu(p)^.oper[2].typ = Top_None)) And
-                     (aktoptprocessor < ClassP6) And
-                     (Paicpu(p)^.oper[0].val <= 12) And
-                     Not(CS_LittleSize in aktglobalswitches) And
-                     (Not(GetNextInstruction(p, hp1)) Or
-                       {GetNextInstruction(p, hp1) And}
-                       Not((Pai(hp1)^.typ = ait_instruction) And
-                           ((paicpu(hp1)^.opcode=A_Jcc) and
-                            (paicpu(hp1)^.condition in [C_O,C_NO]))))
-                    Then
-                      Begin
-                        Reset_reference(tmpref);
-                        Case Paicpu(p)^.oper[0].val Of
-                          3: Begin
-                             {imul 3, reg1, reg2 to
-                                lea (reg1,reg1,2), reg2
-                              imul 3, reg1 to
-                                lea (reg1,reg1,2), reg1}
-                               TmpRef.base := Paicpu(p)^.oper[1].reg;
-                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                               TmpRef.ScaleFactor := 2;
-                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
-                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
-                               Else
-                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                               Dispose(p, Done);
-                               p := hp1;
-                            End;
-                         5: Begin
-                            {imul 5, reg1, reg2 to
-                               lea (reg1,reg1,4), reg2
-                             imul 5, reg1 to
-                               lea (reg1,reg1,4), reg1}
-                              TmpRef.base := Paicpu(p)^.oper[1].reg;
-                              TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                              TmpRef.ScaleFactor := 4;
-                              If (Paicpu(p)^.oper[2].typ = Top_None) Then
-                                hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
-                              Else
-                                hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                              InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                              Dispose(p, Done);
-                              p := hp1;
-                            End;
-                         6: Begin
-                            {imul 6, reg1, reg2 to
-                               lea (,reg1,2), reg2
-                               lea (reg2,reg1,4), reg2
-                             imul 6, reg1 to
-                               lea (reg1,reg1,2), reg1
-                               add reg1, reg1}
-                              If (aktoptprocessor <= Class386)
-                                Then
-                                  Begin
-                                    TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                    If (Paicpu(p)^.oper[2].typ = Top_Reg)
-                                      Then
-                                        Begin
-                                          TmpRef.base := Paicpu(p)^.oper[2].reg;
-                                          TmpRef.ScaleFactor := 4;
-                                          hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                        End
-                                      Else
-                                        Begin
-                                          hp1 :=  New(Paicpu, op_reg_reg(A_ADD, S_L,
-                                            Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
-                                        End;
-                                    InsertLLItem(AsmL,p, p^.next, hp1);
-                                    Reset_reference(tmpref);
-                                    TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                    TmpRef.ScaleFactor := 2;
-                                    If (Paicpu(p)^.oper[2].typ = Top_Reg)
-                                      Then
-                                        Begin
-                                          TmpRef.base := R_NO;
-                                          hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef),
-                                            Paicpu(p)^.oper[2].reg));
-                                        End
-                                      Else
-                                        Begin
-                                          TmpRef.base := Paicpu(p)^.oper[1].reg;
-                                          hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                        End;
-                                    InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                                    Dispose(p, Done);
-                                    p := Pai(hp1^.next);
-                                  End
-                            End;
-                          9: Begin
-                             {imul 9, reg1, reg2 to
-                                lea (reg1,reg1,8), reg2
-                              imul 9, reg1 to
-                                lea (reg1,reg1,8), reg1}
-                               TmpRef.base := Paicpu(p)^.oper[1].reg;
-                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                               TmpRef.ScaleFactor := 8;
-                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
-                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
-                               Else
-                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                               Dispose(p, Done);
-                               p := hp1;
-                             End;
-                         10: Begin
-                            {imul 10, reg1, reg2 to
-                               lea (reg1,reg1,4), reg2
-                               add reg2, reg2
-                             imul 10, reg1 to
-                               lea (reg1,reg1,4), reg1
-                               add reg1, reg1}
-                               If (aktoptprocessor <= Class386) Then
-                                 Begin
-                                   If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
-                                     hp1 :=  New(Paicpu, op_reg_reg(A_ADD, S_L,
-                                       Paicpu(p)^.oper[2].reg,Paicpu(p)^.oper[2].reg))
-                                   Else
-                                     hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
-                                       Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
-                                   InsertLLItem(AsmL,p, p^.next, hp1);
-                                   TmpRef.base := Paicpu(p)^.oper[1].reg;
-                                   TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                   TmpRef.ScaleFactor := 4;
-                                   If (Paicpu(p)^.oper[2].typ = Top_Reg)
-                                     Then
-                                       hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg))
-                                     Else
-                                       hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                   InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                                   Dispose(p, Done);
-                                   p := Pai(hp1^.next);
-                                 End
-                             End;
-                         12: Begin
-                            {imul 12, reg1, reg2 to
-                               lea (,reg1,4), reg2
-                               lea (,reg1,8) reg2
-                             imul 12, reg1 to
-                               lea (reg1,reg1,2), reg1
-                               lea (,reg1,4), reg1}
-                               If (aktoptprocessor <= Class386)
-                                 Then
-                                   Begin
-                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                     If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
-                                       Begin
-                                         TmpRef.base := Paicpu(p)^.oper[2].reg;
-                                         TmpRef.ScaleFactor := 8;
-                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                                       End
-                                     Else
-                                       Begin
-                                         TmpRef.base := R_NO;
-                                         TmpRef.ScaleFactor := 4;
-                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                       End;
-                                     InsertLLItem(AsmL,p, p^.next, hp1);
-                                     Reset_reference(tmpref);
-                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                     If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
-                                       Begin
-                                         TmpRef.base := R_NO;
-                                         TmpRef.ScaleFactor := 4;
-                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                                       End
-                                     Else
-                                       Begin
-                                         TmpRef.base := Paicpu(p)^.oper[1].reg;
-                                         TmpRef.ScaleFactor := 2;
-                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                       End;
-                                     InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                                     Dispose(p, Done);
-                                     p := Pai(hp1^.next);
-                                   End
-                             End
-                        End;
-                      End;
+                          faddp/                       fmul     st, st
+                           fmulp  st, st1 (hp2) }
+                        Begin
+                          AsmL^.Remove(p);
+                          Dispose(p, Done);
+                          p := hp1;
+                          If (Paicpu(hp2)^.opcode = A_FADDP) Then
+                            Paicpu(hp2)^.opcode := A_FADD
+                          Else
+                            Paicpu(hp2)^.opcode := A_FMUL;
+                          Paicpu(hp2)^.oper[1].reg := R_ST;
+                        End
+                      Else
+                      { change              to
+                          fld/fst mem1 (hp1)   fld/fst mem1
+                          fld     mem1 (p)     fld      st}
+                        Begin
+                          Paicpu(p)^.changeopsize(S_FL);
+                          Paicpu(p)^.loadreg(0,R_ST);
+                        End
+                    Else
+                      Begin
+                        Case Paicpu(hp2)^.opcode Of
+                          A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
+                     { change                        to
+                         fld/fst  mem1    (hp1)      fld/fst    mem1
+                         fld      mem2    (p)        fxxx       mem2
+                         fxxxp    st, st1 (hp2)                      }
+
+                            Begin
+                              Case Paicpu(hp2)^.opcode Of
+                                A_FADDP: Paicpu(p)^.opcode := A_FADD;
+                                A_FMULP: Paicpu(p)^.opcode := A_FMUL;
+                                A_FSUBP: Paicpu(p)^.opcode := A_FSUBR;
+                                A_FSUBRP: Paicpu(p)^.opcode := A_FSUB;
+                                A_FDIVP: Paicpu(p)^.opcode := A_FDIVR;
+                                A_FDIVRP: Paicpu(p)^.opcode := A_FDIV;
+                              End;
+                              AsmL^.Remove(hp2);
+                              Dispose(hp2, Done)
+                            End
+                        End
+                      End
                 End;
+              A_FSTP,A_FISTP:
+                if doFpuLoadStoreOpt(asmL,p) then
+                  continue;
               A_LEA:
                 Begin
                 {removes seg register prefixes from LEA operations, as they
@@ -784,7 +881,6 @@ Begin
                                   end;
                               end;
                             end;
-
                 End;
               A_MOV:
                 Begin
@@ -1420,71 +1516,6 @@ Begin
                                    p := hp1;
                                  End
                 End;
-              A_SAR, A_SHR:
-                  {changes the code sequence
-                   shr/sar const1, x
-                   shl     const2, x
-                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
-                Begin
-                  If GetNextInstruction(p, hp1) And
-                     (pai(hp1)^.typ = ait_instruction) and
-                     (Paicpu(hp1)^.opcode = A_SHL) and
-                     (Paicpu(p)^.oper[0].typ = top_const) and
-                     (Paicpu(hp1)^.oper[0].typ = top_const) and
-                     (Paicpu(hp1)^.opsize = Paicpu(p)^.opsize) And
-                     (Paicpu(hp1)^.oper[1].typ = Paicpu(p)^.oper[1].typ) And
-                     OpsEqual(Paicpu(hp1)^.oper[1], Paicpu(p)^.oper[1])
-                    Then
-                      If (Paicpu(p)^.oper[0].val > Paicpu(hp1)^.oper[0].val) And
-                         Not(CS_LittleSize In aktglobalswitches)
-                        Then
-                   { shr/sar const1, %reg
-                     shl     const2, %reg
-                      with const1 > const2 }
-                          Begin
-                            Paicpu(p)^.LoadConst(0,Paicpu(p)^.oper[0].val-Paicpu(hp1)^.oper[0].val);
-                            Paicpu(hp1)^.opcode := A_AND;
-                            l := (1 shl (Paicpu(hp1)^.oper[0].val)) - 1;
-                            Case Paicpu(p)^.opsize Of
-                              S_L: Paicpu(hp1)^.LoadConst(0,l Xor longint(-1));
-                              S_B: Paicpu(hp1)^.LoadConst(0,l Xor $ff);
-                              S_W: Paicpu(hp1)^.LoadConst(0,l Xor $ffff);
-                            End;
-                          End
-                        Else
-                          If (Paicpu(p)^.oper[0].val<Paicpu(hp1)^.oper[0].val) And
-                             Not(CS_LittleSize In aktglobalswitches)
-                            Then
-                   { shr/sar const1, %reg
-                     shl     const2, %reg
-                      with const1 < const2 }
-                              Begin
-                                Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val-Paicpu(p)^.oper[0].val);
-                                Paicpu(p)^.opcode := A_AND;
-                                l := (1 shl (Paicpu(p)^.oper[0].val))-1;
-                                Case Paicpu(p)^.opsize Of
-                                  S_L: Paicpu(p)^.LoadConst(0,l Xor $ffffffff);
-                                  S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
-                                  S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
-                                End;
-                              End
-                            Else
-                   { shr/sar const1, %reg
-                     shl     const2, %reg
-                      with const1 = const2 }
-                              if (Paicpu(p)^.oper[0].val = Paicpu(hp1)^.oper[0].val) then
-                                Begin
-                                  Paicpu(p)^.opcode := A_AND;
-                                  l := (1 shl (Paicpu(p)^.oper[0].val))-1;
-                                  Case Paicpu(p)^.opsize Of
-                                    S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
-                                    S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
-                                    S_L: Paicpu(p)^.LoadConst(0,l Xor $ffffffff);
-                                  End;
-                                  AsmL^.remove(hp1);
-                                  dispose(hp1, done);
-                                End;
-                End;
               A_SETcc :
                 { changes
                     setcc (funcres)             setcc reg
@@ -1604,6 +1635,7 @@ end;
 
 Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 
+{$ifdef USECMOV}
   function CanBeCMOV(p : pai) : boolean;
 
     begin
@@ -1613,6 +1645,7 @@ Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
          (paicpu(p)^.oper[0].typ in [top_reg,top_ref]) and
          (paicpu(p)^.oper[1].typ in [top_reg,top_ref]);
     end;
+{$endif USECMOV}
 
 var
   p,hp1,hp2: pai;
@@ -1633,20 +1666,6 @@ Begin
         Ait_Instruction:
           Begin
             Case Paicpu(p)^.opcode Of
-              A_CALL:
-                If (AktOptProcessor < ClassP6) And
-                   GetNextInstruction(p, hp1) And
-                   (hp1^.typ = ait_instruction) And
-                   (paicpu(hp1)^.opcode = A_JMP) Then
-                  Begin
-                    Inc(paicpu(hp1)^.oper[0].sym^.refs);
-                    hp2 := New(Paicpu,op_sym(A_PUSH,S_L,paicpu(hp1)^.oper[0].sym));
-                    InsertLLItem(AsmL, p^.previous, p, hp2);
-                    Paicpu(p)^.opcode := A_JMP;
-                    AsmL^.Remove(hp1);
-                    Dispose(hp1, Done)
-                  End;
-
 {$ifdef USECMOV}
               A_Jcc:
                 if (aktspecificoptprocessor=ClassP6) then
@@ -1833,59 +1852,6 @@ Begin
                        p := hp1
                      End;
                    End
-                  else if (Paicpu(p)^.oper[0].typ = Top_Const) And
-                     (Paicpu(p)^.oper[0].val = 0) And
-                     (Paicpu(p)^.oper[1].typ = Top_Reg) Then
-                    { change "mov $0, %reg" into "xor %reg, %reg" }
-                    Begin
-                      Paicpu(p)^.opcode := A_XOR;
-                      Paicpu(p)^.LoadReg(0,Paicpu(p)^.oper[1].reg);
-                    End
-                End;
-              A_MOVZX:
-                Begin
-                  If (Paicpu(p)^.oper[1].typ = top_reg) Then
-                    If (Paicpu(p)^.oper[0].typ = top_reg)
-                      Then
-                        Case Paicpu(p)^.opsize of
-                          S_BL:
-                            Begin
-                              If IsGP32Reg(Paicpu(p)^.oper[1].reg) And
-                                 Not(CS_LittleSize in aktglobalswitches) And
-                                 (aktoptprocessor = ClassP5)
-                                Then
-                                  {Change "movzbl %reg1, %reg2" to
-                                   "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
-                                   PentiumMMX}
-                                  Begin
-                                    hp1 := New(Paicpu, op_reg_reg(A_XOR, S_L,
-                                               Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[1].reg));
-                                    InsertLLItem(AsmL,p^.previous, p, hp1);
-                                    Paicpu(p)^.opcode := A_MOV;
-                                    Paicpu(p)^.changeopsize(S_B);
-                                    Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
-                                  End;
-                            End;
-                        End
-                      Else
-                        If (Paicpu(p)^.oper[0].typ = top_ref) And
-                           (Paicpu(p)^.oper[0].ref^.base <> Paicpu(p)^.oper[1].reg) And
-                           (Paicpu(p)^.oper[0].ref^.index <> Paicpu(p)^.oper[1].reg) And
-                           Not(CS_LittleSize in aktglobalswitches) And
-                           IsGP32Reg(Paicpu(p)^.oper[1].reg) And
-                           (aktoptprocessor = ClassP5) And
-                           (Paicpu(p)^.opsize = S_BL)
-                          Then
-                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
-                             Pentium and PentiumMMX}
-                            Begin
-                              hp1 := New(Paicpu,op_reg_reg(A_XOR, S_L, Paicpu(p)^.oper[1].reg,
-                                         Paicpu(p)^.oper[1].reg));
-                              Paicpu(p)^.opcode := A_MOV;
-                              Paicpu(p)^.changeopsize(S_B);
-                              Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
-                              InsertLLItem(AsmL,p^.previous, p, hp1);
-                            End;
                 End;
               A_TEST, A_OR:
                 {removes the line marked with (x) from the sequence
@@ -1942,11 +1908,111 @@ Begin
     End;
 End;
 
+Procedure PostPeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
+{ Last peephole pass: walks the list entries from BlockStart up to (but not  }
+{ including) BlockEnd and applies three rewrites to instructions:            }
+{   - "call proc; jmp label" -> "push label; jmp proc"   (below ClassP6)     }
+{   - "mov $0, %reg"         -> "xor %reg, %reg"                             }
+{   - "movzbl src, %reg32"   -> "xorl %reg32, %reg32; movb src, %reg8"       }
+{     (ClassP5 only, and not when optimizing for size)                       }
+{                                                                            }
+{ AsmL:       list that owns the entries; used for inserting and removing    }
+{ BlockStart: first entry to examine                                         }
+{ BlockEnd:   entry at which the walk stops (it is not examined itself)      }
+var
+  p,hp1,hp2: pai;
+Begin
+  P := BlockStart;
+  While (P <> BlockEnd) Do
+    Begin
+      Case P^.Typ Of
+        Ait_Instruction:
+          Begin
+            Case Paicpu(p)^.opcode Of
+              A_CALL:
+               { change "call proc; jmp label" into "push label; jmp proc".  }
+               { The jump operand must be a symbol: for any other operand    }
+               { type (register/reference) the "sym" field of the operand    }
+               { is not valid and must be neither dereferenced nor pushed    }
+                If (AktOptProcessor < ClassP6) And
+                   GetNextInstruction(p, hp1) And
+                   (hp1^.typ = ait_instruction) And
+                   (paicpu(hp1)^.opcode = A_JMP) And
+                   (paicpu(hp1)^.oper[0].typ = top_symbol) Then
+                  Begin
+                   { the new push refers to the jump target as well }
+                    Inc(paicpu(hp1)^.oper[0].sym^.refs);
+                    hp2 := New(Paicpu,op_sym(A_PUSH,S_L,paicpu(hp1)^.oper[0].sym));
+                   { put the push in front of the call, turn the call into a }
+                   { jmp and drop the original jmp                           }
+                    InsertLLItem(AsmL, p^.previous, p, hp2);
+                    Paicpu(p)^.opcode := A_JMP;
+                    AsmL^.Remove(hp1);
+                    Dispose(hp1, Done)
+                  End;
+              A_MOV:
+                if (Paicpu(p)^.oper[0].typ = Top_Const) And
+                   (Paicpu(p)^.oper[0].val = 0) And
+                   (Paicpu(p)^.oper[1].typ = Top_Reg) Then
+                  { change "mov $0, %reg" into "xor %reg, %reg" }
+                  { NOTE(review): xor modifies the flags while mov does not, }
+                  { and nothing here checks whether the flags are still      }
+                  { needed afterwards - confirm this is safe at this point   }
+                  Begin
+                    Paicpu(p)^.opcode := A_XOR;
+                    Paicpu(p)^.LoadReg(0,Paicpu(p)^.oper[1].reg);
+                  End;
+              A_MOVZX:
+                Begin
+                  If (Paicpu(p)^.oper[1].typ = top_reg) Then
+                    If (Paicpu(p)^.oper[0].typ = top_reg)
+                      Then
+                        Case Paicpu(p)^.opsize of
+                          S_BL:
+                            Begin
+                              If IsGP32Reg(Paicpu(p)^.oper[1].reg) And
+                                 Not(CS_LittleSize in aktglobalswitches) And
+                                 (aktoptprocessor = ClassP5)
+                                Then
+                                  {Change "movzbl %reg1, %reg2" to
+                                   "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
+                                   PentiumMMX}
+                                  { NOTE(review): unlike the memory case     }
+                                  { below, there is no check here that reg1  }
+                                  { is not a part of reg2; presumably such   }
+                                  { instructions never reach this pass -     }
+                                  { confirm                                  }
+                                  Begin
+                                    hp1 := New(Paicpu, op_reg_reg(A_XOR, S_L,
+                                               Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[1].reg));
+                                    InsertLLItem(AsmL,p^.previous, p, hp1);
+                                    Paicpu(p)^.opcode := A_MOV;
+                                    Paicpu(p)^.changeopsize(S_B);
+                                    Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
+                                  End;
+                            End;
+                        End
+                      Else
+                       { memory source: the destination register may not be  }
+                       { used as base or index of the reference, since the   }
+                       { inserted xor clears it before the load              }
+                        If (Paicpu(p)^.oper[0].typ = top_ref) And
+                           (Paicpu(p)^.oper[0].ref^.base <> Paicpu(p)^.oper[1].reg) And
+                           (Paicpu(p)^.oper[0].ref^.index <> Paicpu(p)^.oper[1].reg) And
+                           Not(CS_LittleSize in aktglobalswitches) And
+                           IsGP32Reg(Paicpu(p)^.oper[1].reg) And
+                           (aktoptprocessor = ClassP5) And
+                           (Paicpu(p)^.opsize = S_BL)
+                          Then
+                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
+                             Pentium and PentiumMMX}
+                            Begin
+                              hp1 := New(Paicpu,op_reg_reg(A_XOR, S_L, Paicpu(p)^.oper[1].reg,
+                                         Paicpu(p)^.oper[1].reg));
+                              Paicpu(p)^.opcode := A_MOV;
+                              Paicpu(p)^.changeopsize(S_B);
+                              Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
+                              InsertLLItem(AsmL,p^.previous, p, hp1);
+                            End;
+                End;
+            End;
+          End;
+      End;
+      p := Pai(p^.next)
+    End;
+End;
+
+
+
 End.
 
 {
   $Log$
-  Revision 1.1  2000-10-15 09:47:43  peter
+  Revision 1.2  2000-10-24 10:40:54  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peephole opts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.1  2000/10/15 09:47:43  peter
     * moved to i386/
 
   Revision 1.13  2000/10/02 13:01:29  jonas

+ 350 - 0
compiler/i386/rropt386.pas

@@ -0,0 +1,350 @@
+{
+    $Id$
+    Copyright (c) 1998-2000 by Jonas Maebe, member of the Free Pascal
+      development team
+
+    This unit contains register renaming functionality
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+Unit rrOpt386;
+
+{$i defines.inc}
+
+Interface
+
+Uses aasm;
+
+procedure doRenaming(asml: paasmoutput; first, last: pai);
+
+Implementation
+
+Uses
+  {$ifdef replaceregdebug}cutils,{$endif}
+  verbose,globals,cpubase,cpuasm,daopt386,csopt386,tgeni386;
+
+function canBeFirstSwitch(p: paicpu; reg: tregister): boolean;
+{ checks whether an operation on reg can be switched to another reg without an }
+{ additional mov, e.g. "addl $4,%reg1" can be changed to "leal 4(%reg1),%reg2" }
+{                                                                              }
+{ p   : the instruction to examine                                             }
+{ reg : the register written by p (compared against reg32() of the operand)    }
+{ Returns true only for the instruction forms doFirstSwitch can rewrite, false }
+{ for everything else (including every opcode not listed below).               }
+{                                                                              }
+{ NOTE(review): the add/sub/shl forms are later turned into lea, which does    }
+{ not set the arithmetic flags; nothing in this function checks whether the    }
+{ flags are still needed afterwards - confirm that callers guarantee this.     }
+begin
+  canBeFirstSwitch := false;
+  case p^.opcode of
+    A_MOV,A_MOVZX,A_MOVSX,A_LEA:
+      { the destination simply gets renamed }
+      canBeFirstSwitch :=
+        (p^.oper[1].typ = top_reg) and
+        (reg32(p^.oper[1].reg) = reg);
+    A_IMUL:
+      { the rewrite produces the three operand form "imul $const,src,dest", }
+      { which only exists with an immediate first operand, so the two       }
+      { operand "imul reg/mem,%reg" form must be rejected. Also make sure   }
+      { the last operand really is a register before looking at its .reg    }
+      canBeFirstSwitch :=
+        (p^.ops >= 2) and
+        (p^.oper[0].typ = top_const) and
+        (p^.oper[p^.ops-1].typ = top_reg) and
+        (reg32(p^.oper[p^.ops-1].reg) = reg);
+    A_INC,A_DEC,A_SUB,A_ADD:
+      { only 32 bit register destinations with a non-memory source; sub is  }
+      { additionally limited to constants, since only a constant can be     }
+      { negated into an lea displacement                                    }
+      { NOTE(review): inc/dec carry a single operand, so the oper[1] test   }
+      { below presumably never succeeds for them - confirm                  }
+      canBeFirstSwitch :=
+        (p^.oper[1].typ = top_reg) and
+        (p^.opsize = S_L) and
+        (reg32(p^.oper[1].reg) = reg) and
+        (p^.oper[0].typ <> top_ref) and
+        ((p^.opcode <> A_SUB) or
+         (p^.oper[0].typ = top_const));
+    A_SHL:
+      { shifts by 1, 2 or 3 correspond to the lea scale factors 2, 4 and 8 }
+      canBeFirstSwitch :=
+        (p^.opsize = S_L) and
+        (p^.oper[1].typ = top_reg) and
+        (p^.oper[1].reg = reg) and
+        (p^.oper[0].typ = top_const) and
+        (p^.oper[0].val in [1,2,3]);
+  end;
+end;
+
+procedure switchReg(var reg: tregister; reg1, reg2: tregister);
+{ exchanges reg1 and reg2 in reg: if reg equals one of them (or the result of }
+{ regtoreg8/regtoreg16 applied to one of them), it is replaced by the         }
+{ corresponding form of the other one; any other register is left untouched  }
+begin
+  { direct match }
+  if reg = reg1 then
+    begin
+      reg := reg2;
+      exit;
+    end;
+  if reg = reg2 then
+    begin
+      reg := reg1;
+      exit;
+    end;
+  { match on the regtoreg8 form }
+  if reg = regtoreg8(reg1) then
+    begin
+      reg := regtoreg8(reg2);
+      exit;
+    end;
+  if reg = regtoreg8(reg2) then
+    begin
+      reg := regtoreg8(reg1);
+      exit;
+    end;
+  { match on the regtoreg16 form }
+  if reg = regtoreg16(reg1) then
+    begin
+      reg := regtoreg16(reg2);
+      exit;
+    end;
+  if reg = regtoreg16(reg2) then
+    reg := regtoreg16(reg1);
+end;
+
+
+procedure switchOp(var op: toper; reg1, reg2: tregister);
+{ exchanges reg1 and reg2 inside a single operand: a register operand is     }
+{ switched directly, a memory reference has both its base and its index      }
+{ switched; all other operand types are left alone                           }
+begin
+  if op.typ = top_reg then
+    switchReg(op.reg,reg1,reg2)
+  else if op.typ = top_ref then
+    begin
+      switchReg(op.ref^.base,reg1,reg2);
+      switchReg(op.ref^.index,reg1,reg2);
+    end;
+end;
+
+procedure doSwitchReg(hp: paicpu; reg1,reg2: tregister);
+{ exchanges reg1 and reg2 in every operand of the instruction hp }
+var
+  idx, lastIdx: longint;
+begin
+  { the operand count is read once, just like a for-loop bound }
+  lastIdx := hp^.ops-1;
+  idx := 0;
+  while idx <= lastIdx do
+    begin
+      switchOp(hp^.oper[idx],reg1,reg2);
+      inc(idx);
+    end;
+end;
+
+
+procedure doFirstSwitch(p: paicpu; reg1, reg2: tregister);
+{ rewrites p, an instruction that modifies reg1, so that it still reads reg1 }
+{ but stores its result in reg2 instead                                      }
+{                                                                            }
+{ p    : the instruction to rewrite; it must be one of the forms accepted by }
+{        canBeFirstSwitch, any other opcode triggers an internalerror        }
+{ reg1 : the register p currently writes to                                  }
+{ reg2 : the register that has to receive the result                         }
+var
+  tmpRef: treference;
+begin
+  case p^.opcode of
+    A_MOV,A_MOVZX,A_MOVSX,A_LEA:
+       begin
+         { the destination becomes reg2; a source that used reg2 now uses reg1 }
+         changeOp(p^.oper[1],reg1,reg2);
+         changeOp(p^.oper[0],reg2,reg1);
+       end;
+    A_IMUL:
+      begin
+        { "imul $c,%reg1" -> "imul $c,%reg1,%reg2": the old destination    }
+        { becomes the second source. An imul that already has three        }
+        { operands only needs its destination renamed; copying oper[1]     }
+        { over it would destroy the destination (and oper[1] may not even  }
+        { be a register in that form)                                      }
+        if p^.ops < 3 then
+          begin
+            p^.ops := 3;
+            p^.loadreg(2,p^.oper[1].reg);
+          end;
+        changeOp(p^.oper[2],reg1,reg2);
+      end;
+    A_INC,A_DEC:
+      begin
+        { "incl/decl %reg1" -> "leal 1(%reg1),%reg2" / "leal -1(%reg1),%reg2" }
+        reset_reference(tmpref);
+        tmpref.base := reg1;
+        case p^.opcode of
+          A_INC:
+            tmpref.offset := 1;
+          A_DEC:
+            tmpref.offset := -1;
+        end;
+        p^.ops := 2;
+        p^.opcode := A_LEA;
+        p^.loadreg(1,reg2);
+        p^.loadref(0,newreference(tmpref));
+      end;
+    A_SUB,A_ADD:
+      begin
+        { "addl/subl src,%reg1" -> "leal <src>(%reg1),%reg2" }
+        reset_reference(tmpref);
+        tmpref.base := reg1;
+        case p^.oper[0].typ of
+          top_const:
+            begin
+              tmpref.offset := p^.oper[0].val;
+              if p^.opcode = A_SUB then
+                tmpref.offset := - tmpRef.offset;
+            end;
+          top_symbol:
+            begin
+              tmpref.symbol := p^.oper[0].sym;
+              { keep the constant that is added to the symbol's address }
+              tmpref.offset := p^.oper[0].symofs;
+            end;
+          top_reg:
+            begin
+              tmpref.index := p^.oper[0].reg;
+              tmpref.scalefactor := 1;
+            end;
+          else internalerror(200010031);
+        end;
+        p^.opcode := A_LEA;
+        p^.loadref(0,newreference(tmpref));
+        p^.loadreg(1,reg2);
+      end;
+    A_SHL:
+      begin
+        { "shll $n,%reg1" -> "leal (,%reg1,2^n),%reg2". The scale factor of }
+        { an x86 address only applies to the index register, so reg1 has to }
+        { be the index: as a base it would not be multiplied at all         }
+        reset_reference(tmpref);
+        tmpref.index := reg1;
+        tmpref.scalefactor := 1 shl p^.oper[0].val;
+        p^.opcode := A_LEA;
+        p^.loadref(0,newreference(tmpref));
+        p^.loadreg(1,reg2);
+      end;
+    else internalerror(200010032);
+  end;
+end;
+
+
+function switchRegs(asml: paasmoutput; reg1, reg2: tregister; start: pai): Boolean;
+{ change movl  %reg1,%reg2 ... bla ... to ... bla with reg1 and reg2 switched
+
+  asml       : the assembler list, passed on to allocRegBetween
+  reg1, reg2 : source and destination register of the mov at start
+  start      : the "movl %reg1,%reg2" instruction itself; it is neither
+               changed nor removed here
+  result     : true if the instructions following start were rewritten
+
+  The function works in two phases.
+
+  Phase 1 walks forward from start with getNextInstruction, ignoring entries
+  whose optimizer info says canBeRemoved, and looks for an end point (endP)
+  up to which the switch is possible. The walk gives up (result false) when
+    - there is no next instruction,
+    - reg2 has been modified, reg1 has not, and the instruction reads reg1,
+    - the first instruction that modifies reg1 is not accepted by
+      canBeFirstSwitch,
+    - a label is reached, or an instruction fails noHardCodedRegs or
+      regSizesOk.
+  It stops successfully once the sequenceEnd expression below becomes true.
+
+  Phase 2 walks the instructions between start and endP again: as long as
+  reg1 has not been modified, reg2 is replaced by reg1 (doReplaceReg); the
+  first instruction that modifies reg1 is rewritten by doFirstSwitch; from
+  then on reg1 and reg2 are exchanged (doSwitchReg). If the sequence ended
+  with a store back, endP itself is switched as well. Finally both registers
+  are marked allocated between start and the last affected instruction.
+
+  storeBack, noHardCodedRegs, regSizesOk, findRegDealloc and the other
+  helpers used here are defined outside this unit.
+}
+var
+  endP, hp: pai;
+  switchDone, switchLast, tmpResult, sequenceEnd, reg1Modified, reg2Modified: boolean;
+  reg1StillUsed, reg2StillUsed, isInstruction: boolean;
+begin
+  switchRegs := false;
+  tmpResult := true;
+  sequenceEnd := false;
+  reg1Modified := false;
+  reg2Modified := false;
+  endP := start;
+  while tmpResult and not sequenceEnd do { phase 1: look for the end point }
+    begin
+      tmpResult :=
+        getNextInstruction(endP,endP);
+      If tmpResult and
+         not ppaiprop(endP^.optinfo)^.canBeRemoved then
+        begin
+          { if the newReg gets stored back to the oldReg, we can change }
+          { "mov %oldReg,%newReg; <operations on %newReg>; mov %newReg, }
+          { %oldReg" to "<operations on %oldReg>"                       }
+          switchLast := storeBack(endP,reg1,reg2);
+          reg1StillUsed := reg1 in ppaiprop(endP^.optinfo)^.usedregs;
+          reg2StillUsed := reg2 in ppaiprop(endP^.optinfo)^.usedregs;
+          isInstruction := endP^.typ = ait_instruction;
+          sequenceEnd :=
+            switchLast or
+            { if both registers are released right before an instruction }
+            { that contains hardcoded regs, it's ok too                  }
+            (not reg1StillUsed and not reg2StillUsed) or
+            { no support for (i)div, mul and imul with hardcoded operands }
+            (((not isInstruction) or
+              noHardCodedRegs(paicpu(endP),reg1,reg2)) and
+             (not reg1StillUsed or
+              (isInstruction and findRegDealloc(reg1,endP) and
+               regLoadedWithNewValue(reg1,false,paicpu(endP)))) and
+             (not reg2StillUsed or
+              (isInstruction and findRegDealloc(reg2,endP) and
+               regLoadedWithNewValue(reg2,false,paicpu(endP)))));
+
+          { we can't switch reg1 and reg2 in something like }
+          {   movl  %reg1,%reg2                             }
+          {   movl  (%reg2),%reg2                           }
+          {   movl  4(%reg1),%reg1                          }
+          if reg2Modified and not(reg1Modified) and
+             regReadByInstruction(reg1,endP) then
+            begin
+              tmpResult := false;
+              break
+            end;
+
+          if not reg1Modified then
+            begin
+              reg1Modified := regModifiedByInstruction(reg1,endP);
+              if reg1Modified and not canBeFirstSwitch(paicpu(endP),reg1) then
+                begin
+                  tmpResult := false; { first write to reg1 can't be redirected }
+                  break;
+                end;
+            end;
+          if not reg2Modified then
+            reg2Modified := regModifiedByInstruction(reg2,endP);
+
+          if sequenceEnd then
+            break;
+
+          tmpResult := { keep scanning only past entries that can be switched }
+            (endP^.typ <> ait_label) and
+            ((not isInstruction) or
+             (NoHardCodedRegs(paicpu(endP),reg1,reg2) and
+               RegSizesOk(reg1,reg2,paicpu(endP))));
+        end;
+    end;
+
+  if tmpResult and sequenceEnd then { phase 2: rewrite start..endP }
+    begin
+      switchRegs := true;
+      reg1Modified := false; { the flags are reused to track progress again }
+      reg2Modified := false;
+      getNextInstruction(start,hp);
+      while hp <> endP do
+        begin
+          if (not ppaiprop(hp^.optinfo)^.canberemoved) and
+             (hp^.typ = ait_instruction) then
+            begin
+              switchDone := false;
+              if not reg1Modified then
+                begin
+                  reg1Modified := regModifiedByInstruction(reg1,hp);
+                  if reg1Modified then
+                    begin
+                      doFirstSwitch(paicpu(hp),reg1,reg2);
+                      switchDone := true;
+                    end;
+                end;
+              if not switchDone then
+                if reg1Modified then
+                  doSwitchReg(paicpu(hp),reg1,reg2)
+                else
+                  doReplaceReg(paicpu(hp),reg2,reg1);
+            end;
+          getNextInstruction(hp,hp);
+        end;
+      if switchLast then { here hp = endP }
+        doSwitchReg(paicpu(hp),reg1,reg2)
+      else getLastInstruction(hp,hp);
+      allocRegBetween(asmL,reg1,start,hp);
+      allocRegBetween(asmL,reg2,start,hp);
+    end;
+end;
+
+procedure doRenaming(asml: paasmoutput; first, last: pai);
+{ walks the entries from first (after SkipHead) up to, but not including,    }
+{ last and tries to get rid of every 32 bit "mov %reg1,%reg2" between two    }
+{ registers from usableregs+[R_EDI] by switching the registers in the code   }
+{ that follows it (see switchRegs)                                           }
+var
+  cur: pai;
+begin
+  cur := First;
+  SkipHead(cur);
+  while cur <> last do
+    begin
+      if cur^.typ = ait_instruction then
+        if paicpu(cur)^.opcode = A_MOV then
+          if not(ppaiprop(cur^.optinfo)^.canBeRemoved) and
+             (paicpu(cur)^.oper[0].typ = top_reg) and
+             (paicpu(cur)^.oper[1].typ = top_reg) and
+             (paicpu(cur)^.opsize = S_L) and
+             (paicpu(cur)^.oper[0].reg in (usableregs+[R_EDI])) and
+             (paicpu(cur)^.oper[1].reg in (usableregs+[R_EDI])) then
+            if switchRegs(asml,paicpu(cur)^.oper[0].reg,
+                 paicpu(cur)^.oper[1].reg,cur) then
+              { the mov is not unlinked from the list here, it is only }
+              { flagged as removable in its optimizer info             }
+              ppaiprop(cur^.optinfo)^.canBeRemoved := true;
+      getNextInstruction(cur,cur);
+    end;
+end;
+
+
+End.
+
+{
+  $Log$
+  Revision 1.1  2000-10-24 10:40:54  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peepholopts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+
+}

+ 1 - 1
compiler/msgidx.inc

@@ -557,7 +557,7 @@ const
   option_info=11024;
   option_help_pages=11025;
 
-  MsgTxtSize = 31210;
+  MsgTxtSize = 31225;
 
   MsgIdxMax : array[1..20] of longint=(
     17,58,165,34,41,41,86,14,35,40,

+ 10 - 10
compiler/msgtxt.inc

@@ -728,37 +728,37 @@ const msgtxt : array[0..000130,1..240] of char=(
   '3*2Ou_enable uncertain optimizations (see docs)'#010+
   '3*2O1_level 1 optimizat','ions (quick optimizations)'#010+
   '3*2O2_level 2 optimizations (-O1 + slower optimizations)'#010+
-  '3*2O3_level 3 optimizations (same as -O2u)'#010+
+  '3*2O3_level 3 optimizations (-O2 repeatedly, max 5 times)'#010+
   '3*2Op<x>_target processor:'#010+
   '3*3Op1_set target processor to 386/486'#010+
-  '3*3Op2_set target processor to Pentium/PentiumM','MX (tm)'#010+
+  '3*3Op2_set target processor to P','entium/PentiumMMX (tm)'#010+
   '3*3Op3_set target processor to PPro/PII/c6x86/K6 (tm)'#010+
   '3*1T<x>_Target operating system:'#010+
   '3*2TGO32V1_version 1 of DJ Delorie DOS extender'#010+
   '3*2TGO32V2_version 2 of DJ Delorie DOS extender'#010+
   '3*2TLINUX_Linux'#010+
-  '3*2Tnetware_Novell Netware Module',' (experimental)'#010+
+  '3*2Tnetware_Novell',' Netware Module (experimental)'#010+
   '3*2TOS2_OS/2 2.x'#010+
   '3*2TWin32_Windows 32 Bit'#010+
   '3*1W<x>_Win32 target options'#010+
   '3*2WB<x>_Set Image base to Hexadecimal <x> value'#010+
   '3*2WC_Specify console type application'#010+
-  '3*2WD_Use DEFFILE to export functions of DLL or EXE'#010+
-  '3*2WG_Specify',' graphic type application'#010+
+  '3*2WD_Use DEFFILE to export functions of DLL or EX','E'#010+
+  '3*2WG_Specify graphic type application'#010+
   '3*2WN_Do not generate relocation code (necessary for debugging)'#010+
   '3*2WR_Generate relocation code'#010+
   '6*1A<x>_output format'#010+
   '6*2Aas_Unix o-file using GNU AS'#010+
   '6*2Agas_GNU Motorola assembler'#010+
-  '6*2Amit_MIT Syntax (old GAS)'#010+
-  '6*2Am','ot_Standard Motorola assembler'#010+
+  '6*2Amit_MIT Syntax ','(old GAS)'#010+
+  '6*2Amot_Standard Motorola assembler'#010+
   '6*1O_optimizations:'#010+
   '6*2Oa_turn on the optimizer'#010+
   '6*2Og_generate smaller code'#010+
   '6*2OG_generate faster code (default)'#010+
   '6*2Ox_optimize maximum (still BUGGY!!!)'#010+
-  '6*2O2_set target processor to a MC68020+'#010+
-  '6*1R<x>_assembl','er reading style:'#010+
+  '6*2O2_set target processor to a MC68020+'#010,
+  '6*1R<x>_assembler reading style:'#010+
   '6*2RMOT_read motorola style assembler'#010+
   '6*1T<x>_Target operating system:'#010+
   '6*2TAMIGA_Commodore Amiga'#010+
@@ -767,5 +767,5 @@ const msgtxt : array[0..000130,1..240] of char=(
   '6*2TLINUX_Linux-68k'#010+
   '**1*_'#010+
   '**1?_shows this help'#010+
-  '**1h_shows this help withou','t waiting'#000
+  '**1h_shows t','his help without waiting'#000
 );

+ 18 - 5
compiler/opts386.pas

@@ -58,10 +58,10 @@ begin
                  'g' : initglobalswitches:=initglobalswitches+[cs_littlesize];
                  'G' : initglobalswitches:=initglobalswitches-[cs_littlesize];
                  'r' : initglobalswitches:=initglobalswitches+[cs_regalloc];
-                 'u' : initglobalswitches:=initglobalswitches+[cs_optimize,cs_uncertainopts];
-                 '1' : initglobalswitches:=initglobalswitches-[cs_slowoptimize,cs_uncertainopts]+[cs_optimize,cs_fastoptimize];
-                 '2' : initglobalswitches:=initglobalswitches-[cs_uncertainopts]+[cs_optimize,cs_fastoptimize,cs_slowoptimize];
-                 '3' : initglobalswitches:=initglobalswitches+[cs_optimize,cs_fastoptimize,cs_slowoptimize,cs_uncertainopts];
+                 'u' : initglobalswitches:=initglobalswitches+[cs_uncertainopts];
+                 '1' : initglobalswitches:=initglobalswitches-[cs_fastoptimize,cs_slowoptimize]+[cs_optimize];
+                 '2' : initglobalswitches:=initglobalswitches-[cs_slowoptimize]+[cs_optimize,cs_fastoptimize];
+                 '3' : initglobalswitches:=initglobalswitches+[cs_optimize,cs_fastoptimize,cs_slowoptimize];
                  'p' :
                    Begin
                      If j < Length(Opt) Then
@@ -115,7 +115,20 @@ end;
 end.
 {
   $Log$
-  Revision 1.5  2000-09-24 15:06:20  peter
+  Revision 1.6  2000-10-24 10:40:53  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peepholopts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.5  2000/09/24 15:06:20  peter
     * use defines.inc
 
   Revision 1.4  2000/08/27 16:11:51  peter