瀏覽代碼

+ register renaming ("fixes" bug1088)
* changed command line options meanings for optimizer:
O2 now means peephole optimizations, CSE and register renaming in 1 pass
O3 is the same, but repeated until no further optimizations are
possible or until 5 passes have been done (to avoid endless loops)
* changed aopt386 so it does this looping
* added some procedures from csopt386 to the interface because they're
used by rropt386 as well
* some changes to csopt386 and daopt386 so that newly added instructions
by the CSE get optimizer info (they were simply skipped previously),
this fixes some bugs

Jonas Maebe 25 年之前
父節點
當前提交
a4fde73649
共有 9 個文件被更改,包括 1196 次插入和 650 次删除
  1. 1 1
      compiler/errore.msg
  2. 37 14
      compiler/i386/aopt386.pas
  3. 210 154
      compiler/i386/csopt386.pas
  4. 113 75
      compiler/i386/daopt386.pas
  5. 456 390
      compiler/i386/popt386.pas
  6. 350 0
      compiler/i386/rropt386.pas
  7. 1 1
      compiler/msgidx.inc
  8. 10 10
      compiler/msgtxt.inc
  9. 18 5
      compiler/opts386.pas

+ 1 - 1
compiler/errore.msg

@@ -1870,7 +1870,7 @@ option_help_pages=11025_[
 3*2Ou_enable uncertain optimizations (see docs)
 3*2O1_level 1 optimizations (quick optimizations)
 3*2O2_level 2 optimizations (-O1 + slower optimizations)
-3*2O3_level 3 optimizations (same as -O2u)
+3*2O3_level 3 optimizations (-O2 repeatedly, max 5 times)
 3*2Op<x>_target processor:
 3*3Op1_set target processor to 386/486
 3*3Op2_set target processor to Pentium/PentiumMMX (tm)

+ 37 - 14
compiler/i386/aopt386.pas

@@ -43,15 +43,20 @@ Uses
 
 Procedure Optimize(AsmL: PAasmOutput);
 Var
-  count, max: longint;
   BlockStart, BlockEnd, HP: Pai;
+  pass: longint;
+  slowopt, changed, lastLoop: boolean;
 Begin
-  if (cs_slowoptimize in aktglobalswitches) then
-   { Optimize twice }
-    max := 2
-  else max := 1;
-  for count := 1 to max do
-    begin
+  slowopt := (cs_slowoptimize in aktglobalswitches);
+  pass := 0;
+  changed := false;
+  repeat
+     lastLoop :=
+       not(slowopt) or
+       (not changed and (pass > 2)) or
+      { prevent endless loops }
+       (pass = 4);
+     changed := false;
    { Setup labeltable, always necessary }
      BlockStart := Pai(AsmL^.First);
      BlockEnd := DFAPass1(AsmL, BlockStart);
@@ -59,13 +64,15 @@ Begin
    { or nil                                                                }
      While Assigned(BlockStart) Do
        Begin
+         if pass = 0 then
+           PrePeepHoleOpts(AsmL, BlockStart, BlockEnd);
         { Peephole optimizations }
          PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
         { Only perform them twice in the first pass }
-         if count = 1 then
+         if pass = 0 then
            PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
         { Data flow analyzer }
-         If (cs_slowoptimize in aktglobalswitches) Then
+         If (cs_fastoptimize in aktglobalswitches) Then
            Begin
              If DFAPass2(
 {$ifdef statedebug}
@@ -73,10 +80,12 @@ Begin
 {$endif statedebug}
                                BlockStart, BlockEnd) Then
               { common subexpression elimination }
-               CSE(AsmL, BlockStart, BlockEnd);
+               changed := CSE(asmL, blockStart, blockEnd, pass) or changed;
            End;
         { More peephole optimizations }
          PeepHoleOptPass2(AsmL, BlockStart, BlockEnd);
+         if lastLoop then
+           PostPeepHoleOpts(AsmL, BlockStart, BlockEnd);
         { Dispose labeltable }
          ShutDownDFA;
         { Continue where we left off, BlockEnd is either the start of an }
@@ -100,15 +109,29 @@ Begin
                BlockEnd := DFAPass1(AsmL, BlockStart)
              { Otherwise, skip the next assembler block }
              Else BlockStart := HP;
-           End
-      End;
-   end;
+           End;
+       End;
+     inc(pass);
+  until lastLoop;
 End;
 
 End.
 {
   $Log$
-  Revision 1.1  2000-10-15 09:47:42  peter
+  Revision 1.2  2000-10-24 10:40:53  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peephole optimizations, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.1  2000/10/15 09:47:42  peter
     * moved to i386/
 
   Revision 1.5  2000/09/24 15:06:11  peter

+ 210 - 154
compiler/i386/csopt386.pas

@@ -27,16 +27,21 @@ Unit CSOpt386;
 
 Interface
 
-Uses aasm;
+Uses aasm, cpubase, cpuasm;
 
-{Procedure CSOpt386(First, Last: Pai);}
-Procedure CSE(AsmL: PAasmOutput; First, Last: Pai);
+function CSE(asmL: paasmoutput; first, last: pai; pass: longint): boolean;
+
+function doReplaceReg(hp: paicpu; newReg, orgReg: tregister): boolean;
+function changeOp(var o: toper; newReg, orgReg: tregister): boolean;
+function storeBack(p1: pai; orgReg, newReg: tregister): boolean;
+function NoHardCodedRegs(p: paicpu; orgReg, newReg: TRegister): boolean;
+function RegSizesOK(oldReg,newReg: TRegister; p: paicpu): boolean;
 
 Implementation
 
 Uses
   {$ifdef replaceregdebug}cutils,{$endif}
-  verbose, hcodegen, globals,cpubase,cpuasm,DAOpt386, tgeni386;
+  globtype, verbose, hcodegen, globals, daopt386, tgeni386, rropt386;
 
 {
 Function PaiInSequence(P: Pai; Const Seq: TContent): Boolean;
@@ -83,7 +88,7 @@ begin
               end;
           end
        else
-        if is_reg_var[reg32(p^.oper[1].reg)] then
+{         if is_reg_var[reg32(p^.oper[1].reg)] then }
           for regCounter := R_EAX to R_EDI do
             begin
               if writeDestroysContents(p^.oper[1],regCounter,c[regCounter]) then
@@ -130,9 +135,9 @@ begin
       for opCount := 1 to MaxCh do
         case InsProp[p^.opcode].Ch[opCount] of
           Ch_MOp1,CH_WOp1,CH_RWOp1:
-            if (p^.oper[0].typ = top_ref) or
-               ((p^.oper[0].typ = top_reg) and
-                is_reg_var[reg32(p^.oper[0].reg)]) then
+{             if (p^.oper[0].typ = top_ref) or }
+{                ((p^.oper[0].typ = top_reg) and }
+{                 is_reg_var[reg32(p^.oper[0].reg)]) then }
               for regCounter := R_EAX to R_EDI do
                 if writeDestroysContents(p^.oper[0],regCounter,c[regCounter]) then
                   begin
@@ -140,9 +145,9 @@ begin
                     modifiesConflictingMemLocation := not(reg in regsStillValid);
                   end;
           Ch_MOp2,CH_WOp2,CH_RWOp2:
-            if (p^.oper[1].typ = top_ref) or
-               ((p^.oper[1].typ = top_reg) and
-                is_reg_var[reg32(p^.oper[1].reg)]) then
+{             if (p^.oper[1].typ = top_ref) or }
+{                ((p^.oper[1].typ = top_reg) and }
+{                 is_reg_var[reg32(p^.oper[1].reg)]) then }
               for regCounter := R_EAX to R_EDI do
                 if writeDestroysContents(p^.oper[1],regCounter,c[regCounter]) then
                   begin
@@ -150,9 +155,9 @@ begin
                     modifiesConflictingMemLocation := not(reg in regsStillValid);
                   end;
           Ch_MOp3,CH_WOp3,CH_RWOp3:
-            if (p^.oper[2].typ = top_ref) or
-               ((p^.oper[2].typ = top_reg) and
-                is_reg_var[reg32(p^.oper[2].reg)]) then
+{             if (p^.oper[2].typ = top_ref) or }
+{                ((p^.oper[2].typ = top_reg) and }
+{                 is_reg_var[reg32(p^.oper[2].reg)]) then }
               for regCounter := R_EAX to R_EDI do
                 if writeDestroysContents(p^.oper[2],regCounter,c[regCounter]) then
                   begin
@@ -290,7 +295,7 @@ end;
  Found holds the number of instructions between StartMod and EndMod and false
  is returned}
 Function CheckSequence(p: Pai; var prev: pai; Reg: TRegister; Var Found: Longint;
-           Var RegInfo: TRegInfo): Boolean;
+           Var RegInfo: TRegInfo; findPrevSeqs: boolean): Boolean;
 
 const
   checkingPrevSequences: boolean = false;
@@ -310,7 +315,8 @@ var
                  in [con_ref,con_noRemoveRef]);
         if currentReg > R_EDI then
           begin
-            if isSimpleMemLoc(paicpu(p)^.oper[0].ref^) then
+            if (paicpu(p)^.oper[0].typ <> top_ref) or
+               isSimpleMemLoc(paicpu(p)^.oper[0].ref^) then
               begin
                 checkingPrevSequences := true;
                 passedJump := false;
@@ -321,15 +327,19 @@ var
         else getNextRegToTest := currentReg;
       end;
     if checkingPrevSequences then
-      getNextRegToTest :=
-        getPrevSequence(p,reg,prev,prev,passedJump,regsNotRead,RegsStillValid);
+      if findPrevSeqs then
+        getNextRegToTest :=
+          getPrevSequence(p,reg,prev,prev,passedJump,regsNotRead,RegsStillValid)
+      else
+        getNextRegToTest := R_NO;
   end;
 
 Var hp2, hp3{, EndMod},highPrev, orgPrev: Pai;
     {Cnt,} OldNrOfMods: Longint;
     startRegInfo, OrgRegInfo, HighRegInfo: TRegInfo;
+    regModified: array[R_NO..R_EDI] of boolean;
     HighFound, OrgRegFound: Byte;
-    RegCounter, regCounter2: TRegister;
+    RegCounter, regCounter2, tmpreg: TRegister;
     OrgRegResult: Boolean;
     TmpResult: Boolean;
     {TmpState: Byte;}
@@ -356,6 +366,7 @@ Begin {CheckSequence}
   regCounter := getNextRegToTest(prev,R_NO);
   While (RegCounter <> R_NO) Do
     Begin
+      fillchar(regModified,sizeof(regModified),0);
       regInfo := startRegInfo;
       Found := 0;
       hp2 := PPaiProp(prev^.OptInfo)^.Regs[RegCounter].StartMod;
@@ -371,13 +382,34 @@ Begin {CheckSequence}
              ((paicpu(hp3)^.opcode = A_MOV) or
               (paicpu(hp3)^.opcode = A_MOVZX) or
               (paicpu(hp3)^.opcode = A_MOVSX)) and
-             (paicpu(hp3)^.oper[0].typ in
-               [top_const,top_ref,top_symbol]) and
              (paicpu(hp3)^.oper[1].typ = top_reg) and
-             not(regInRef(reg32(paicpu(hp3)^.oper[1].reg),
-                   paicpu(hp3)^.oper[0].ref^)) then
-            regInfo.lastReload
-              [reg32(paicpu(hp3)^.oper[1].reg)] := hp3;
+             not(regInOp(paicpu(hp3)^.oper[1].reg,
+                   paicpu(hp3)^.oper[0])) then
+            begin
+              tmpreg := reg32(paicpu(hp3)^.oper[1].reg);
+              regInfo.lastReload[tmpreg] := hp3;
+              case paicpu(hp3)^.oper[0].typ of
+                top_ref:
+                  begin
+                  if regModified[reg32(paicpu(hp3)^.oper[0].ref^.base)] then
+                    with ppaiprop(hp3^.optinfo)^.regs[tmpreg] do
+                      if nrOfMods > (oldNrOfMods - found) then
+                        oldNrOfMods := found + nrOfMods;
+                  if regModified[reg32(paicpu(hp3)^.oper[0].ref^.index)] then
+                    with ppaiprop(hp3^.optinfo)^.regs[tmpreg] do
+                      if nrOfMods > (oldNrOfMods - found) then
+                        oldNrOfMods := found + nrOfMods;
+                  end;
+                top_reg:
+                  if regModified[reg32(paicpu(hp3)^.oper[0].reg)] then
+                    with ppaiprop(hp3^.optinfo)^.regs[tmpreg] do
+                      if nrOfMods > (oldNrOfMods - found) then
+                        oldNrOfMods := found + nrOfMods;
+              end;
+            end;
+          for regCounter2 := R_EAX to R_EDI do
+            regModified[regCounter2] := regModified[regCounter2] or
+              regModifiedByInstruction(regCounter2,hp3);
           GetNextInstruction(hp2, hp2);
           GetNextInstruction(hp3, hp3);
           Inc(Found)
@@ -674,40 +706,6 @@ begin
 end;
 
 
-function FindRegDealloc(reg: tregister; p: pai): boolean;
-{ assumes reg is a 32bit register }
-var
-  hp: pai;
-  first: boolean;
-begin
-  findregdealloc := false;
-  first := true;
-  while assigned(p^.previous) and
-        ((Pai(p^.previous)^.typ in (skipinstr+[ait_align])) or
-         ((Pai(p^.previous)^.typ = ait_label) and
-          labelCanBeSkipped(pai_label(p^.previous)))) do
-    begin
-      p := pai(p^.previous);
-      if (p^.typ = ait_regalloc) and
-         (pairegalloc(p)^.reg = reg) then
-        if not(pairegalloc(p)^.allocation) then
-          if first then
-            begin
-              findregdealloc := true;
-              break;
-            end
-          else
-            begin
-              findRegDealloc :=
-                getNextInstruction(p,hp) and
-                 regLoadedWithNewValue(reg,false,hp);
-              break
-            end
-        else
-          first := false;
-    end
-end;
-
 Procedure ClearRegContentsFrom(reg: TRegister; p, endP: pai);
 { first clears the contents of reg from p till endP. Then the contents are }
 { cleared until the first instruction that changes reg                     }
@@ -753,7 +751,7 @@ begin
 {$endif replaceregdebug}
 end;
 
-function NoHardCodedRegs(p: paicpu; orgReg, newReg: tRegister): boolean;
+function NoHardCodedRegs(p: paicpu; orgReg, newReg: TRegister): boolean;
 var chCount: byte;
 begin
   case p^.opcode of
@@ -775,7 +773,7 @@ begin
   end;
 end;
 
-function ChangeReg(var Reg: TRegister; orgReg, newReg: TRegister): boolean;
+function ChangeReg(var Reg: TRegister; newReg, orgReg: TRegister): boolean;
 begin
   changeReg := true;
   if reg = newReg then
@@ -787,15 +785,15 @@ begin
   else changeReg := false;
 end;
 
-function changeOp(var o: toper; orgReg, newReg: tregister): boolean;
+function changeOp(var o: toper; newReg, orgReg: tregister): boolean;
 begin
   case o.typ of
-    top_reg: changeOp := changeReg(o.reg,orgReg,newReg);
+    top_reg: changeOp := changeReg(o.reg,newReg,orgReg);
     top_ref:
       begin
         changeOp :=
-          changeReg(o.ref^.base,orgReg,newReg) or
-          changeReg(o.ref^.index,orgReg,newReg);
+          changeReg(o.ref^.base,newReg,orgReg) or
+          changeReg(o.ref^.index,newReg,orgReg);
       end;
   end;
 end;
@@ -829,14 +827,14 @@ begin
     end;
 end;
 
-function doReplaceReg(orgReg,newReg: tregister; hp: paicpu): boolean;
+function doReplaceReg(hp: paicpu; newReg, orgReg: tregister): boolean;
 var
-  opCount: byte;
+  opCount: longint;
   tmpResult: boolean;
 begin
-  for opCount := 0 to 2 do
+  for opCount := 0 to hp^.ops-1 do
     tmpResult :=
-      changeOp(hp^.oper[opCount],orgReg,newReg) or tmpResult;
+      changeOp(hp^.oper[opCount],newReg,orgReg) or tmpResult;
   doReplaceReg := tmpResult;
 end;
 
@@ -858,7 +856,7 @@ begin
     end;
 end;
 
-function doReplaceReadReg(orgReg,newReg: tregister; p: paicpu): boolean;
+function doReplaceReadReg(p: paicpu; newReg,orgReg: tregister): boolean;
 var opCount: byte;
 begin
   doReplaceReadReg := false;
@@ -870,13 +868,13 @@ begin
           1: internalerror(1301001);
           2,3:
             begin
-              if changeOp(p^.oper[0],orgReg,newReg) then
+              if changeOp(p^.oper[0],newReg,orgReg) then
                 begin
 {                  updateStates(orgReg,newReg,p,false);}
                   doReplaceReadReg := true;
                 end;
              if p^.ops = 3 then
-                if changeOp(p^.oper[1],orgReg,newReg) then
+                if changeOp(p^.oper[1],newReg,orgReg) then
                   begin
 {                    updateStates(orgReg,newReg,p,false);}
                     doReplaceReadReg := true;
@@ -889,7 +887,7 @@ begin
       begin
         for opCount := 0 to 2 do
           if p^.oper[opCount].typ = top_ref then
-            if changeOp(p^.oper[opCount],orgReg,newReg) then
+            if changeOp(p^.oper[opCount],newReg,orgReg) then
               begin
 {                updateStates(orgReg,newReg,p,false);}
                 doReplaceReadReg := true;
@@ -898,21 +896,21 @@ begin
           case InsProp[p^.opcode].Ch[opCount] of
             Ch_ROp1:
               if p^.oper[0].typ = top_reg then
-                if changeReg(p^.oper[0].reg,orgReg,newReg) then
+                if changeReg(p^.oper[0].reg,newReg,orgReg) then
                   begin
 {                    updateStates(orgReg,newReg,p,false);}
                     doReplaceReadReg := true;
                   end;
             Ch_ROp2:
               if p^.oper[1].typ = top_reg then
-                if changeReg(p^.oper[1].reg,orgReg,newReg) then
+                if changeReg(p^.oper[1].reg,newReg,orgReg) then
                   begin
 {                    updateStates(orgReg,newReg,p,false);}
                     doReplaceReadReg := true;
                   end;
             Ch_ROp3:
               if p^.oper[2].typ = top_reg then
-                if changeReg(p^.oper[2].reg,orgReg,newReg) then
+                if changeReg(p^.oper[2].reg,newReg,orgReg) then
                   begin
 {                    updateStates(orgReg,newReg,p,false);}
                     doReplaceReadReg := true;
@@ -998,6 +996,20 @@ begin
 end;
 
 
+function storeBack(p1: pai; orgReg, newReg: tregister): boolean;
+{ returns true if p1 contains an instruction that stores the contents }
+{ of newReg back to orgReg                                            }
+begin
+  storeBack :=
+    (p1^.typ = ait_instruction) and
+    (paicpu(p1)^.opcode = A_MOV) and
+    (paicpu(p1)^.oper[0].typ = top_reg) and
+    (paicpu(p1)^.oper[0].reg = newReg) and
+    (paicpu(p1)^.oper[1].typ = top_reg) and
+    (paicpu(p1)^.oper[1].reg = orgReg);
+end;
+
+
 function ReplaceReg(asmL: PaasmOutput; orgReg, newReg: TRegister; p: pai;
            const c: TContent; orgRegCanBeModified: Boolean;
            var returnEndP: pai): Boolean;
@@ -1012,18 +1024,6 @@ var endP, hp: Pai;
     removeLast, sequenceEnd, tmpResult, newRegModified, orgRegRead,
       stateChanged, readStateChanged: Boolean;
 
-  function storeBack(p1: pai): boolean;
-  { returns true if p1 contains an instruction that stores the contents }
-  { of newReg back to orgReg                                            }
-  begin
-    storeBack :=
-      (p1^.typ = ait_instruction) and
-      (paicpu(p1)^.opcode = A_MOV) and
-      (paicpu(p1)^.oper[0].typ = top_reg) and
-      (paicpu(p1)^.oper[0].reg = newReg) and
-      (paicpu(p1)^.oper[1].typ = top_reg) and
-      (paicpu(p1)^.oper[1].reg = orgReg);
-  end;
 
 begin
   ReplaceReg := false;
@@ -1055,7 +1055,7 @@ begin
           { if the newReg gets stored back to the oldReg, we can change }
           { "mov %oldReg,%newReg; <operations on %newReg>; mov %newReg, }
           { %oldReg" to "<operations on %oldReg>"                       }
-          removeLast := storeBack(endP);
+          removeLast := storeBack(endP, orgReg, newReg);
           sequenceEnd :=
             { no support for (i)div, mul and imul with hardcoded operands }
             (noHardCodedRegs(paicpu(endP),orgReg,newReg) and
@@ -1144,14 +1144,14 @@ begin
           if {not(PPaiProp(hp^.optInfo)^.canBeRemoved) and }
              (hp^.typ = ait_instruction) then
             stateChanged :=
-              doReplaceReg(orgReg,newReg,paicpu(hp)) or stateChanged;
+              doReplaceReg(paicpu(hp),newReg,orgReg) or stateChanged;
             if stateChanged then
               updateStates(orgReg,newReg,hp,true);
           getNextInstruction(hp,hp)
         end;
       if assigned(endp) and (endp^.typ = ait_instruction) then
         readStateChanged :=
-          DoReplaceReadReg(orgReg,newReg,paicpu(endP));
+          DoReplaceReadReg(paicpu(endP),newReg,orgReg);
       if stateChanged or readStateChanged then
         updateStates(orgReg,newReg,endP,stateChanged);
 
@@ -1271,11 +1271,11 @@ begin
 end;
 
 
-Procedure DoCSE(AsmL: PAasmOutput; First, Last: Pai);
+procedure DoCSE(AsmL: PAasmOutput; First, Last: Pai; findPrevSeqs, doSubOpts: boolean);
 {marks the instructions that can be removed by RemoveInstructs. They're not
  removed immediately because sometimes an instruction needs to be checked in
  two different sequences}
-var cnt, cnt2, cnt3: longint;
+var cnt, cnt2, orgNrOfMods: longint;
     p, hp1, hp2, prevSeq, prevSeq_next: Pai;
     hp3, hp4: pai;
     hp5 : pai;
@@ -1284,7 +1284,6 @@ var cnt, cnt2, cnt3: longint;
 Begin
   p := First;
   SkipHead(p);
-  First := p;
   While (p <> Last) Do
     Begin
       Case p^.typ Of
@@ -1302,11 +1301,19 @@ Begin
                        PPaiProp(Pai(p)^.OptInfo)^.CanBeRemoved := True;
               A_MOV, A_MOVZX, A_MOVSX:
                 Begin
+                  hp2 := p;
                   Case Paicpu(p)^.oper[0].typ Of
-                    Top_Ref:
-                      Begin {destination is always a register in this case}
+                    top_ref, top_reg:
+                     if (paicpu(p)^.oper[1].typ = top_reg) then
+                       Begin
                         With PPaiProp(p^.OptInfo)^.Regs[Reg32(Paicpu(p)^.oper[1].reg)] Do
                           Begin
+                            if assigned(startmod) and
+                               (startmod = p)then
+                              orgNrOfMods := ppaiprop(startmod^.optinfo)^.
+                                regs[reg32(paicpu(p)^.oper[1].reg)].nrOfMods
+                            else
+                              orgNrOfMods := 0;
                             If (p = StartMod) And
                                GetLastInstruction (p, hp1) And
                                (hp1^.typ <> ait_marker) Then
@@ -1317,7 +1324,7 @@ Begin
                                  'cse checking '+att_reg2str[Reg32(Paicpu(p)^.oper[1].reg)])));
                                insertLLItem(asml,p,p^.next,hp5);
 {$endif csdebug}
-                               If CheckSequence(p,prevSeq,Paicpu(p)^.oper[1].reg, Cnt, RegInfo) And
+                               If CheckSequence(p,prevSeq,Paicpu(p)^.oper[1].reg, Cnt, RegInfo, findPrevSeqs) And
                                   (Cnt > 0) Then
                                  Begin
                                    hp1 := nil;
@@ -1336,19 +1343,17 @@ Begin
 {   movl 16(%ebp), %eax                                                     }
 {   movl 8(%edx), %edx                                                      }
 {   movl 4(%eax), eax                                                       }
-                                   hp2 := p;
                                    Cnt2 := 1;
                                    While Cnt2 <= Cnt Do
                                      Begin
-                                       If Not(RegInInstruction(Paicpu(hp2)^.oper[1].reg, p)) then
+(*                                       If not(regInInstruction(Paicpu(hp2)^.oper[1].reg, p)) and
+                                          not(ppaiprop(p^.optinfo)^.canBeRemoved) then
                                          begin
-                                           if ((p^.typ = ait_instruction) And
-                                               ((paicpu(p)^.OpCode = A_MOV)  or
-                                                (paicpu(p)^.opcode = A_MOVZX) or
-                                                (paicpu(p)^.opcode = A_MOVSX)) And
-                                               (paicpu(p)^.Oper[0].typ in
-                                                 [top_const,top_ref,top_symbol])) and
-                                               (paicpu(p)^.oper[1].typ = top_reg) then
+                                           if (p^.typ = ait_instruction) And
+                                              ((paicpu(p)^.OpCode = A_MOV)  or
+                                               (paicpu(p)^.opcode = A_MOVZX) or
+                                               (paicpu(p)^.opcode = A_MOVSX)) And
+                                              (paicpu(p)^.oper[1].typ = top_reg) then
                                              begin
                                                regCounter := reg32(paicpu(p)^.oper[1].reg);
                                                if (regCounter in reginfo.regsStillUsedAfterSeq) then
@@ -1369,9 +1374,9 @@ Begin
                                                  end
 {$endif noremove}
                                              end
-                                         end
+                                         end *)
 {$ifndef noremove}
-                                       else
+{                                       else }
                                          PPaiProp(p^.OptInfo)^.CanBeRemoved := True
 {$endif noremove}
                                        ; Inc(Cnt2);
@@ -1397,15 +1402,18 @@ Begin
                                         (RegInfo.New2OldReg[RegCounter] <> R_NO) Then
                                        Begin
                                          AllocRegBetween(AsmL,RegInfo.New2OldReg[RegCounter],
-                                           PPaiProp(prevSeq^.OptInfo)^.Regs[RegInfo.New2OldReg[RegCounter]].StartMod,prevSeq_next);
+                                           PPaiProp(prevSeq^.OptInfo)^.Regs[RegInfo.New2OldReg[RegCounter]].StartMod,hp2);
                                          if hp4 <> prevSeq then
                                            begin
                                              if assigned(reginfo.lastReload[regCounter]) then
                                                getLastInstruction(reginfo.lastReload[regCounter],hp3)
+                                             else if assigned(reginfo.lastReload[regInfo.New2OldReg[regCounter]]) then
+                                               getLastInstruction(reginfo.lastReload[regInfo.new2OldReg[regCounter]],hp3)
                                              else hp3 := hp4;
-                                             if prevSeq <> hp3 then
+                                             if prevSeq_next <> hp3 then
                                                clearRegContentsFrom(regCounter,prevSeq_next,
                                                  hp3);
+                                             getnextInstruction(hp3,hp3);
                                              allocRegBetween(asmL,regCounter,prevSeq,hp3);
                                            end;
                                          If Not(RegCounter In RegInfo.RegsLoadedForRef) And
@@ -1421,20 +1429,23 @@ Begin
                                                begin
                                                  hp3 := New(Pai_Marker,Init(NoPropInfoEnd));
                                                  InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
-                                                 hp3 := New(Paicpu,Op_Reg_Reg(A_MOV, S_L,
+                                                 hp5 := New(Paicpu,Op_Reg_Reg(A_MOV, S_L,
                                                                          {old reg          new reg}
                                                        RegInfo.New2OldReg[RegCounter], RegCounter));
-                                                 InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
+                                                 new(ppaiprop(hp5^.optinfo));
+                                                 ppaiprop(hp5^.optinfo)^ := ppaiprop(prevSeq_next^.optinfo)^;
+                                                 ppaiprop(hp5^.optinfo)^.canBeRemoved := false;
+                                                 InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp5);
                                                  hp3 := New(Pai_Marker,Init(NoPropInfoStart));
                                                  InsertLLItem(AsmL, prevSeq, Pai(prevSeq^.next), hp3);
                                                  { adjusts states in previous instruction so that it will  }
                                                  { definitely be different from the previous or next state }
-                                                 incstate(ppaiprop(prevSeq_next^.optinfo)^.
+                                                 incstate(ppaiprop(hp5^.optinfo)^.
                                                    regs[RegInfo.New2OldReg[RegCounter]].rstate,20);
-                                                 incstate(ppaiprop(prevSeq_next^.optinfo)^.
+                                                 incstate(ppaiprop(hp5^.optinfo)^.
                                                    regs[regCounter].wstate,20);
                                                  updateState(RegInfo.New2OldReg[RegCounter],
-                                                   prevSeq_next);
+                                                   hp5);
                                                end
                                            End
                                          Else
@@ -1479,7 +1490,7 @@ Begin
                                     (PPaiProp(p^.OptInfo)^.CanBeRemoved) Then
                                    if (cnt > 0) then
                                      begin
-                                       hp2 := p;
+                                       p := hp2;
                                        Cnt2 := 1;
                                        While Cnt2 <= Cnt Do
                                          Begin
@@ -1509,38 +1520,38 @@ Begin
                                      end;
                               End;
                           End;
-                        if not ppaiprop(p^.optinfo)^.canBeRemoved and
-                           not regInRef(reg32(paicpu(p)^.oper[1].reg),
-                                        paicpu(p)^.oper[0].ref^) then
-                          removePrevNotUsedLoad(p,reg32(paicpu(p)^.oper[1].reg),false);
-                      End;
-                    top_Reg:
                       { try to replace the new reg with the old reg }
-                      if not(PPaiProp(p^.optInfo)^.canBeRemoved) and
-                         { only remove if we're not storing something in a regvar }
-                         (paicpu(p)^.oper[1].reg in (usableregs+[R_EDI])) and
-                         (paicpu(p)^.opcode = A_MOV) and
-                         getLastInstruction(p,hp4) then
-                        begin
-                          case paicpu(p)^.oper[1].typ of
-                            top_Reg:
-                              { we only have to start replacing from the instruction after the mov, }
-                              { but replacereg only starts with getnextinstruction(p,p)             }
-                              if ReplaceReg(asmL,paicpu(p)^.oper[0].reg,
-                                   paicpu(p)^.oper[1].reg,p,
-                                   PPaiProp(hp4^.optInfo)^.Regs[paicpu(p)^.oper[1].reg],false,hp1) then
-                                begin
-                                    PPaiProp(p^.optInfo)^.canBeRemoved := true;
-                                    allocRegBetween(asmL,paicpu(p)^.oper[0].reg,
-                                    PPaiProp(p^.optInfo)^.regs[paicpu(p)^.oper[0].reg].startMod,
-                                    hp1);
-                                end
-                              else
-                                if reg32(paicpu(p)^.oper[0].reg) <> reg32(paicpu(p)^.oper[1].reg) then
-                                  removePrevNotUsedLoad(p,reg32(paicpu(p)^.oper[1].reg),false);
-
+                      if not(PPaiProp(p^.optInfo)^.canBeRemoved) then
+                        if (paicpu(p)^.oper[0].typ = top_reg) and
+                           (paicpu(p)^.oper[1].typ = top_reg) and
+                           { only remove if we're not storing something in a regvar }
+                           (paicpu(p)^.oper[1].reg in (usableregs+[R_EDI])) and
+                           (paicpu(p)^.opcode = A_MOV) and
+                           getLastInstruction(p,hp4) and
+                          { we only have to start replacing from the instruction after the mov, }
+                          { but replacereg only starts with getnextinstruction(p,p)             }
+                            replaceReg(asmL,paicpu(p)^.oper[0].reg,
+                              paicpu(p)^.oper[1].reg,p,
+                              ppaiprop(hp4^.optInfo)^.regs[paicpu(p)^.oper[1].reg],false,hp1) then
+                          begin
+                            ppaiprop(p^.optInfo)^.canBeRemoved := true;
+                            allocRegBetween(asmL,paicpu(p)^.oper[0].reg,
+                              ppaiProp(p^.optInfo)^.regs[paicpu(p)^.oper[0].reg].startMod,hp1);
                           end
-                        end;
+                        else
+                          if (paicpu(p)^.oper[1].typ = top_reg) and
+                             not regInOp(paicpu(p)^.oper[1].reg,paicpu(p)^.oper[0]) then
+                           removePrevNotUsedLoad(p,reg32(paicpu(p)^.oper[1].reg),false);
+                        { at first, only try optimizations of large blocks, because doing }
+                        { smaller ones may prevent bigger ones from completing in the     }
+                        { next pass                                                       }
+                        if not doSubOpts and (orgNrOfMods <> 0) then
+                          begin
+                            p := hp2;
+                            for cnt := 1 to pred(orgNrOfMods) do
+                              getNextInstruction(p,p);
+                          end;
+                      End;
                     top_symbol,Top_Const:
                       Begin
                         Case Paicpu(p)^.oper[1].typ Of
@@ -1584,21 +1595,48 @@ Begin
     End;
 End;
 
-Procedure RemoveInstructs(AsmL: PAasmOutput; First, Last: Pai);
+function removeInstructs(asmL: paasmoutput; first, last: pai): boolean;
 { Removes the marked instructions and disposes the PPaiProps of the other }
 { instructions                                                            }
+{ Walks the list from first up to (not including) last. A region between }
+{ a noPropInfoStart and a noPropInfoEnd marker is handled separately: the }
+{ two markers are removed and every item in between that carries optinfo  }
+{ has that optinfo disposed (and is itself removed if it was flagged      }
+{ canBeRemoved).                                                          }
+{ The result starts out false and is set to true in the branch that       }
+{ removes a marked instruction outside such a region.                     }
 Var p, hp1: Pai;
 begin
+  removeInstructs := false;
   p := First;
   While (p <> Last) Do
     Begin
       If (p^.typ = ait_marker) and
-         (pai_marker(p)^.kind in [noPropInfoStart,noPropInfoEnd]) then
+         (pai_marker(p)^.kind = noPropInfoStart) then
         begin
           hp1 := pai(p^.next);
           asmL^.remove(p);
           dispose(p,done);
-          p := hp1
+          { the end-of-region test has to look at hp1: p has just been }
+          { disposed above (and later on points to the item after hp1) }
+          while not((hp1^.typ = ait_marker) and
+                    (pai_marker(hp1)^.kind = noPropInfoEnd)) do
+            begin
+              { remember the successor first, hp1 may be freed below }
+              p := pai(hp1^.next);
+{$ifndef noinstremove}
+              { allocregbetween can insert new ait_regalloc objects }
+              { without optinfo                                     }
+              if assigned(hp1^.optinfo) then
+                if ppaiprop(hp1^.optinfo)^.canBeRemoved then
+                  begin
+                    dispose(ppaiprop(hp1^.optinfo));
+                    hp1^.optinfo := nil;
+                    asmL^.remove(hp1);
+                    dispose(hp1,done);
+                    hp1 := p;
+                  end
+                else
+{$endif noinstremove}
+                  begin
+                    dispose(ppaiprop(hp1^.optinfo));
+                    hp1^.optinfo := nil;
+                  end;
+              hp1 := p;
+            end;
+          { hp1 is now the noPropInfoEnd marker: continue after it and }
+          { remove the marker itself                                   }
+          p := pai(hp1^.next);
+          asmL^.remove(hp1);
+          dispose(hp1,done);
         end
       else
 {$ifndef noinstremove}
@@ -1609,6 +1647,7 @@ begin
             AsmL^.Remove(p);
             Dispose(p, Done);
             p := hp1;
+            removeInstructs := true;
           End
         Else
 {$endif noinstremove}
@@ -1617,20 +1656,37 @@ begin
             p := pai(p^.next);;
           End;
     End;
-    FreeMem(PaiPropBlock, NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4))
+    FreeMem(PaiPropBlock, NrOfPaiObjs*SizeOf(TPaiProp))
 End;
 
-Procedure CSE(AsmL: PAasmOutput; First, Last: Pai);
+function CSE(AsmL: PAasmOutput; First, Last: Pai; pass: longint): boolean;
+{ Runs DoCSE, optionally the register renaming, and finally               }
+{ removeInstructs on the items between First and Last; the result is the  }
+{ value returned by removeInstructs.                                      }
+{ Without cs_slowoptimize everything is enabled right away. With it, the  }
+{ first boolean passed to DoCSE only becomes true from pass 2 on, the     }
+{ second one from pass 1 on, and register renaming is skipped in pass 0.  }
+var
+  singlePass: boolean;
 Begin
-  DoCSE(AsmL, First, Last);
-  RemoveInstructs(AsmL, First, Last);
+  singlePass := not(cs_slowoptimize in aktglobalswitches);
+  DoCSE(AsmL, First, Last,
+        singlePass or (pass >= 2),
+        singlePass or (pass >= 1));
+ { register renaming }
+  if singlePass or (pass > 0) then
+    doRenaming(asmL, first, last);
+  cse := removeInstructs(asmL, first, last);
 End;
 
 End.
 
 {
   $Log$
-  Revision 1.1  2000-10-15 09:47:43  peter
+  Revision 1.2  2000-10-24 10:40:53  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peephole opts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.1  2000/10/15 09:47:43  peter
     * moved to i386/
 
   Revision 1.14  2000/09/30 13:07:23  jonas

+ 113 - 75
compiler/i386/daopt386.pas

@@ -183,6 +183,7 @@ Function regLoadedWithNewValue(reg: tregister; canDependOnPrevValue: boolean;
            hp: pai): boolean;
 Procedure UpdateUsedRegs(Var UsedRegs: TRegSet; p: Pai);
 Procedure AllocRegBetween(AsmL: PAasmOutput; Reg: TRegister; p1, p2: Pai);
+function FindRegDealloc(reg: tregister; p: pai): boolean;
 
 Function RegsEquivalent(OldReg, NewReg: TRegister; Var RegInfo: TRegInfo; OpAct: TopAction): Boolean;
 Function InstructionsEquivalent(p1, p2: Pai; Var RegInfo: TRegInfo): Boolean;
@@ -720,15 +721,14 @@ Begin
               End
             Else Regsequivalent := False
         Else
-          If Not(Reg32(NewReg) in NewRegsEncountered) and
-             ((OpAct = OpAct_Write) or
-              ((newReg = oldReg) and
-               not(newReg in usableregs + [R_EDI]))) Then
-            Begin
-              AddReg2RegInfo(OldReg, NewReg, RegInfo);
-              RegsEquivalent := True
-            End
-          Else RegsEquivalent := False
+           If Not(Reg32(NewReg) in NewRegsEncountered) and
+              ((OpAct = OpAct_Write) or
+               (newReg = oldReg)) Then
+             Begin
+               AddReg2RegInfo(OldReg, NewReg, RegInfo);
+               RegsEquivalent := True
+             End
+           Else RegsEquivalent := False 
     Else RegsEquivalent := False
   Else RegsEquivalent := OldReg = NewReg
 End;
@@ -973,7 +973,7 @@ Begin
            ((current^.typ = ait_label) and
             labelCanBeSkipped(pai_label(current)))) do
       Current := Pai(Current^.Next);
-    If Assigned(Current) And
+{    If Assigned(Current) And
        (Current^.typ = ait_Marker) And
        (Pai_Marker(Current)^.Kind = NoPropInfoStart) Then
       Begin
@@ -981,10 +981,10 @@ Begin
               ((Current^.typ <> ait_Marker) Or
                (Pai_Marker(Current)^.Kind <> NoPropInfoEnd)) Do
           Current := Pai(Current^.Next);
-      End;
+      End;}
   Until Not(Assigned(Current)) Or
         (Current^.typ <> ait_Marker) Or
-        (Pai_Marker(Current)^.Kind <> NoPropInfoEnd);
+        not(Pai_Marker(Current)^.Kind in [NoPropInfoStart,NoPropInfoEnd]);
   Next := Current;
   If Assigned(Current) And
      Not((Current^.typ In SkipInstr) or
@@ -1009,12 +1009,12 @@ Begin
     Current := Pai(Current^.previous);
     While Assigned(Current) And
           (((Current^.typ = ait_Marker) And
-            Not(Pai_Marker(Current)^.Kind in [AsmBlockEnd,NoPropInfoEnd])) or
+            Not(Pai_Marker(Current)^.Kind in [AsmBlockEnd{,NoPropInfoEnd}])) or
            (Current^.typ In SkipInstr) or
            ((Current^.typ = ait_label) And
             labelCanBeSkipped(pai_label(current)))) Do
       Current := Pai(Current^.previous);
-    If Assigned(Current) And
+{    If Assigned(Current) And
        (Current^.typ = ait_Marker) And
        (Pai_Marker(Current)^.Kind = NoPropInfoEnd) Then
       Begin
@@ -1022,10 +1022,10 @@ Begin
               ((Current^.typ <> ait_Marker) Or
                (Pai_Marker(Current)^.Kind <> NoPropInfoStart)) Do
           Current := Pai(Current^.previous);
-      End;
+      End;}
   Until Not(Assigned(Current)) Or
         (Current^.typ <> ait_Marker) Or
-        (Pai_Marker(Current)^.Kind <> NoPropInfoStart);
+        not(Pai_Marker(Current)^.Kind in [NoPropInfoStart,NoPropInfoEnd]);
   If Not(Assigned(Current)) or
      (Current^.typ In SkipInstr) or
      ((Current^.typ = ait_label) And
@@ -1177,6 +1177,41 @@ Begin
    end;
 End;
 
+function FindRegDealloc(reg: tregister; p: pai): boolean;
+{ assumes reg is a 32bit register                                          }
+{ Steps backwards from p for as long as the preceding item is in           }
+{ skipinstr+[ait_align] or is a label for which labelCanBeSkipped holds,   }
+{ looking for a deallocation of reg among those items.                     }
+{ - a deallocation found before any allocation of reg: result is true      }
+{ - a deallocation found after an allocation of reg was passed: result is  }
+{   true only if getNextInstruction finds an instruction after the         }
+{   deallocation and regLoadedWithNewValue(reg,false,...) holds for it     }
+{ - no deallocation found: result is false                                 }
+var
+  nextInstr: pai;
+  allocSeen: boolean;
+begin
+  findRegDealloc := false;
+  allocSeen := false;
+  while assigned(p^.previous) do
+    begin
+      { stop at the first preceding item that may not be skipped }
+      if not((pai(p^.previous)^.typ in (skipinstr+[ait_align])) or
+             ((pai(p^.previous)^.typ = ait_label) and
+              labelCanBeSkipped(pai_label(p^.previous)))) then
+        break;
+      p := pai(p^.previous);
+      { only (de)allocations of reg are of interest }
+      if (p^.typ <> ait_regalloc) or
+         (pairegalloc(p)^.reg <> reg) then
+        continue;
+      if pairegalloc(p)^.allocation then
+        begin
+          allocSeen := true;
+          continue;
+        end;
+      { p is a deallocation of reg }
+      if allocSeen then
+        findRegDealloc :=
+          getNextInstruction(p,nextInstr) and
+          regLoadedWithNewValue(reg,false,nextInstr)
+      else
+        findRegDealloc := true;
+      break;
+    end
+end;
+
+
 
 Procedure IncState(Var S: Byte; amount: longint);
 {Increases S by 1, wraps around at $ffff to 0 (so we won't get overflow
@@ -1233,7 +1268,7 @@ Begin
   sequenceDependsonReg := TmpResult
 End;
 
-procedure invalidateDepedingRegs(p1: ppaiProp; reg: tregister);
+procedure invalidateDependingRegs(p1: ppaiProp; reg: tregister);
 var
   counter: tregister;
 begin
@@ -1277,7 +1312,7 @@ Begin
             { con_invalid and con_noRemoveRef = con_unknown }
             else typ := con_unknown;
         end;
-      invalidateDepedingRegs(p1,reg);
+      invalidateDependingRegs(p1,reg);
     end;
 End;
 
@@ -1644,7 +1679,7 @@ function writeToRegDestroysContents(destReg: tregister; reg: tregister;
 { modified                                                           }
 begin
   writeToRegDestroysContents :=
-    (c.typ <> con_unknown) and
+    (c.typ in [con_ref,con_noRemoveRef,con_invalid]) and
     sequenceDependsOnReg(c,reg,reg32(destReg));
 end;
 
@@ -2033,73 +2068,63 @@ Begin
               A_MOV, A_MOVZX, A_MOVSX:
                 Begin
                   Case Paicpu(p)^.oper[0].typ Of
-                    Top_Reg:
-                      Case Paicpu(p)^.oper[1].typ Of
-                        Top_Reg:
+                    top_ref, top_reg:
+                      case paicpu(p)^.oper[1].typ Of
+                        top_reg:
                           Begin
 {$ifdef statedebug}
                             hp := new(pai_asm_comment,init(strpnew('destroying '+
                               att_reg2str[Paicpu(p)^.oper[1].reg])));
                             insertllitem(asml,p,p^.next,hp);
 {$endif statedebug}
-                            DestroyReg(CurProp, Paicpu(p)^.oper[1].reg, true);
-                            ReadReg(CurProp, Paicpu(p)^.oper[0].reg);
-{                            CurProp^.Regs[Paicpu(p)^.oper[1].reg] :=
-                              CurProp^.Regs[Paicpu(p)^.oper[0].reg];
-                            If (CurProp^.Regs[Paicpu(p)^.oper[1].reg].ModReg = R_NO) Then
-                              CurProp^.Regs[Paicpu(p)^.oper[1].reg].ModReg :=
-                                Paicpu(p)^.oper[0].reg;}
-                          End;
-                        Top_Ref:
-                          Begin
-                            ReadReg(CurProp, Paicpu(p)^.oper[0].reg);
-                            ReadRef(CurProp, Paicpu(p)^.oper[1].ref);
-                            DestroyRefs(p, Paicpu(p)^.oper[1].ref^, Paicpu(p)^.oper[0].reg);
-                          End;
-                      End;
-                    Top_Ref:
-                      Begin {destination is always a register in this case}
-                        ReadRef(CurProp, Paicpu(p)^.oper[0].ref);
-                        TmpReg := Reg32(Paicpu(p)^.oper[1].reg);
-                        If RegInRef(TmpReg, Paicpu(p)^.oper[0].ref^) And
-                           (curProp^.regs[tmpReg].typ in [con_ref,con_noRemoveRef])
-                          Then
-                            Begin
-                              With CurProp^.Regs[TmpReg] Do
-                                Begin
-                                  incState(wstate,1);
- {also store how many instructions are part of the sequence in the first
-  instructions PPaiProp, so it can be easily accessed from within
-  CheckSequence}
-                                  Inc(NrOfMods, NrOfInstrSinceLastMod[TmpReg]);
-                                  PPaiProp(Pai(StartMod)^.OptInfo)^.Regs[TmpReg].NrOfMods := NrOfMods;
-                                  NrOfInstrSinceLastMod[TmpReg] := 0;
-                                  { Destroy the contents of the registers  }
-                                  { that depended on the previous value of }
-                                  { this register                          }
-                                  invalidateDepedingRegs(curProp,tmpReg);
-                                End;
-                            End
-                          Else
-                            Begin
+
+                            readOp(curprop, paicpu(p)^.oper[0]);
+                            tmpreg := reg32(paicpu(p)^.oper[1].reg);
+                            if regInOp(tmpreg, paicpu(p)^.oper[0]) and
+                               (curProp^.regs[tmpReg].typ in [con_ref,con_noRemoveRef]) then
+                              begin
+                                with curprop^.regs[tmpreg] Do
+                                  begin
+                                    incState(wstate,1);
+ { also store how many instructions are part of the sequence in the first }
+ { instruction's PPaiProp, so it can be easily accessed from within       }
+ { CheckSequence                                                          }
+                                    inc(nrOfMods, nrOfInstrSinceLastMod[tmpreg]);
+                                    ppaiprop(startmod^.optinfo)^.regs[tmpreg].nrOfMods := nrOfMods;
+                                    nrOfInstrSinceLastMod[tmpreg] := 0;
+                                   { Destroy the contents of the registers  }
+                                   { that depended on the previous value of }
+                                   { this register                          }
+                                    invalidateDependingRegs(curprop,tmpreg);
+                                end;
+                            end
+                          else
+                            begin
 {$ifdef statedebug}
                               hp := new(pai_asm_comment,init(strpnew('destroying & initing '+att_reg2str[tmpreg])));
                               insertllitem(asml,p,p^.next,hp);
 {$endif statedebug}
-                              DestroyReg(CurProp, TmpReg, true);
-                              If Not(RegInRef(TmpReg, Paicpu(p)^.oper[0].ref^)) Then
-                                With CurProp^.Regs[TmpReg] Do
-                                  Begin
-                                    Typ := Con_Ref;
-                                    StartMod := p;
-                                    NrOfMods := 1;
-                                  End
-                            End;
+                              destroyReg(curprop, tmpreg, true);
+                              if not(reginop(tmpreg, paicpu(p)^.oper[0])) then
+                                with curprop^.regs[tmpreg] Do
+                                  begin
+                                    typ := con_ref;
+                                    startmod := p;
+                                    nrOfMods := 1;
+                                  end
+                            end;
 {$ifdef StateDebug}
                   hp := new(pai_asm_comment,init(strpnew(att_reg2str[TmpReg]+': '+tostr(CurProp^.Regs[TmpReg].WState))));
                   InsertLLItem(AsmL, p, p^.next, hp);
 {$endif StateDebug}
-
+                          End;
+                        Top_Ref:
+                          { can only be if oper[0] = top_reg }
+                          Begin
+                            ReadReg(CurProp, Paicpu(p)^.oper[0].reg);
+                            ReadRef(CurProp, Paicpu(p)^.oper[1].ref);
+                            DestroyRefs(p, Paicpu(p)^.oper[1].ref^, Paicpu(p)^.oper[0].reg);
+                          End;
                       End;
                     top_symbol,Top_Const:
                       Begin
@@ -2317,12 +2342,12 @@ Begin
       GetNextInstruction(p, p);
     End;
 {Uncomment the next line to see how much memory the reloading optimizer needs}
-{  Writeln((NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4)));}
+{  Writeln(NrOfPaiObjs*SizeOf(TPaiProp));}
 {no need to check mem/maxavail, we've got as much virtual memory as we want}
   If NrOfPaiObjs <> 0 Then
     Begin
       InitDFAPass2 := True;
-      GetMem(PaiPropBlock, NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4));
+      GetMem(PaiPropBlock, NrOfPaiObjs*SizeOf(TPaiProp));
       p := BlockStart;
       SkipHead(p);
       For Count := 1 To NrOfPaiObjs Do
@@ -2362,7 +2387,20 @@ End.
 
 {
   $Log$
-  Revision 1.2  2000-10-19 15:59:40  jonas
+  Revision 1.3  2000-10-24 10:40:53  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peepholopts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.2  2000/10/19 15:59:40  jonas
     * fixed bug in allocregbetween (the register wasn't added to the
       usedregs set of the last instruction of the chain) ("merged")
 

+ 456 - 390
compiler/i386/popt386.pas

@@ -28,14 +28,16 @@ Interface
 
 Uses Aasm;
 
+Procedure PrePeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 Procedure PeepHoleOptPass1(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
+Procedure PostPeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 
 Implementation
 
 Uses
   globtype,systems,
-  globals,verbose,hcodegen,
+  globals,hcodegen,
 {$ifdef finaldestdebug}
   cobjects,
 {$endif finaldestdebug}
@@ -97,6 +99,308 @@ begin
     end;
 end;
 
+Procedure PrePeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
+{ Walks the items from BlockStart up to (not including) BlockEnd and       }
+{ rewrites the following instruction patterns in place:                    }
+{  - 32 bit "imul const, reg[, reg2]": removed or turned into a mov for     }
+{    const = 1; turned into lea/add sequences for const = 3, 5, 9 (and, if  }
+{    aktoptprocessor <= Class386, also 6, 10, 12). The lea/add rewrite is   }
+{    only done if aktoptprocessor < ClassP6, cs_littlesize is not set and   }
+{    the next instruction is not a Jcc with condition C_O or C_NO.          }
+{  - "shr/sar const1, x" followed by "shl const2, x" on the same operand:   }
+{    turned into a shift/and combination or a single and                    }
+{  - "xor reg, reg": temporarily turned into a mov of constant 0            }
+var
+  p,hp1: pai;
+  l: longint;
+  tmpRef: treference;
+Begin
+  P := BlockStart;
+  While (P <> BlockEnd) Do
+    Begin
+      Case P^.Typ Of
+        Ait_Instruction:
+          Begin
+            Case Paicpu(p)^.opcode Of
+              A_IMUL:
+                {changes certain "imul const, %reg"'s to lea sequences}
+                Begin
+                  If (Paicpu(p)^.oper[0].typ = Top_Const) And
+                     (Paicpu(p)^.oper[1].typ = Top_Reg) And
+                     (Paicpu(p)^.opsize = S_L) Then
+                    If (Paicpu(p)^.oper[0].val = 1) Then
+                      If (Paicpu(p)^.oper[2].typ = Top_None) Then
+                       {remove "imul $1, reg"}
+                        Begin
+                          hp1 := Pai(p^.Next);
+                          AsmL^.Remove(p);
+                          Dispose(p, Done);
+                          p := hp1;
+                          { p already points to the next item, so skip }
+                          { the advance at the end of the loop         }
+                          Continue;
+                        End
+                      Else
+                       {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
+                        Begin
+                          hp1 := New(Paicpu, Op_Reg_Reg(A_MOV, S_L, Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[2].reg));
+                          InsertLLItem(AsmL, p^.previous, p^.next, hp1);
+                          Dispose(p, Done);
+                          p := hp1;
+                        End
+                    Else If
+                     ((Paicpu(p)^.oper[2].typ = Top_Reg) or
+                      (Paicpu(p)^.oper[2].typ = Top_None)) And
+                     (aktoptprocessor < ClassP6) And
+                     (Paicpu(p)^.oper[0].val <= 12) And
+                     Not(CS_LittleSize in aktglobalswitches) And
+                     (Not(GetNextInstruction(p, hp1)) Or
+                       {GetNextInstruction(p, hp1) And}
+                       Not((Pai(hp1)^.typ = ait_instruction) And
+                           ((paicpu(hp1)^.opcode=A_Jcc) and
+                            (paicpu(hp1)^.condition in [C_O,C_NO]))))
+                    Then
+                      Begin
+                        Reset_reference(tmpref);
+                        Case Paicpu(p)^.oper[0].val Of
+                          3: Begin
+                             {imul 3, reg1, reg2 to
+                                lea (reg1,reg1,2), reg2
+                              imul 3, reg1 to
+                                lea (reg1,reg1,2), reg1}
+                               TmpRef.base := Paicpu(p)^.oper[1].reg;
+                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                               TmpRef.ScaleFactor := 2;
+                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
+                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
+                               Else
+                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                               Dispose(p, Done);
+                               p := hp1;
+                            End;
+                         5: Begin
+                            {imul 5, reg1, reg2 to
+                               lea (reg1,reg1,4), reg2
+                             imul 5, reg1 to
+                               lea (reg1,reg1,4), reg1}
+                              TmpRef.base := Paicpu(p)^.oper[1].reg;
+                              TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                              TmpRef.ScaleFactor := 4;
+                              If (Paicpu(p)^.oper[2].typ = Top_None) Then
+                                hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
+                              Else
+                                hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                              InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                              Dispose(p, Done);
+                              p := hp1;
+                            End;
+                         6: Begin
+                            {imul 6, reg1, reg2 to
+                               lea (,reg1,2), reg2
+                               lea (reg2,reg1,4), reg2
+                             imul 6, reg1 to
+                               lea (reg1,reg1,2), reg1
+                               add reg1, reg1}
+                              If (aktoptprocessor <= Class386)
+                                Then
+                                  Begin
+                                    { build the second instruction first, it is }
+                                    { inserted after p                          }
+                                    TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                    If (Paicpu(p)^.oper[2].typ = Top_Reg)
+                                      Then
+                                        Begin
+                                          TmpRef.base := Paicpu(p)^.oper[2].reg;
+                                          TmpRef.ScaleFactor := 4;
+                                          { the result has to end up in reg2 (oper[2]); }
+                                          { writing it to reg1 would clobber the source }
+                                          { and leave reg2 = 2*reg1                     }
+                                          hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                                        End
+                                      Else
+                                        Begin
+                                          hp1 :=  New(Paicpu, op_reg_reg(A_ADD, S_L,
+                                            Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
+                                        End;
+                                    InsertLLItem(AsmL,p, p^.next, hp1);
+                                    { now build the first instruction, which }
+                                    { replaces p                             }
+                                    Reset_reference(tmpref);
+                                    TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                    TmpRef.ScaleFactor := 2;
+                                    If (Paicpu(p)^.oper[2].typ = Top_Reg)
+                                      Then
+                                        Begin
+                                          TmpRef.base := R_NO;
+                                          hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef),
+                                            Paicpu(p)^.oper[2].reg));
+                                        End
+                                      Else
+                                        Begin
+                                          TmpRef.base := Paicpu(p)^.oper[1].reg;
+                                          hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
+                                        End;
+                                    InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                                    Dispose(p, Done);
+                                    p := Pai(hp1^.next);
+                                  End
+                            End;
+                          9: Begin
+                             {imul 9, reg1, reg2 to
+                                lea (reg1,reg1,8), reg2
+                              imul 9, reg1 to
+                                lea (reg1,reg1,8), reg1}
+                               TmpRef.base := Paicpu(p)^.oper[1].reg;
+                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                               TmpRef.ScaleFactor := 8;
+                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
+                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
+                               Else
+                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                               Dispose(p, Done);
+                               p := hp1;
+                             End;
+                         10: Begin
+                            {imul 10, reg1, reg2 to
+                               lea (reg1,reg1,4), reg2
+                               add reg2, reg2
+                             imul 10, reg1 to
+                               lea (reg1,reg1,4), reg1
+                               add reg1, reg1}
+                               If (aktoptprocessor <= Class386) Then
+                                 Begin
+                                   If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
+                                     hp1 :=  New(Paicpu, op_reg_reg(A_ADD, S_L,
+                                       Paicpu(p)^.oper[2].reg,Paicpu(p)^.oper[2].reg))
+                                   Else
+                                     hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
+                                       Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
+                                   InsertLLItem(AsmL,p, p^.next, hp1);
+                                   TmpRef.base := Paicpu(p)^.oper[1].reg;
+                                   TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                   TmpRef.ScaleFactor := 4;
+                                   If (Paicpu(p)^.oper[2].typ = Top_Reg)
+                                     Then
+                                       hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg))
+                                     Else
+                                       hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
+                                   InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                                   Dispose(p, Done);
+                                   p := Pai(hp1^.next);
+                                 End
+                             End;
+                         12: Begin
+                            {imul 12, reg1, reg2 to
+                               lea (,reg1,4), reg2
+                               lea (reg2,reg1,8), reg2
+                             imul 12, reg1 to
+                               lea (reg1,reg1,2), reg1
+                               lea (,reg1,4), reg1}
+                               If (aktoptprocessor <= Class386)
+                                 Then
+                                   Begin
+                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                     If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
+                                       Begin
+                                         TmpRef.base := Paicpu(p)^.oper[2].reg;
+                                         TmpRef.ScaleFactor := 8;
+                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                                       End
+                                     Else
+                                       Begin
+                                         TmpRef.base := R_NO;
+                                         TmpRef.ScaleFactor := 4;
+                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
+                                       End;
+                                     InsertLLItem(AsmL,p, p^.next, hp1);
+                                     Reset_reference(tmpref);
+                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
+                                     If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
+                                       Begin
+                                         TmpRef.base := R_NO;
+                                         TmpRef.ScaleFactor := 4;
+                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
+                                       End
+                                     Else
+                                       Begin
+                                         TmpRef.base := Paicpu(p)^.oper[1].reg;
+                                         TmpRef.ScaleFactor := 2;
+                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
+                                       End;
+                                     InsertLLItem(AsmL,p^.previous, p^.next, hp1);
+                                     Dispose(p, Done);
+                                     p := Pai(hp1^.next);
+                                   End
+                             End
+                        End;
+                      End;
+                End;
+              A_SAR, A_SHR:
+                  {changes the code sequence
+                   shr/sar const1, x
+                   shl     const2, x
+                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
+                Begin
+                  If GetNextInstruction(p, hp1) And
+                     (pai(hp1)^.typ = ait_instruction) and
+                     (Paicpu(hp1)^.opcode = A_SHL) and
+                     (Paicpu(p)^.oper[0].typ = top_const) and
+                     (Paicpu(hp1)^.oper[0].typ = top_const) and
+                     (Paicpu(hp1)^.opsize = Paicpu(p)^.opsize) And
+                     (Paicpu(hp1)^.oper[1].typ = Paicpu(p)^.oper[1].typ) And
+                     OpsEqual(Paicpu(hp1)^.oper[1], Paicpu(p)^.oper[1])
+                    Then
+                      If (Paicpu(p)^.oper[0].val > Paicpu(hp1)^.oper[0].val) And
+                         Not(CS_LittleSize In aktglobalswitches)
+                        Then
+                   { shr/sar const1, %reg
+                     shl     const2, %reg
+                      with const1 > const2 }
+                          Begin
+                            Paicpu(p)^.LoadConst(0,Paicpu(p)^.oper[0].val-Paicpu(hp1)^.oper[0].val);
+                            Paicpu(hp1)^.opcode := A_AND;
+                            l := (1 shl (Paicpu(hp1)^.oper[0].val)) - 1;
+                            Case Paicpu(p)^.opsize Of
+                              S_L: Paicpu(hp1)^.LoadConst(0,l Xor longint(-1));
+                              S_B: Paicpu(hp1)^.LoadConst(0,l Xor $ff);
+                              S_W: Paicpu(hp1)^.LoadConst(0,l Xor $ffff);
+                            End;
+                          End
+                        Else
+                          If (Paicpu(p)^.oper[0].val<Paicpu(hp1)^.oper[0].val) And
+                             Not(CS_LittleSize In aktglobalswitches)
+                            Then
+                   { shr/sar const1, %reg
+                     shl     const2, %reg
+                      with const1 < const2 }
+                              Begin
+                                Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val-Paicpu(p)^.oper[0].val);
+                                Paicpu(p)^.opcode := A_AND;
+                                l := (1 shl (Paicpu(p)^.oper[0].val))-1;
+                                Case Paicpu(p)^.opsize Of
+                                  S_L: Paicpu(p)^.LoadConst(0,l Xor $ffffffff);
+                                  S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
+                                  S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
+                                End;
+                              End
+                            Else
+                   { shr/sar const1, %reg
+                     shl     const2, %reg
+                      with const1 = const2 }
+                              if (Paicpu(p)^.oper[0].val = Paicpu(hp1)^.oper[0].val) then
+                                Begin
+                                  Paicpu(p)^.opcode := A_AND;
+                                  l := (1 shl (Paicpu(p)^.oper[0].val))-1;
+                                  Case Paicpu(p)^.opsize Of
+                                    S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
+                                    S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
+                                    S_L: Paicpu(p)^.LoadConst(0,l Xor $ffffffff);
+                                  End;
+                                  AsmL^.remove(hp1);
+                                  dispose(hp1, done);
+                                End;
+                End;
+              A_XOR:
+                If (Paicpu(p)^.oper[0].typ = top_reg) And
+                   (Paicpu(p)^.oper[1].typ = top_reg) And
+                   (Paicpu(p)^.oper[0].reg = Paicpu(p)^.oper[1].reg) then
+                 { temporarily change this to 'mov reg,0' to make it easier }
+                 { for the CSE. Will be changed back in pass 2              }
+                  begin
+                    paicpu(p)^.opcode := A_MOV;
+                    paicpu(p)^.loadconst(0,0);
+                  end;
+            End;
+          End;
+      End;
+      p := Pai(p^.next)
+    End;
+End;
+
+
 
 Procedure PeepHoleOptPass1(Asml: PAasmOutput; BlockStart, BlockEnd: Pai);
 {First pass of peepholeoptimizations}
@@ -279,9 +583,7 @@ Begin
                If (paicpu(p)^.opcode = A_JMP) Then
                  Begin
                    While GetNextInstruction(p, hp1) and
-                         ((hp1^.typ <> ait_label) or
-                   { skip unused labels, they're not referenced anywhere }
-                          labelCanBeSkipped(pai_label(hp1))) Do
+                         (hp1^.typ <> ait_label) do
                      If not(hp1^.typ in ([ait_label,ait_align]+skipinstr)) Then
                        Begin
                          AsmL^.Remove(hp1);
@@ -289,6 +591,7 @@ Begin
                        End
                      else break;
                   End;
+               { remove jumps to a label coming right after them }
                If GetNextInstruction(p, hp1) then
                  Begin
                    if FindLabel(pasmlabel(paicpu(p)^.oper[0].sym), hp1) then
@@ -481,259 +784,53 @@ Begin
                       { change                      to
                           fld/fst   mem1  (hp1)       fld/fst   mem1
                           fld       mem1  (p)         fadd/
-                          faddp/                       fmul     st, st
-                           fmulp  st, st1 (hp2) }
-                        Begin
-                          AsmL^.Remove(p);
-                          Dispose(p, Done);
-                          p := hp1;
-                          If (Paicpu(hp2)^.opcode = A_FADDP) Then
-                            Paicpu(hp2)^.opcode := A_FADD
-                          Else
-                            Paicpu(hp2)^.opcode := A_FMUL;
-                          Paicpu(hp2)^.oper[1].reg := R_ST;
-                        End
-                      Else
-                      { change              to
-                          fld/fst mem1 (hp1)   fld/fst mem1
-                          fld     mem1 (p)     fld      st}
-                        Begin
-                          Paicpu(p)^.changeopsize(S_FL);
-                          Paicpu(p)^.loadreg(0,R_ST);
-                        End
-                    Else
-                      Begin
-                        Case Paicpu(hp2)^.opcode Of
-                          A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
-                     { change                        to
-                         fld/fst  mem1    (hp1)      fld/fst    mem1
-                         fld      mem2    (p)        fxxx       mem2
-                         fxxxp    st, st1 (hp2)                      }
-
-                            Begin
-                              Case Paicpu(hp2)^.opcode Of
-                                A_FADDP: Paicpu(p)^.opcode := A_FADD;
-                                A_FMULP: Paicpu(p)^.opcode := A_FMUL;
-                                A_FSUBP: Paicpu(p)^.opcode := A_FSUBR;
-                                A_FSUBRP: Paicpu(p)^.opcode := A_FSUB;
-                                A_FDIVP: Paicpu(p)^.opcode := A_FDIVR;
-                                A_FDIVRP: Paicpu(p)^.opcode := A_FDIV;
-                              End;
-                              AsmL^.Remove(hp2);
-                              Dispose(hp2, Done)
-                            End
-                        End
-                      End
-                End;
-              A_FSTP,A_FISTP:
-                if doFpuLoadStoreOpt(asmL,p) then
-                  continue;
-              A_IMUL:
-                {changes certain "imul const, %reg"'s to lea sequences}
-                Begin
-                  If (Paicpu(p)^.oper[0].typ = Top_Const) And
-                     (Paicpu(p)^.oper[1].typ = Top_Reg) And
-                     (Paicpu(p)^.opsize = S_L) Then
-                    If (Paicpu(p)^.oper[0].val = 1) Then
-                      If (Paicpu(p)^.oper[2].typ = Top_None) Then
-                       {remove "imul $1, reg"}
-                        Begin
-                          hp1 := Pai(p^.Next);
-                          AsmL^.Remove(p);
-                          Dispose(p, Done);
-                          p := hp1;
-                          Continue;
-                        End
-                      Else
-                       {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
-                        Begin
-                          hp1 := New(Paicpu, Op_Reg_Reg(A_MOV, S_L, Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[2].reg));
-                          InsertLLItem(AsmL, p^.previous, p^.next, hp1);
-                          Dispose(p, Done);
-                          p := hp1;
-                        End
-                    Else If
-                     ((Paicpu(p)^.oper[2].typ = Top_Reg) or
-                      (Paicpu(p)^.oper[2].typ = Top_None)) And
-                     (aktoptprocessor < ClassP6) And
-                     (Paicpu(p)^.oper[0].val <= 12) And
-                     Not(CS_LittleSize in aktglobalswitches) And
-                     (Not(GetNextInstruction(p, hp1)) Or
-                       {GetNextInstruction(p, hp1) And}
-                       Not((Pai(hp1)^.typ = ait_instruction) And
-                           ((paicpu(hp1)^.opcode=A_Jcc) and
-                            (paicpu(hp1)^.condition in [C_O,C_NO]))))
-                    Then
-                      Begin
-                        Reset_reference(tmpref);
-                        Case Paicpu(p)^.oper[0].val Of
-                          3: Begin
-                             {imul 3, reg1, reg2 to
-                                lea (reg1,reg1,2), reg2
-                              imul 3, reg1 to
-                                lea (reg1,reg1,2), reg1}
-                               TmpRef.base := Paicpu(p)^.oper[1].reg;
-                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                               TmpRef.ScaleFactor := 2;
-                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
-                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
-                               Else
-                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                               Dispose(p, Done);
-                               p := hp1;
-                            End;
-                         5: Begin
-                            {imul 5, reg1, reg2 to
-                               lea (reg1,reg1,4), reg2
-                             imul 5, reg1 to
-                               lea (reg1,reg1,4), reg1}
-                              TmpRef.base := Paicpu(p)^.oper[1].reg;
-                              TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                              TmpRef.ScaleFactor := 4;
-                              If (Paicpu(p)^.oper[2].typ = Top_None) Then
-                                hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
-                              Else
-                                hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                              InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                              Dispose(p, Done);
-                              p := hp1;
-                            End;
-                         6: Begin
-                            {imul 6, reg1, reg2 to
-                               lea (,reg1,2), reg2
-                               lea (reg2,reg1,4), reg2
-                             imul 6, reg1 to
-                               lea (reg1,reg1,2), reg1
-                               add reg1, reg1}
-                              If (aktoptprocessor <= Class386)
-                                Then
-                                  Begin
-                                    TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                    If (Paicpu(p)^.oper[2].typ = Top_Reg)
-                                      Then
-                                        Begin
-                                          TmpRef.base := Paicpu(p)^.oper[2].reg;
-                                          TmpRef.ScaleFactor := 4;
-                                          hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                        End
-                                      Else
-                                        Begin
-                                          hp1 :=  New(Paicpu, op_reg_reg(A_ADD, S_L,
-                                            Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
-                                        End;
-                                    InsertLLItem(AsmL,p, p^.next, hp1);
-                                    Reset_reference(tmpref);
-                                    TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                    TmpRef.ScaleFactor := 2;
-                                    If (Paicpu(p)^.oper[2].typ = Top_Reg)
-                                      Then
-                                        Begin
-                                          TmpRef.base := R_NO;
-                                          hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef),
-                                            Paicpu(p)^.oper[2].reg));
-                                        End
-                                      Else
-                                        Begin
-                                          TmpRef.base := Paicpu(p)^.oper[1].reg;
-                                          hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                        End;
-                                    InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                                    Dispose(p, Done);
-                                    p := Pai(hp1^.next);
-                                  End
-                            End;
-                          9: Begin
-                             {imul 9, reg1, reg2 to
-                                lea (reg1,reg1,8), reg2
-                              imul 9, reg1 to
-                                lea (reg1,reg1,8), reg1}
-                               TmpRef.base := Paicpu(p)^.oper[1].reg;
-                               TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                               TmpRef.ScaleFactor := 8;
-                               If (Paicpu(p)^.oper[2].typ = Top_None) Then
-                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg))
-                               Else
-                                 hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                               InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                               Dispose(p, Done);
-                               p := hp1;
-                             End;
-                         10: Begin
-                            {imul 10, reg1, reg2 to
-                               lea (reg1,reg1,4), reg2
-                               add reg2, reg2
-                             imul 10, reg1 to
-                               lea (reg1,reg1,4), reg1
-                               add reg1, reg1}
-                               If (aktoptprocessor <= Class386) Then
-                                 Begin
-                                   If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
-                                     hp1 :=  New(Paicpu, op_reg_reg(A_ADD, S_L,
-                                       Paicpu(p)^.oper[2].reg,Paicpu(p)^.oper[2].reg))
-                                   Else
-                                     hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
-                                       Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[1].reg));
-                                   InsertLLItem(AsmL,p, p^.next, hp1);
-                                   TmpRef.base := Paicpu(p)^.oper[1].reg;
-                                   TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                   TmpRef.ScaleFactor := 4;
-                                   If (Paicpu(p)^.oper[2].typ = Top_Reg)
-                                     Then
-                                       hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg))
-                                     Else
-                                       hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                   InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                                   Dispose(p, Done);
-                                   p := Pai(hp1^.next);
-                                 End
-                             End;
-                         12: Begin
-                            {imul 12, reg1, reg2 to
-                               lea (,reg1,4), reg2
-                               lea (,reg1,8) reg2
-                             imul 12, reg1 to
-                               lea (reg1,reg1,2), reg1
-                               lea (,reg1,4), reg1}
-                               If (aktoptprocessor <= Class386)
-                                 Then
-                                   Begin
-                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                     If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
-                                       Begin
-                                         TmpRef.base := Paicpu(p)^.oper[2].reg;
-                                         TmpRef.ScaleFactor := 8;
-                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                                       End
-                                     Else
-                                       Begin
-                                         TmpRef.base := R_NO;
-                                         TmpRef.ScaleFactor := 4;
-                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                       End;
-                                     InsertLLItem(AsmL,p, p^.next, hp1);
-                                     Reset_reference(tmpref);
-                                     TmpRef.Index := Paicpu(p)^.oper[1].reg;
-                                     If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
-                                       Begin
-                                         TmpRef.base := R_NO;
-                                         TmpRef.ScaleFactor := 4;
-                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[2].reg));
-                                       End
-                                     Else
-                                       Begin
-                                         TmpRef.base := Paicpu(p)^.oper[1].reg;
-                                         TmpRef.ScaleFactor := 2;
-                                         hp1 :=  New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef), Paicpu(p)^.oper[1].reg));
-                                       End;
-                                     InsertLLItem(AsmL,p^.previous, p^.next, hp1);
-                                     Dispose(p, Done);
-                                     p := Pai(hp1^.next);
-                                   End
-                             End
-                        End;
-                      End;
+                          faddp/                       fmul     st, st
+                           fmulp  st, st1 (hp2) }
+                        Begin
+                          AsmL^.Remove(p);
+                          Dispose(p, Done);
+                          p := hp1;
+                          If (Paicpu(hp2)^.opcode = A_FADDP) Then
+                            Paicpu(hp2)^.opcode := A_FADD
+                          Else
+                            Paicpu(hp2)^.opcode := A_FMUL;
+                          Paicpu(hp2)^.oper[1].reg := R_ST;
+                        End
+                      Else
+                      { change              to
+                          fld/fst mem1 (hp1)   fld/fst mem1
+                          fld     mem1 (p)     fld      st}
+                        Begin
+                          Paicpu(p)^.changeopsize(S_FL);
+                          Paicpu(p)^.loadreg(0,R_ST);
+                        End
+                    Else
+                      Begin
+                        Case Paicpu(hp2)^.opcode Of
+                          A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
+                     { change                        to
+                         fld/fst  mem1    (hp1)      fld/fst    mem1
+                         fld      mem2    (p)        fxxx       mem2
+                         fxxxp    st, st1 (hp2)                      }
+
+                            Begin
+                              Case Paicpu(hp2)^.opcode Of
+                                A_FADDP: Paicpu(p)^.opcode := A_FADD;
+                                A_FMULP: Paicpu(p)^.opcode := A_FMUL;
+                                A_FSUBP: Paicpu(p)^.opcode := A_FSUBR;
+                                A_FSUBRP: Paicpu(p)^.opcode := A_FSUB;
+                                A_FDIVP: Paicpu(p)^.opcode := A_FDIVR;
+                                A_FDIVRP: Paicpu(p)^.opcode := A_FDIV;
+                              End;
+                              AsmL^.Remove(hp2);
+                              Dispose(hp2, Done)
+                            End
+                        End
+                      End
                 End;
+              A_FSTP,A_FISTP:
+                if doFpuLoadStoreOpt(asmL,p) then
+                  continue;
               A_LEA:
                 Begin
                 {removes seg register prefixes from LEA operations, as they
@@ -784,7 +881,6 @@ Begin
                                   end;
                               end;
                             end;
-
                 End;
               A_MOV:
                 Begin
@@ -1420,71 +1516,6 @@ Begin
                                    p := hp1;
                                  End
                 End;
-              A_SAR, A_SHR:
-                  {changes the code sequence
-                   shr/sar const1, x
-                   shl     const2, x
-                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
-                Begin
-                  If GetNextInstruction(p, hp1) And
-                     (pai(hp1)^.typ = ait_instruction) and
-                     (Paicpu(hp1)^.opcode = A_SHL) and
-                     (Paicpu(p)^.oper[0].typ = top_const) and
-                     (Paicpu(hp1)^.oper[0].typ = top_const) and
-                     (Paicpu(hp1)^.opsize = Paicpu(p)^.opsize) And
-                     (Paicpu(hp1)^.oper[1].typ = Paicpu(p)^.oper[1].typ) And
-                     OpsEqual(Paicpu(hp1)^.oper[1], Paicpu(p)^.oper[1])
-                    Then
-                      If (Paicpu(p)^.oper[0].val > Paicpu(hp1)^.oper[0].val) And
-                         Not(CS_LittleSize In aktglobalswitches)
-                        Then
-                   { shr/sar const1, %reg
-                     shl     const2, %reg
-                      with const1 > const2 }
-                          Begin
-                            Paicpu(p)^.LoadConst(0,Paicpu(p)^.oper[0].val-Paicpu(hp1)^.oper[0].val);
-                            Paicpu(hp1)^.opcode := A_AND;
-                            l := (1 shl (Paicpu(hp1)^.oper[0].val)) - 1;
-                            Case Paicpu(p)^.opsize Of
-                              S_L: Paicpu(hp1)^.LoadConst(0,l Xor longint(-1));
-                              S_B: Paicpu(hp1)^.LoadConst(0,l Xor $ff);
-                              S_W: Paicpu(hp1)^.LoadConst(0,l Xor $ffff);
-                            End;
-                          End
-                        Else
-                          If (Paicpu(p)^.oper[0].val<Paicpu(hp1)^.oper[0].val) And
-                             Not(CS_LittleSize In aktglobalswitches)
-                            Then
-                   { shr/sar const1, %reg
-                     shl     const2, %reg
-                      with const1 < const2 }
-                              Begin
-                                Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val-Paicpu(p)^.oper[0].val);
-                                Paicpu(p)^.opcode := A_AND;
-                                l := (1 shl (Paicpu(p)^.oper[0].val))-1;
-                                Case Paicpu(p)^.opsize Of
-                                  S_L: Paicpu(p)^.LoadConst(0,l Xor $ffffffff);
-                                  S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
-                                  S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
-                                End;
-                              End
-                            Else
-                   { shr/sar const1, %reg
-                     shl     const2, %reg
-                      with const1 = const2 }
-                              if (Paicpu(p)^.oper[0].val = Paicpu(hp1)^.oper[0].val) then
-                                Begin
-                                  Paicpu(p)^.opcode := A_AND;
-                                  l := (1 shl (Paicpu(p)^.oper[0].val))-1;
-                                  Case Paicpu(p)^.opsize Of
-                                    S_B: Paicpu(p)^.LoadConst(0,l Xor $ff);
-                                    S_W: Paicpu(p)^.LoadConst(0,l Xor $ffff);
-                                    S_L: Paicpu(p)^.LoadConst(0,l Xor $ffffffff);
-                                  End;
-                                  AsmL^.remove(hp1);
-                                  dispose(hp1, done);
-                                End;
-                End;
               A_SETcc :
                 { changes
                     setcc (funcres)             setcc reg
@@ -1604,6 +1635,7 @@ end;
 
 Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
 
+{$ifdef USECMOV}
   function CanBeCMOV(p : pai) : boolean;
 
     begin
@@ -1613,6 +1645,7 @@ Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
          (paicpu(p)^.oper[0].typ in [top_reg,top_ref]) and
          (paicpu(p)^.oper[1].typ in [top_reg,top_ref]);
     end;
+{$endif USECMOV}
 
 var
   p,hp1,hp2: pai;
@@ -1633,20 +1666,6 @@ Begin
         Ait_Instruction:
           Begin
             Case Paicpu(p)^.opcode Of
-              A_CALL:
-                If (AktOptProcessor < ClassP6) And
-                   GetNextInstruction(p, hp1) And
-                   (hp1^.typ = ait_instruction) And
-                   (paicpu(hp1)^.opcode = A_JMP) Then
-                  Begin
-                    Inc(paicpu(hp1)^.oper[0].sym^.refs);
-                    hp2 := New(Paicpu,op_sym(A_PUSH,S_L,paicpu(hp1)^.oper[0].sym));
-                    InsertLLItem(AsmL, p^.previous, p, hp2);
-                    Paicpu(p)^.opcode := A_JMP;
-                    AsmL^.Remove(hp1);
-                    Dispose(hp1, Done)
-                  End;
-
 {$ifdef USECMOV}
               A_Jcc:
                 if (aktspecificoptprocessor=ClassP6) then
@@ -1833,59 +1852,6 @@ Begin
                        p := hp1
                      End;
                    End
-                  else if (Paicpu(p)^.oper[0].typ = Top_Const) And
-                     (Paicpu(p)^.oper[0].val = 0) And
-                     (Paicpu(p)^.oper[1].typ = Top_Reg) Then
-                    { change "mov $0, %reg" into "xor %reg, %reg" }
-                    Begin
-                      Paicpu(p)^.opcode := A_XOR;
-                      Paicpu(p)^.LoadReg(0,Paicpu(p)^.oper[1].reg);
-                    End
-                End;
-              A_MOVZX:
-                Begin
-                  If (Paicpu(p)^.oper[1].typ = top_reg) Then
-                    If (Paicpu(p)^.oper[0].typ = top_reg)
-                      Then
-                        Case Paicpu(p)^.opsize of
-                          S_BL:
-                            Begin
-                              If IsGP32Reg(Paicpu(p)^.oper[1].reg) And
-                                 Not(CS_LittleSize in aktglobalswitches) And
-                                 (aktoptprocessor = ClassP5)
-                                Then
-                                  {Change "movzbl %reg1, %reg2" to
-                                   "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
-                                   PentiumMMX}
-                                  Begin
-                                    hp1 := New(Paicpu, op_reg_reg(A_XOR, S_L,
-                                               Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[1].reg));
-                                    InsertLLItem(AsmL,p^.previous, p, hp1);
-                                    Paicpu(p)^.opcode := A_MOV;
-                                    Paicpu(p)^.changeopsize(S_B);
-                                    Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
-                                  End;
-                            End;
-                        End
-                      Else
-                        If (Paicpu(p)^.oper[0].typ = top_ref) And
-                           (Paicpu(p)^.oper[0].ref^.base <> Paicpu(p)^.oper[1].reg) And
-                           (Paicpu(p)^.oper[0].ref^.index <> Paicpu(p)^.oper[1].reg) And
-                           Not(CS_LittleSize in aktglobalswitches) And
-                           IsGP32Reg(Paicpu(p)^.oper[1].reg) And
-                           (aktoptprocessor = ClassP5) And
-                           (Paicpu(p)^.opsize = S_BL)
-                          Then
-                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
-                             Pentium and PentiumMMX}
-                            Begin
-                              hp1 := New(Paicpu,op_reg_reg(A_XOR, S_L, Paicpu(p)^.oper[1].reg,
-                                         Paicpu(p)^.oper[1].reg));
-                              Paicpu(p)^.opcode := A_MOV;
-                              Paicpu(p)^.changeopsize(S_B);
-                              Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
-                              InsertLLItem(AsmL,p^.previous, p, hp1);
-                            End;
                 End;
               A_TEST, A_OR:
                 {removes the line marked with (x) from the sequence
@@ -1942,11 +1908,111 @@ Begin
     End;
 End;
 
+Procedure PostPeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
+{ Walks the items from BlockStart up to (but not including) BlockEnd and   }
+{ rewrites, in place in AsmL, call/jmp pairs, "mov $0,%reg" and certain    }
+{ movzx instructions (see the comments at each case below).                }
+var
+  p,hp1,hp2: pai;
+Begin
+  P := BlockStart;
+  While (P <> BlockEnd) Do
+    Begin
+      Case P^.Typ Of
+        Ait_Instruction:
+          Begin
+            Case Paicpu(p)^.opcode Of
+              A_CALL:
+                { for processors below ClassP6, change
+                    call  proc    (p)          push  label  (hp2)
+                    jmp   label   (hp1)   to   jmp   proc   (p)     }
+                If (AktOptProcessor < ClassP6) And
+                   GetNextInstruction(p, hp1) And
+                   (hp1^.typ = ait_instruction) And
+                   (paicpu(hp1)^.opcode = A_JMP) Then
+                  Begin
+                    { bump the jump target's reference count for the push }
+                    { that is created from the same symbol just below     }
+                    Inc(paicpu(hp1)^.oper[0].sym^.refs);
+                    hp2 := New(Paicpu,op_sym(A_PUSH,S_L,paicpu(hp1)^.oper[0].sym));
+                    { the push goes right in front of the former call }
+                    InsertLLItem(AsmL, p^.previous, p, hp2);
+                    Paicpu(p)^.opcode := A_JMP;
+                    { the original jmp is no longer needed }
+                    AsmL^.Remove(hp1);
+                    Dispose(hp1, Done)
+                  End;
+              A_MOV:
+                if (Paicpu(p)^.oper[0].typ = Top_Const) And
+                   (Paicpu(p)^.oper[0].val = 0) And
+                   (Paicpu(p)^.oper[1].typ = Top_Reg) Then
+                  { change "mov $0, %reg" into "xor %reg, %reg" }
+                  { NOTE(review): xor modifies the flags while mov does not; }
+                  { nothing here checks whether the flags are still needed - }
+                  { confirm that this is guaranteed elsewhere                }
+                  Begin
+                    Paicpu(p)^.opcode := A_XOR;
+                    Paicpu(p)^.LoadReg(0,Paicpu(p)^.oper[1].reg);
+                  End;
+              A_MOVZX:
+                Begin
+                  If (Paicpu(p)^.oper[1].typ = top_reg) Then
+                    If (Paicpu(p)^.oper[0].typ = top_reg)
+                      Then
+                        Case Paicpu(p)^.opsize of
+                          S_BL:
+                            Begin
+                              If IsGP32Reg(Paicpu(p)^.oper[1].reg) And
+                                 Not(CS_LittleSize in aktglobalswitches) And
+                                 (aktoptprocessor = ClassP5)
+                                Then
+                                  {Change "movzbl %reg1, %reg2" to
+                                   "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
+                                   PentiumMMX}
+                                  { NOTE(review): unlike the memory case below, there is }
+                                  { no check that %reg1 is not part of %reg2; if it were, }
+                                  { the xor would clear the source before the movb reads  }
+                                  { it - confirm such a movzx cannot reach this point     }
+                                  Begin
+                                    hp1 := New(Paicpu, op_reg_reg(A_XOR, S_L,
+                                               Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[1].reg));
+                                    { the xor is inserted in front of p }
+                                    InsertLLItem(AsmL,p^.previous, p, hp1);
+                                    Paicpu(p)^.opcode := A_MOV;
+                                    Paicpu(p)^.changeopsize(S_B);
+                                    { the destination becomes the 8 bit form of %reg2 }
+                                    Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
+                                  End;
+                            End;
+                        End
+                      Else
+                        { memory source: the destination register may not be used as }
+                        { base or index of the reference, because the xor inserted   }
+                        { in front of p clears it before the address is evaluated    }
+                        If (Paicpu(p)^.oper[0].typ = top_ref) And
+                           (Paicpu(p)^.oper[0].ref^.base <> Paicpu(p)^.oper[1].reg) And
+                           (Paicpu(p)^.oper[0].ref^.index <> Paicpu(p)^.oper[1].reg) And
+                           Not(CS_LittleSize in aktglobalswitches) And
+                           IsGP32Reg(Paicpu(p)^.oper[1].reg) And
+                           (aktoptprocessor = ClassP5) And
+                           (Paicpu(p)^.opsize = S_BL)
+                          Then
+                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
+                             Pentium and PentiumMMX}
+                            Begin
+                              { hp1 is built from the 32 bit register before operand 1 }
+                              { of p is replaced by its 8 bit form                     }
+                              hp1 := New(Paicpu,op_reg_reg(A_XOR, S_L, Paicpu(p)^.oper[1].reg,
+                                         Paicpu(p)^.oper[1].reg));
+                              Paicpu(p)^.opcode := A_MOV;
+                              Paicpu(p)^.changeopsize(S_B);
+                              Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
+                              InsertLLItem(AsmL,p^.previous, p, hp1);
+                            End;
+                End;
+            End;
+          End;
+      End;
+      { advance to the next item in the list }
+      p := Pai(p^.next)
+    End;
+End;
+
+
+
 End.
 
 {
   $Log$
-  Revision 1.1  2000-10-15 09:47:43  peter
+  Revision 1.2  2000-10-24 10:40:54  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peepholopts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.1  2000/10/15 09:47:43  peter
     * moved to i386/
 
   Revision 1.13  2000/10/02 13:01:29  jonas

+ 350 - 0
compiler/i386/rropt386.pas

@@ -0,0 +1,350 @@
+{
+    $Id$
+    Copyright (c) 1998-2000 by Jonas Maebe, member of the Free Pascal
+      development team
+
+    This unit contains register renaming functionality
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+Unit rrOpt386;
+
+{$i defines.inc}
+
+Interface
+
+Uses aasm;
+
+procedure doRenaming(asml: paasmoutput; first, last: pai);
+
+Implementation
+
+Uses
+  {$ifdef replaceregdebug}cutils,{$endif}
+  verbose,globals,cpubase,cpuasm,daopt386,csopt386,tgeni386;
+
+function canBeFirstSwitch(p: paicpu; reg: tregister): boolean;
+{ checks whether an operation on reg can be switched to another reg without an }
+{ additional mov, e.g. "addl $4,%reg1" can be changed to "leal 4(%reg1),%reg2" }
+{                                                                              }
+{ p   : the instruction that modifies reg                                      }
+{ reg : the 32 bit register whose first modification is being examined        }
+{ returns true if p has one of the forms listed below, false for every other  }
+{ opcode or operand combination                                                }
+begin
+  canBeFirstSwitch := false;
+  case p^.opcode of
+    { plain loads: only the destination register has to be renamed }
+    A_MOV,A_MOVZX,A_MOVSX,A_LEA:
+      canBeFirstSwitch :=
+        (p^.oper[1].typ = top_reg) and
+        (reg32(p^.oper[1].reg) = reg);
+    { only "imul $const,%reg" and "imul $const,src,%reg" can be expressed  }
+    { with a separate destination: the 3 operand form of imul requires an }
+    { immediate as first operand                                           }
+    A_IMUL:
+      canBeFirstSwitch :=
+        (p^.ops >= 2) and
+        (p^.oper[0].typ = top_const) and
+        (reg32(p^.oper[p^.ops-1].reg) = reg);
+    { inc/dec have a single operand, so the register is in oper[0] }
+    A_INC,A_DEC:
+      canBeFirstSwitch :=
+        (p^.oper[0].typ = top_reg) and
+        (p^.opsize = S_L) and
+        (reg32(p^.oper[0].reg) = reg);
+    { add/sub become a lea, so the source may not be a memory reference }
+    { and a sub can only be converted if it subtracts a constant        }
+    A_SUB,A_ADD:
+      canBeFirstSwitch :=
+        (p^.oper[1].typ = top_reg) and
+        (p^.opsize = S_L) and
+        (reg32(p^.oper[1].reg) = reg) and
+        (p^.oper[0].typ <> top_ref) and
+        ((p^.opcode <> A_SUB) or
+         (p^.oper[0].typ = top_const));
+    { shifts by 1, 2 or 3 correspond to the lea scale factors 2, 4 and 8 }
+    A_SHL:
+      canBeFirstSwitch :=
+        (p^.opsize = S_L) and
+        (p^.oper[1].typ = top_reg) and
+        (p^.oper[1].reg = reg) and
+        (p^.oper[0].typ = top_const) and
+        (p^.oper[0].val in [1,2,3]);
+  end;
+end;
+
+procedure switchReg(var reg: tregister; reg1, reg2: tregister);
+{ exchanges reg1 and reg2 in reg: if reg equals one of them (or the result }
+{ of regtoreg8/regtoreg16 applied to one of them), it is replaced by the   }
+{ corresponding form of the other one; any other register is left alone   }
+begin
+  { full register match }
+  if reg = reg1 then
+    begin
+      reg := reg2;
+      exit;
+    end;
+  if reg = reg2 then
+    begin
+      reg := reg1;
+      exit;
+    end;
+  { match on the regtoreg8 form }
+  if reg = regtoreg8(reg1) then
+    begin
+      reg := regtoreg8(reg2);
+      exit;
+    end;
+  if reg = regtoreg8(reg2) then
+    begin
+      reg := regtoreg8(reg1);
+      exit;
+    end;
+  { match on the regtoreg16 form }
+  if reg = regtoreg16(reg1) then
+    begin
+      reg := regtoreg16(reg2);
+      exit;
+    end;
+  if reg = regtoreg16(reg2) then
+    reg := regtoreg16(reg1);
+end;
+
+
+procedure switchOp(var op: toper; reg1, reg2: tregister);
+{ exchanges reg1 and reg2 inside a single operand: in the register itself }
+{ for a register operand, in both base and index for a memory reference;  }
+{ operands of any other type are not touched                              }
+begin
+  if op.typ = top_reg then
+    switchReg(op.reg,reg1,reg2)
+  else if op.typ = top_ref then
+    begin
+      switchReg(op.ref^.base,reg1,reg2);
+      switchReg(op.ref^.index,reg1,reg2);
+    end;
+end;
+
+procedure doSwitchReg(hp: paicpu; reg1,reg2: tregister);
+{ exchanges reg1 and reg2 in every operand of the instruction hp }
+var
+  opIdx: longint;
+begin
+  opIdx := 0;
+  while opIdx < hp^.ops do
+    begin
+      switchOp(hp^.oper[opIdx],reg1,reg2);
+      inc(opIdx);
+    end;
+end;
+
+
+procedure doFirstSwitch(p: paicpu; reg1, reg2: tregister);
+{ rewrites p, the first instruction that modifies reg1 after a              }
+{ "movl %reg1,%reg2", so that it stores its result in reg2 instead of reg1  }
+{ without needing an extra mov (e.g. "addl $4,%reg1" becomes                }
+{ "leal 4(%reg1),%reg2")                                                    }
+{                                                                           }
+{ p          : the instruction to rewrite; it must have one of the forms    }
+{              accepted by canBeFirstSwitch                                 }
+{ reg1, reg2 : source and destination register of the mov being renamed    }
+{                                                                           }
+{ raises internalerror 200010031 for an add/sub with an unsupported source  }
+{ operand type and internalerror 200010032 for an unsupported opcode        }
+var
+  tmpRef: treference;
+begin
+  case p^.opcode of
+    A_MOV,A_MOVZX,A_MOVSX,A_LEA:
+       begin
+         { the destination becomes reg2, while a source that referred to }
+         { reg2 has to read reg1 instead                                 }
+         changeOp(p^.oper[1],reg1,reg2);
+         changeOp(p^.oper[0],reg2,reg1);
+       end;
+    A_IMUL:
+      begin
+        if p^.ops = 3 then
+          begin
+            { "imull $x,src,%reg1" -> "imull $x,src,%reg2": only rename  }
+            { the destination, and let a source that referred to reg2    }
+            { read reg1 instead (same as for the mov-like instructions)  }
+            changeOp(p^.oper[1],reg2,reg1);
+            changeOp(p^.oper[2],reg1,reg2);
+          end
+        else
+          begin
+            { "imull $x,%reg1" -> "imull $x,%reg1,%reg2"; this is only a }
+            { valid instruction if the first operand is a constant       }
+            p^.ops := 3;
+            p^.loadreg(2,p^.oper[1].reg);
+            changeOp(p^.oper[2],reg1,reg2);
+          end;
+      end;
+    A_INC,A_DEC:
+      begin
+        { "incl/decl %reg1" -> "leal 1(%reg1),%reg2" / "leal -1(%reg1),%reg2" }
+        reset_reference(tmpref);
+        tmpref.base := reg1;
+        case p^.opcode of
+          A_INC:
+            tmpref.offset := 1;
+          A_DEC:
+            tmpref.offset := -1;
+        end;
+        p^.ops := 2;
+        p^.opcode := A_LEA;
+        p^.loadreg(1,reg2);
+        p^.loadref(0,newreference(tmpref));
+      end;
+    A_SUB,A_ADD:
+      begin
+        reset_reference(tmpref);
+        tmpref.base := reg1;
+        case p^.oper[0].typ of
+          top_const:
+            begin
+              { "subl $x,%reg1" is expressed as a lea with offset -x }
+              tmpref.offset := p^.oper[0].val;
+              if p^.opcode = A_SUB then
+                tmpref.offset := - tmpRef.offset;
+            end;
+          top_symbol:
+            begin
+              tmpref.symbol := p^.oper[0].sym;
+              { keep the offset relative to the symbol as well }
+              tmpref.offset := p^.oper[0].symofs;
+            end;
+          top_reg:
+            begin
+              tmpref.index := p^.oper[0].reg;
+              { "addl %reg2,%reg1" must become "leal (%reg1,%reg1,1),%reg2": }
+              { the mov that loaded reg2 is being removed, so at this point  }
+              { its value is only available in reg1                          }
+              if tmpref.index = reg2 then
+                tmpref.index := reg1;
+              tmpref.scalefactor := 1;
+            end;
+          else internalerror(200010031);
+        end;
+        p^.opcode := A_LEA;
+        p^.loadref(0,newreference(tmpref));
+        p^.loadreg(1,reg2);
+      end;
+    A_SHL:
+      begin
+        { "shll $x,%reg1" -> "leal (,%reg1,2^x),%reg2": the scale factor }
+        { only applies to the index register, so reg1 has to be the      }
+        { index and not the base                                         }
+        reset_reference(tmpref);
+        tmpref.index := reg1;
+        tmpref.scalefactor := 1 shl p^.oper[0].val;
+        p^.opcode := A_LEA;
+        p^.loadref(0,newreference(tmpref));
+        p^.loadreg(1,reg2);
+      end;
+    else internalerror(200010032);
+  end;
+end;
+
+
+function switchRegs(asml: paasmoutput; reg1, reg2: tregister; start: pai): Boolean;
+{ change movl  %reg1,%reg2 ... bla ... to ... bla with reg1 and reg2 switched }
+{                                                                             }
+{ start is the "movl %reg1,%reg2" itself. The function works in two phases:   }
+{   1) scan forward from start to find the end of the sequence (endP) and     }
+{      check that none of the instructions in between prevents the switch     }
+{   2) if the scan succeeded, rewrite the register operands of all            }
+{      instructions between start and endP                                    }
+{ returns true if the switch was carried out, false otherwise                 }
+var
+  endP, hp: pai;
+  switchDone, switchLast, tmpResult, sequenceEnd, reg1Modified, reg2Modified: boolean;
+  reg1StillUsed, reg2StillUsed, isInstruction: boolean;
+begin
+  switchRegs := false;
+  tmpResult := true;
+  sequenceEnd := false;
+  reg1Modified := false;
+  reg2Modified := false;
+  endP := start;
+  { phase 1: look for the end of the sequence; tmpResult becomes false as }
+  { soon as something is found that makes the switch impossible           }
+  while tmpResult and not sequenceEnd do
+    begin
+      tmpResult :=
+        getNextInstruction(endP,endP);
+      { entries already flagged as removable are skipped }
+      If tmpResult and
+         not ppaiprop(endP^.optinfo)^.canBeRemoved then
+        begin
+          { if the newReg gets stored back to the oldReg, we can change }
+          { "mov %oldReg,%newReg; <operations on %newReg>; mov %newReg, }
+          { %oldReg" to "<operations on %oldReg>"                       }
+          switchLast := storeBack(endP,reg1,reg2);
+          reg1StillUsed := reg1 in ppaiprop(endP^.optinfo)^.usedregs;
+          reg2StillUsed := reg2 in ppaiprop(endP^.optinfo)^.usedregs;
+          isInstruction := endP^.typ = ait_instruction;
+          sequenceEnd :=
+            switchLast or
+            { if both registers are released right before an instruction }
+            { that contains hardcoded regs, it's ok too                  }
+            (not reg1StillUsed and not reg2StillUsed) or
+            { no support for (i)div, mul and imul with hardcoded operands }
+            (((not isInstruction) or
+              noHardCodedRegs(paicpu(endP),reg1,reg2)) and
+             (not reg1StillUsed or
+              (isInstruction and findRegDealloc(reg1,endP) and
+               regLoadedWithNewValue(reg1,false,paicpu(endP)))) and
+             (not reg2StillUsed or
+              (isInstruction and findRegDealloc(reg2,endP) and
+               regLoadedWithNewValue(reg2,false,paicpu(endP)))));
+
+          { we can't switch reg1 and reg2 in something like }
+          {   movl  %reg1,%reg2                             }
+          {   movl  (%reg2),%reg2                           }
+          {   movl  4(%reg1),%reg1                          }
+          if reg2Modified and not(reg1Modified) and
+             regReadByInstruction(reg1,endP) then
+            begin
+              tmpResult := false;
+              break
+            end;
+
+          { the first instruction that modifies reg1 must be one that }
+          { doFirstSwitch will be able to rewrite in phase 2          }
+          if not reg1Modified then
+            begin
+              reg1Modified := regModifiedByInstruction(reg1,endP);
+              if reg1Modified and not canBeFirstSwitch(paicpu(endP),reg1) then
+                begin
+                  tmpResult := false;
+                  break;
+                end;
+            end;
+          if not reg2Modified then
+            reg2Modified := regModifiedByInstruction(reg2,endP);
+
+          { the checks below are only applied to entries inside the }
+          { sequence, not to the one that ends it                   }
+          if sequenceEnd then
+            break;
+
+          { give up on labels and on instructions for which          }
+          { NoHardCodedRegs or RegSizesOk return false               }
+          tmpResult :=
+            (endP^.typ <> ait_label) and
+            ((not isInstruction) or
+             (NoHardCodedRegs(paicpu(endP),reg1,reg2) and
+               RegSizesOk(reg1,reg2,paicpu(endP))));
+        end;
+    end;
+
+  { phase 2: the scan found a valid end of sequence, now do the renaming }
+  if tmpResult and sequenceEnd then
+    begin
+      switchRegs := true;
+      { the modification flags are reused to track the progress of the }
+      { rewriting pass                                                  }
+      reg1Modified := false;
+      reg2Modified := false;
+      getNextInstruction(start,hp);
+      while hp <> endP do
+        begin
+          if (not ppaiprop(hp^.optinfo)^.canberemoved) and
+             (hp^.typ = ait_instruction) then
+            begin
+              switchDone := false;
+              if not reg1Modified then
+                begin
+                  reg1Modified := regModifiedByInstruction(reg1,hp);
+                  if reg1Modified then
+                    begin
+                      { first modification of reg1: redirect its result }
+                      { to reg2                                         }
+                      doFirstSwitch(paicpu(hp),reg1,reg2);
+                      switchDone := true;
+                    end;
+                end;
+              if not switchDone then
+                if reg1Modified then
+                  { after the first switch: exchange reg1 and reg2 }
+                  doSwitchReg(paicpu(hp),reg1,reg2)
+                else
+                  { before the first switch: uses of reg2 become reg1 }
+                  doReplaceReg(paicpu(hp),reg2,reg1);
+            end;
+          getNextInstruction(hp,hp);
+        end;
+      { here hp = endP; it is only rewritten if it stores the value back, }
+      { otherwise hp is moved back to the instruction before it           }
+      if switchLast then
+        doSwitchReg(paicpu(hp),reg1,reg2)
+      else getLastInstruction(hp,hp);
+      { NOTE(review): presumably this marks both registers as allocated }
+      { over the whole range from start to hp - confirm in daopt386     }
+      allocRegBetween(asmL,reg1,start,hp);
+      allocRegBetween(asmL,reg2,start,hp);
+    end;
+end;
+
+procedure doRenaming(asml: paasmoutput; first, last: pai);
+{ walks over the entries from first up to (but not including) last and      }
+{ tries to rename registers after every 32 bit "movl %reg1,%reg2" whose     }
+{ operands are both in usableregs+[R_EDI]; if switchRegs succeeds, the mov  }
+{ is flagged as removable (it is not deleted from the list here)            }
+var
+  cur: pai;
+begin
+  cur := First;
+  SkipHead(cur);
+  while cur <> last do
+    begin
+      if cur^.typ = ait_instruction then
+        if paicpu(cur)^.opcode = A_MOV then
+          if not(ppaiprop(cur^.optinfo)^.canBeRemoved) and
+             (paicpu(cur)^.oper[0].typ = top_reg) and
+             (paicpu(cur)^.oper[1].typ = top_reg) and
+             (paicpu(cur)^.opsize = S_L) and
+             (paicpu(cur)^.oper[0].reg in (usableregs+[R_EDI])) and
+             (paicpu(cur)^.oper[1].reg in (usableregs+[R_EDI])) then
+            if switchRegs(asml,paicpu(cur)^.oper[0].reg,
+                 paicpu(cur)^.oper[1].reg,cur) then
+              { only flag the mov, the entry itself stays in the list }
+              ppaiprop(cur^.optinfo)^.canBeRemoved := true;
+      getNextInstruction(cur,cur);
+    end;
+end;
+
+
+End.
+
+{
+  $Log$
+  Revision 1.1  2000-10-24 10:40:54  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peephole opts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+
+}

+ 1 - 1
compiler/msgidx.inc

@@ -557,7 +557,7 @@ const
   option_info=11024;
   option_help_pages=11025;
 
-  MsgTxtSize = 31210;
+  MsgTxtSize = 31225;
 
   MsgIdxMax : array[1..20] of longint=(
     17,58,165,34,41,41,86,14,35,40,

+ 10 - 10
compiler/msgtxt.inc

@@ -728,37 +728,37 @@ const msgtxt : array[0..000130,1..240] of char=(
   '3*2Ou_enable uncertain optimizations (see docs)'#010+
   '3*2O1_level 1 optimizat','ions (quick optimizations)'#010+
   '3*2O2_level 2 optimizations (-O1 + slower optimizations)'#010+
-  '3*2O3_level 3 optimizations (same as -O2u)'#010+
+  '3*2O3_level 3 optimizations (-O2 repeatedly, max 5 times)'#010+
   '3*2Op<x>_target processor:'#010+
   '3*3Op1_set target processor to 386/486'#010+
-  '3*3Op2_set target processor to Pentium/PentiumM','MX (tm)'#010+
+  '3*3Op2_set target processor to P','entium/PentiumMMX (tm)'#010+
   '3*3Op3_set target processor to PPro/PII/c6x86/K6 (tm)'#010+
   '3*1T<x>_Target operating system:'#010+
   '3*2TGO32V1_version 1 of DJ Delorie DOS extender'#010+
   '3*2TGO32V2_version 2 of DJ Delorie DOS extender'#010+
   '3*2TLINUX_Linux'#010+
-  '3*2Tnetware_Novell Netware Module',' (experimental)'#010+
+  '3*2Tnetware_Novell',' Netware Module (experimental)'#010+
   '3*2TOS2_OS/2 2.x'#010+
   '3*2TWin32_Windows 32 Bit'#010+
   '3*1W<x>_Win32 target options'#010+
   '3*2WB<x>_Set Image base to Hexadecimal <x> value'#010+
   '3*2WC_Specify console type application'#010+
-  '3*2WD_Use DEFFILE to export functions of DLL or EXE'#010+
-  '3*2WG_Specify',' graphic type application'#010+
+  '3*2WD_Use DEFFILE to export functions of DLL or EX','E'#010+
+  '3*2WG_Specify graphic type application'#010+
   '3*2WN_Do not generate relocation code (necessary for debugging)'#010+
   '3*2WR_Generate relocation code'#010+
   '6*1A<x>_output format'#010+
   '6*2Aas_Unix o-file using GNU AS'#010+
   '6*2Agas_GNU Motorola assembler'#010+
-  '6*2Amit_MIT Syntax (old GAS)'#010+
-  '6*2Am','ot_Standard Motorola assembler'#010+
+  '6*2Amit_MIT Syntax ','(old GAS)'#010+
+  '6*2Amot_Standard Motorola assembler'#010+
   '6*1O_optimizations:'#010+
   '6*2Oa_turn on the optimizer'#010+
   '6*2Og_generate smaller code'#010+
   '6*2OG_generate faster code (default)'#010+
   '6*2Ox_optimize maximum (still BUGGY!!!)'#010+
-  '6*2O2_set target processor to a MC68020+'#010+
-  '6*1R<x>_assembl','er reading style:'#010+
+  '6*2O2_set target processor to a MC68020+'#010,
+  '6*1R<x>_assembler reading style:'#010+
   '6*2RMOT_read motorola style assembler'#010+
   '6*1T<x>_Target operating system:'#010+
   '6*2TAMIGA_Commodore Amiga'#010+
@@ -767,5 +767,5 @@ const msgtxt : array[0..000130,1..240] of char=(
   '6*2TLINUX_Linux-68k'#010+
   '**1*_'#010+
   '**1?_shows this help'#010+
-  '**1h_shows this help withou','t waiting'#000
+  '**1h_shows t','his help without waiting'#000
 );

+ 18 - 5
compiler/opts386.pas

@@ -58,10 +58,10 @@ begin
                  'g' : initglobalswitches:=initglobalswitches+[cs_littlesize];
                  'G' : initglobalswitches:=initglobalswitches-[cs_littlesize];
                  'r' : initglobalswitches:=initglobalswitches+[cs_regalloc];
-                 'u' : initglobalswitches:=initglobalswitches+[cs_optimize,cs_uncertainopts];
-                 '1' : initglobalswitches:=initglobalswitches-[cs_slowoptimize,cs_uncertainopts]+[cs_optimize,cs_fastoptimize];
-                 '2' : initglobalswitches:=initglobalswitches-[cs_uncertainopts]+[cs_optimize,cs_fastoptimize,cs_slowoptimize];
-                 '3' : initglobalswitches:=initglobalswitches+[cs_optimize,cs_fastoptimize,cs_slowoptimize,cs_uncertainopts];
+                 'u' : initglobalswitches:=initglobalswitches+[cs_uncertainopts];
+                 '1' : initglobalswitches:=initglobalswitches-[cs_fastoptimize,cs_slowoptimize]+[cs_optimize];
+                 '2' : initglobalswitches:=initglobalswitches-[cs_slowoptimize]+[cs_optimize,cs_fastoptimize];
+                 '3' : initglobalswitches:=initglobalswitches+[cs_optimize,cs_fastoptimize,cs_slowoptimize];
                  'p' :
                    Begin
                      If j < Length(Opt) Then
@@ -115,7 +115,20 @@ end;
 end.
 {
   $Log$
-  Revision 1.5  2000-09-24 15:06:20  peter
+  Revision 1.6  2000-10-24 10:40:53  jonas
+    + register renaming ("fixes" bug1088)
+    * changed command line options meanings for optimizer:
+        O2 now means peephole opts, CSE and register renaming in 1 pass
+        O3 is the same, but repeated until no further optimizations are
+          possible or until 5 passes have been done (to avoid endless loops)
+    * changed aopt386 so it does this looping
+    * added some procedures from csopt386 to the interface because they're
+      used by rropt386 as well
+    * some changes to csopt386 and daopt386 so that newly added instructions
+      by the CSE get optimizer info (they were simply skipped previously),
+      this fixes some bugs
+
+  Revision 1.5  2000/09/24 15:06:20  peter
     * use defines.inc
 
   Revision 1.4  2000/08/27 16:11:51  peter