Browse Source

+ memory references are now replaced by register reads in "regular"
instructions (e.g. "addl ref1,%eax" will be replaced by "addl %ebx,%eax"
if %ebx contains ref1). Previously only complete load sequences were
optimized away, but not such small accesses in other instructions than
mov/movzx/movsx

Jonas Maebe 24 years ago
parent
commit
82f7cf0957
2 changed files with 161 additions and 12 deletions
  1. 117 8
      compiler/i386/csopt386.pas
  2. 44 4
      compiler/i386/daopt386.pas

+ 117 - 8
compiler/i386/csopt386.pas

@@ -185,10 +185,7 @@ function isSimpleMemLoc(const ref: treference): boolean;
 begin
   isSimpleMemLoc :=
     (ref.index = R_NO) and
-    (not(ref.base in (usableregs+[R_EDI])) or
-     (assigned(ref.symbol) and
-      (ref.base = R_NO) and
-      (ref.index = R_NO)));
+    not(ref.base in (usableregs+[R_EDI]));
 end;
 
 {checks whether the current instruction sequence (starting with p) and the
@@ -1314,6 +1311,48 @@ begin
      (p.opcode = A_IDIV));
 end;
 
+{ Looks for a register that already holds the contents of memory location
+  "ref" at the point just before instruction "t".
+
+  The register state is taken from the optinfo of the instruction preceding
+  "t". A register qualifies when its tracked contents are of type CON_REF or
+  CON_NOREMOVEREF, were produced by exactly one instruction (nrofmods = 1),
+  that instruction is a mov/movzx/movsx whose source operand is a reference
+  equal to "ref", and the size of that load is compatible with t.opsize
+  according to sizescompatible.
+
+  Returns the 8, 16 or 32 bit form of the register, selected by t.opsize,
+  or R_NO if there is no previous instruction, "ref" is not a simple memory
+  location, or no register qualifies. }
+function memtoreg(const t: Taicpu; const ref: treference): tregister;
+var
+  prev: tai;
+  props: pTaiprop;
+  load: Taicpu;
+  r: tregister;
+begin
+  { default answer: no suitable register }
+  memtoreg := R_NO;
+  if not getlastinstruction(t,prev) then
+    exit;
+  props := pTaiprop(prev.optinfo);
+  if not isSimpleMemLoc(ref) then
+    exit;
+  for r := R_EAX to R_EDI do
+    if (props^.regs[r].typ in [CON_REF,CON_NOREMOVEREF]) and
+       (props^.regs[r].nrofmods = 1) then
+      begin
+        { the single instruction that gave this register its current value }
+        load := Taicpu(props^.regs[r].startmod);
+        { EDI is rejected for a plain byte access.
+          NOTE(review): S_BW/S_BL are not rejected here although they also
+          map to reg32toreg8 below - confirm this is intended }
+        if ((load.opcode = A_MOV) or
+            (load.opcode = A_MOVZX) or
+            (load.opcode = A_MOVSX)) and
+           (load.oper[0].typ = top_ref) and
+           refsequal(ref,load.oper[0].ref^) and
+           not((t.opsize = S_B) and (r = R_EDI)) and
+           sizescompatible(load.opsize,t.opsize) then
+          begin
+            { hand back the sub-register matching the width "t" reads }
+            case t.opsize of
+              S_B,S_BW,S_BL:
+                memtoreg := reg32toreg8(r);
+              S_W,S_WL:
+                memtoreg := reg32toreg16(r);
+              S_L:
+                memtoreg := r;
+            end;
+            exit;
+          end;
+      end;
+end;
+
 procedure DoCSE(AsmL: TAAsmOutput; First, Last: Tai; findPrevSeqs, doSubOpts: boolean);
 {marks the instructions that can be removed by RemoveInstructs. They're not
  removed immediately because sometimes an instruction needs to be checked in
@@ -1594,9 +1633,26 @@ Begin
                               pTaiProp(p.optInfo)^.regs[Taicpu(p).oper[0].reg].startMod,hp1);
                           end
                         else
-                          if (Taicpu(p).oper[1].typ = top_reg) and
-                             not regInOp(Taicpu(p).oper[1].reg,Taicpu(p).oper[0]) then
-                           removePrevNotUsedLoad(p,reg32(Taicpu(p).oper[1].reg),false);
+                          begin
+                            if (Taicpu(p).oper[1].typ = top_reg) and
+                               not regInOp(Taicpu(p).oper[1].reg,Taicpu(p).oper[0]) then
+                             removePrevNotUsedLoad(p,reg32(Taicpu(p).oper[1].reg),false);
+                             if doSubOpts and
+                                (Taicpu(p).opcode <> A_LEA) and
+                                (Taicpu(p).oper[0].typ = top_ref) then
+                              begin
+                                regcounter :=
+                                  memtoreg(taicpu(p),
+                                  Taicpu(p).oper[0].ref^);
+                                if regcounter <> R_NO then
+                                  begin
+                                    Taicpu(p).loadreg(0,regcounter);
+                                    allocregbetween(asml,reg32(regcounter),
+                                      pTaiprop(p.optinfo)^.regs[reg32(regcounter)].startmod,
+                                      p);
+                                  end;
+                              end;
+                          end;
                         { at first, only try optimizations of large blocks, because doing }
                         { smaller ones may prevent bigger ones from completing in the     }
                         { next pass                                                       }
@@ -1643,6 +1699,52 @@ Begin
               A_STD: If GetLastInstruction(p, hp1) And
                         (PTaiProp(hp1.OptInfo)^.DirFlag = F_Set) Then
                         PTaiProp(Tai(p).OptInfo)^.CanBeRemoved := True;
+              else
+                begin
+                  for cnt := 1 to maxch do
+                    begin
+                      case InsProp[taicpu(p).opcode].Ch[cnt] of
+                        Ch_ROp1:
+                          if (taicpu(p).oper[0].typ = top_ref) and
+                             ((taicpu(p).opcode < A_F2XM1) or
+                              ((taicpu(p).opcode > A_IN) and
+                               (taicpu(p).opcode < A_OUT)) or
+                              (taicpu(p).opcode = A_PUSH) or
+                              (taicpu(p).opcode = A_SUB) or
+                              (taicpu(p).opcode = A_TEST) or
+                              (taicpu(p).opcode = A_XOR))then
+                            begin
+                              regcounter :=
+                                memtoreg(taicpu(p),
+                                Taicpu(p).oper[0].ref^);
+                              if regcounter <> R_NO then
+                                begin
+                                  Taicpu(p).loadreg(0,regcounter);
+                                  allocregbetween(asml,reg32(regcounter),
+                                    pTaiprop(p.optinfo)^.regs[reg32(regcounter)].startmod,
+                                    p);
+                                end;
+                            end;
+
+                        Ch_ROp2:
+                          if ((taicpu(p).opcode = A_CMP) or
+                              (taicpu(p).opcode = A_TEST)) and
+                             (taicpu(p).oper[1].typ = top_ref) then
+                            begin
+                              regcounter :=
+                                memtoreg(taicpu(p),
+                                Taicpu(p).oper[1].ref^);
+                              if regcounter <> R_NO then
+                                begin
+                                  Taicpu(p).loadreg(1,regcounter);
+                                  allocregbetween(asml,reg32(regcounter),
+                                    pTaiprop(p.optinfo)^.regs[reg32(regcounter)].startmod,
+                                    p);
+                                end;
+                            end;
+                      end;
+                    end;
+                end;
             End
           End;
       End;
@@ -1742,7 +1844,14 @@ End.
 
 {
   $Log$
-  Revision 1.18  2001-09-04 14:01:03  jonas
+  Revision 1.19  2001-10-12 13:58:05  jonas
+    + memory references are now replaced by register reads in "regular"
+      instructions (e.g. "addl ref1,%eax" will be replaced by "addl %ebx,%eax"
+      if %ebx contains ref1). Previously only complete load sequences were
+      optimized away, but not such small accesses in other instructions than
+      mov/movzx/movsx
+
+  Revision 1.18  2001/09/04 14:01:03  jonas
     * commented out some inactive code in csopt386
     + small improvement: lea is now handled the same as mov/zx/sx
 

+ 44 - 4
compiler/i386/daopt386.pas

@@ -191,6 +191,7 @@ function FindRegDealloc(reg: tregister; p: Tai): boolean;
 
 Function RegsEquivalent(OldReg, NewReg: TRegister; Var RegInfo: TRegInfo; OpAct: TopAction): Boolean;
 Function InstructionsEquivalent(p1, p2: Tai; Var RegInfo: TRegInfo): Boolean;
+function sizescompatible(loadsize,newsize: topsize): boolean;
 Function OpsEqual(const o1,o2:toper): Boolean;
 
 Function DFAPass1(AsmL: TAAsmOutput; BlockStart: Tai): Tai;
@@ -1164,10 +1165,13 @@ var
   lastRemovedWasDealloc, firstRemovedWasAlloc, first: boolean;
 Begin
   If not(reg in usableregs+[R_EDI,R_ESI]) or
-     not(assigned(p1)) Then
+     not(assigned(p1)) then
     { this happens with registers which are loaded implicitly, outside the  }
     { current block (e.g. esi with self)                                    }
     exit;
+  { make sure we allocate it for this instruction }
+  if p1 = p2 then
+    getnextinstruction(p2,p2);
   lastRemovedWasDealloc := false;
   firstRemovedWasAlloc := false;
   first := true;
@@ -1433,6 +1437,34 @@ Begin {checks whether the two ops are equal}
     End;
 End;
 
+
+{ Returns true if an operand of size "newsize" is compatible with an earlier
+  load of size "loadsize":
+    - loadsize in S_B/S_BW/S_BL: newsize must equal loadsize or be S_B
+    - loadsize in S_W/S_WL:      newsize must equal loadsize or be S_W
+    - any other loadsize:        newsize must be S_L }
+function sizescompatible(loadsize,newsize: topsize): boolean;
+  var
+    plainsize: topsize;
+  begin
+    { determine the plain size belonging to the group "loadsize" is in }
+    if loadsize in [S_B,S_BW,S_BL] then
+      plainsize := S_B
+    else if loadsize in [S_W,S_WL] then
+      plainsize := S_W
+    else
+      plainsize := S_L;
+    if plainsize = S_L then
+      { outside the byte/word groups only S_L is accepted }
+      sizescompatible := (newsize = S_L)
+    else
+      sizescompatible := (newsize = loadsize) or (newsize = plainsize);
+  end;
+
+
+{ Returns true if the opcode/size combination of p2 is acceptable as an
+  equivalent of that of p1.
+  When p1 is a movzx or movsx, p2 may have the same opcode or be a plain mov,
+  provided sizescompatible(p1.opsize,p2.opsize) holds. For every other
+  opcode, both the opcode and the operand size must be identical. }
+function opscompatible(p1,p2: Taicpu): boolean;
+var
+  sameopcode: boolean;
+begin
+  sameopcode := (p1.opcode = p2.opcode);
+  if (p1.opcode = A_MOVZX) or (p1.opcode = A_MOVSX) then
+    opscompatible :=
+      (sameopcode or (p2.opcode = A_MOV)) and
+      sizescompatible(p1.opsize,p2.opsize)
+  else
+    opscompatible := sameopcode and (p1.opsize = p2.opsize);
+end;
+
 Function InstructionsEquivalent(p1, p2: Tai; Var RegInfo: TRegInfo): Boolean;
 {$ifdef csdebug}
 var
@@ -1442,7 +1474,7 @@ Begin {checks whether two Taicpu instructions are equal}
   If Assigned(p1) And Assigned(p2) And
      (Tai(p1).typ = ait_instruction) And
      (Tai(p1).typ = ait_instruction) And
-     (Taicpu(p1).opcode = Taicpu(p2).opcode) And
+     opscompatible(Taicpu(p1),Taicpu(p2)) and
      (Taicpu(p1).oper[0].typ = Taicpu(p2).oper[0].typ) And
      (Taicpu(p1).oper[1].typ = Taicpu(p2).oper[1].typ) And
      (Taicpu(p1).oper[2].typ = Taicpu(p2).oper[2].typ)
@@ -1476,7 +1508,8 @@ Begin {checks whether two Taicpu instructions are equal}
                 AddOp2RegInfo(Taicpu(p1).oper[0], RegInfo);
  {the registers from .oper[1] have to be equivalent, but not necessarily equal}
                 InstructionsEquivalent :=
-                  RegsEquivalent(Taicpu(p1).oper[1].reg, Taicpu(p2).oper[1].reg, RegInfo, OpAct_Write);
+                  RegsEquivalent(reg32(Taicpu(p1).oper[1].reg),
+                    reg32(Taicpu(p2).oper[1].reg), RegInfo, OpAct_Write);
               End
  {the registers are loaded with values from different memory locations. If
   this was allowed, the instructions "mov -4(esi),eax" and "mov -4(ebp),eax"
@@ -2519,7 +2552,14 @@ End.
 
 {
   $Log$
-  Revision 1.21  2001-09-04 14:01:04  jonas
+  Revision 1.22  2001-10-12 13:58:05  jonas
+    + memory references are now replaced by register reads in "regular"
+      instructions (e.g. "addl ref1,%eax" will be replaced by "addl %ebx,%eax"
+      if %ebx contains ref1). Previously only complete load sequences were
+      optimized away, but not such small accesses in other instructions than
+      mov/movzx/movsx
+
+  Revision 1.21  2001/09/04 14:01:04  jonas
     * commented out some inactive code in csopt386
     + small improvement: lea is now handled the same as mov/zx/sx