Pārlūkot izejas kodu

* moved all i386 mov peephole optimization code into OptPass1MOV

git-svn-id: trunk@33908 -
florian 9 gadi atpakaļ
vecāks
revīzija
ba54f7243e
2 mainītis faili ar 592 papildinājumiem un 506 dzēšanām
  1. 3 504
      compiler/i386/aoptcpu.pas
  2. 589 2
      compiler/x86/aoptx86.pas

+ 3 - 504
compiler/i386/aoptcpu.pas

@@ -40,10 +40,7 @@ unit aoptcpu;
         procedure PeepHoleOptPass1; override;
         procedure PeepHoleOptPass2; override;
         procedure PostPeepHoleOpts; override;
-        function IsExitCode(p : tai) : boolean;
         function DoFpuLoadStoreOpt(var p : tai) : boolean;
-        procedure RemoveLastDeallocForFuncRes(p : tai);
-        procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
         function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
         function InstructionLoadsFromReg(const reg : TRegister;const hp : tai) : boolean;override;
       end;
@@ -83,76 +80,6 @@ unit aoptcpu;
     end;
 
 
-    function TCpuAsmOptimizer.IsExitCode(p : tai) : boolean;  { true when p is an instruction that starts one of the three exit sequences matched below }
-      var
-        hp2,hp3 : tai;  { the items after p, as returned by GetNextInstruction }
-      begin
-        result:=(p.typ=ait_instruction) and  { NOTE(review): the taicpu casts below presumably rely on short-circuit boolean evaluation - confirm }
-        ((taicpu(p).opcode = A_RET) or  { sequence 1: p itself is RET }
-         ((taicpu(p).opcode=A_LEAVE) and  { sequence 2: LEAVE directly followed by RET }
-          GetNextInstruction(p,hp2) and
-          (hp2.typ=ait_instruction) and
-          (taicpu(hp2).opcode=A_RET)
-         ) or
-         ((taicpu(p).opcode=A_MOV) and  { sequence 3: MOV with operand 0 = EBP and operand 1 = ESP ... }
-          (taicpu(p).oper[0]^.typ=top_reg) and
-          (taicpu(p).oper[0]^.reg=NR_EBP) and
-          (taicpu(p).oper[1]^.typ=top_reg) and
-          (taicpu(p).oper[1]^.reg=NR_ESP) and
-          GetNextInstruction(p,hp2) and
-          (hp2.typ=ait_instruction) and
-          (taicpu(hp2).opcode=A_POP) and  { ... followed by POP of register EBP ... }
-          (taicpu(hp2).oper[0]^.typ=top_reg) and
-          (taicpu(hp2).oper[0]^.reg=NR_EBP) and
-          GetNextInstruction(hp2,hp3) and
-          (hp3.typ=ait_instruction) and
-          (taicpu(hp3).opcode=A_RET)  { ... followed by RET }
-         )
-        );
-      end;
-
-
-    procedure TCPUAsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);  { removes the closest dealloc marker before p for each register selected from the return type of the current procedure }
-
-      procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);  { walks backwards from p and removes the first dealloc marker found for integer register supreg }
-        var
-          hp2: tai;  { cursor moving backwards through the list }
-        begin
-          hp2 := p;
-          repeat
-            hp2 := tai(hp2.previous);
-            if assigned(hp2) and
-               (hp2.typ = ait_regalloc) and
-               (tai_regalloc(hp2).ratype=ra_dealloc) and
-               (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
-               (getsupreg(tai_regalloc(hp2).reg) = supreg) then
-              begin
-                asml.remove(hp2);  { unlink the marker from the list before freeing it }
-                hp2.free;
-                break;  { only one marker is removed per call }
-              end;
-          until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);  { stop at the start of the list or once regInInstruction reports the register for hp2 }
-        end;
-
-      begin
-          case current_procinfo.procdef.returndef.typ of  { return types not listed here fall through without any action }
-            arraydef,recorddef,pointerdef,
-               stringdef,enumdef,procdef,objectdef,errordef,
-               filedef,setdef,procvardef,
-               classrefdef,forwarddef:
-              DoRemoveLastDeallocForFuncRes(RS_EAX);
-            orddef:
-              if current_procinfo.procdef.returndef.size <> 0 then  { an ordinal result of size 0 is skipped }
-                begin
-                  DoRemoveLastDeallocForFuncRes(RS_EAX);
-                  { for int64/qword }
-                  if current_procinfo.procdef.returndef.size = 8 then
-                    DoRemoveLastDeallocForFuncRes(RS_EDX);
-                end;
-          end;
-      end;
-
-
     function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
     { returns true if a "continue" should be done after this optimization }
     var hp1, hp2: tai;
@@ -204,95 +131,6 @@ unit aoptcpu;
     end;
 
 
-    { allocates register reg between (and including) instructions p1 and p2
-      the type of p1 and p2 must not be in SkipInstr
-      note that this routine is both called from the peephole optimizer
-      (where optinfo is not yet initialised) and from the cse (where it is)
-
-      reg             : register to keep allocated over the range
-      p1, p2          : first and last item of the range
-      initialusedregs : register set checked with RegInUsedRegs; reg is added
-                        to it when it was not yet marked as used
-
-      Nothing is done when reg is NR_ESP or the current frame pointer, or
-      when p1 is nil.
-
-      NOTE(review): the loop below raises internalerror 2014022301 whenever
-      optinfo is assigned, so the remark about the cse caller may be
-      outdated - confirm.  }
-    procedure TCpuAsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
-      var
-        hp, start: tai;
-        removedsomething,
-        firstRemovedWasAlloc,
-        lastRemovedWasDealloc: boolean;
-      begin
-{$ifdef EXTDEBUG}
-{        if assigned(p1.optinfo) and
-           (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
-         internalerror(2004101010); }
-{$endif EXTDEBUG}
-        start := p1;  { remember the original range start; p1 is advanced by the loop below }
-       if (reg = NR_ESP) or
-          (reg = current_procinfo.framepointer) or
-           not(assigned(p1)) then
-          { this happens with registers which are loaded implicitly, outside the }
-          { current block (e.g. esi with self)                                    }
-          exit;
-        { make sure we allocate it for this instruction }
-        getnextinstruction(p2,p2);  { p2 is replaced by its successor; the function result is ignored }
-        lastRemovedWasDealloc := false;
-        removedSomething := false;
-        firstRemovedWasAlloc := false;
-{$ifdef allocregdebug}
-        hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+  { NOTE(review): supreg is not declared in this procedure - this section may not compile when allocregdebug is defined }
-          ' from here...'));
-        insertllitem(asml,p1.previous,p1,hp);  { NOTE(review): four arguments here, three in the calls outside this ifdef - confirm }
-        hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
-          ' till here...'));
-        insertllitem(asml,p2,p2.next,hp);
-{$endif allocregdebug}
-        if not(RegInUsedRegs(reg,initialusedregs)) then  { reg not marked as used yet: insert an alloc marker before p1 and mark it }
-          begin
-            hp := tai_regalloc.alloc(reg,nil);
-            insertllItem(p1.previous,p1,hp);
-            IncludeRegInUsedRegs(reg,initialusedregs);
-          end;
-        while assigned(p1) and
-              (p1 <> p2) do
-          begin
-            if assigned(p1.optinfo) then
-              internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
-            p1 := tai(p1.next);
-            repeat
-              while assigned(p1) and
-                    (p1.typ in (SkipInstr-[ait_regalloc])) Do  { skip items in SkipInstr, but stop on regalloc markers }
-                p1 := tai(p1.next);
-
-              { remove all allocation/deallocation info about the register in between }
-              if assigned(p1) and
-                 (p1.typ = ait_regalloc) then
-                if tai_regalloc(p1).reg=reg then
-                  begin
-                    if not removedSomething then
-                      begin
-                        firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;  { recorded only for the first marker removed }
-                        removedSomething := true;
-                      end;
-                    lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);  { overwritten on every removal, so it reflects the last one }
-                    hp := tai(p1.Next);  { save the successor before p1 is freed }
-                    asml.Remove(p1);
-                    p1.free;
-                    p1 := hp;
-                  end
-                else p1 := tai(p1.next);  { marker for a different register: keep it }
-            until not(assigned(p1)) or
-                  not(p1.typ in SkipInstr);
-          end;
-        if assigned(p1) then  { the end of the list was not reached }
-          begin
-            if firstRemovedWasAlloc then  { re-create the removed alloc marker, now in front of the range start }
-              begin
-                hp := tai_regalloc.Alloc(reg,nil);
-                insertLLItem(start.previous,start,hp);
-              end;
-            if lastRemovedWasDealloc then  { re-create the removed dealloc marker in front of the item where the walk stopped }
-              begin
-                hp := tai_regalloc.DeAlloc(reg,nil);
-                insertLLItem(p1.previous,p1,hp);
-              end;
-          end;
-      end;
-
-
   { converts a TChange variable to a TRegister }
   function tch2reg(ch: tinschange): tsuperregister;
     const
@@ -751,17 +589,6 @@ function SkipLabels(hp: tai; var hp2: tai): boolean;
 { First pass of peephole optimizations }
 procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
 
-{$ifdef DEBUG_AOPTCPU}
-  procedure DebugMsg(const s: string;p : tai);  { DEBUG_AOPTCPU builds: inserts an assembler comment holding s in front of p }
-    begin
-      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
-    end;
-{$else DEBUG_AOPTCPU}
-  procedure DebugMsg(const s: string;p : tai);inline;  { other builds: empty stub, so callers need no ifdef of their own }
-    begin
-    end;
-{$endif DEBUG_AOPTCPU}
-
 function WriteOk : Boolean;
   begin
     writeln('Ok');
@@ -1331,339 +1158,11 @@ begin
                         end
 *)
                     end;
+
                   A_MOV:
                     begin
-                      if (taicpu(p).oper[1]^.typ = top_reg) and
-                         (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
-                         GetNextInstruction(p, hp1) and
-                         (tai(hp1).typ = ait_instruction) and
-                         (taicpu(hp1).opcode = A_MOV) and
-                         (taicpu(hp1).oper[0]^.typ = top_reg) and
-                         (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
-                        begin
-                          CopyUsedRegs(TmpUsedRegs);
-                          {we have "mov x, %treg; mov %treg, y}
-                          if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
-                             not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
-                            {we've got "mov x, %treg; mov %treg, y; with %treg is not used after }
-                            case taicpu(p).oper[0]^.typ Of
-                              top_reg:
-                                begin
-                                  { change "mov %reg, %treg; mov %treg, y"
-                                    to "mov %reg, y" }
-                                  taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
-                                  asml.remove(hp1);
-                                  hp1.free;
-                                  ReleaseUsedRegs(TmpUsedRegs);
-                                  continue;
-                                end;
-                              top_ref:
-                                if (taicpu(hp1).oper[1]^.typ = top_reg) then
-                                begin
-                                  { change "mov mem, %treg; mov %treg, %reg"
-                                    to "mov mem, %reg" }
-                                  taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
-                                  asml.remove(hp1);
-                                  hp1.free;
-                                  ReleaseUsedRegs(TmpUsedRegs);
-                                  continue;
-                                end;
-                            end;
-                          ReleaseUsedRegs(TmpUsedRegs);
-                        end
-                      else
-                    {Change "mov %reg1, %reg2; xxx %reg2, ???" to
-                    "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
-                    penalty}
-                        if (taicpu(p).oper[0]^.typ = top_reg) and
-                           (taicpu(p).oper[1]^.typ = top_reg) and
-                           GetNextInstruction(p,hp1) and
-                           (tai(hp1).typ = ait_instruction) and
-                           (taicpu(hp1).ops >= 1) and
-                           (taicpu(hp1).oper[0]^.typ = top_reg) and
-                           (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
-                    {we have "mov %reg1, %reg2; XXX %reg2, ???"}
-                          begin
-                            if ((taicpu(hp1).opcode = A_OR) or
-                                (taicpu(hp1).opcode = A_TEST)) and
-                               (taicpu(hp1).oper[1]^.typ = top_reg) and
-                               (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
-                  {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
-                              begin
-                                CopyUsedRegs(TmpUsedRegs);
-                                { reg1 will be used after the first instruction, }
-                                { so update the allocation info                  }
-                                AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
-                                if GetNextInstruction(hp1, hp2) and
-                                   (hp2.typ = ait_instruction) and
-                                   taicpu(hp2).is_jmp and
-                                   not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
-                { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
-                  "test %reg1, %reg1; jxx" }
-                                    begin
-                                      taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
-                                      taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
-                                      asml.remove(p);
-                                      p.free;
-                                      p := hp1;
-                                      ReleaseUsedRegs(TmpUsedRegs);
-                                      continue
-                                    end
-                                  else
-                {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
-                  "mov %reg1, %reg2; test/or %reg1, %reg1"}
-                                    begin
-                                      taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
-                                      taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
-                                    end;
-                                ReleaseUsedRegs(TmpUsedRegs);
-                              end
-{                              else
-                                if (taicpu(p.next)^.opcode
-                                  in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
-                        {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
-                          "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
-                          end
-                        else
-                    {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
-                    x >= RetOffset) as it doesn't do anything (it writes either to a
-                    parameter or to the temporary storage room for the function
-                    result)}
-                          if GetNextInstruction(p, hp1) and
-                             (tai(hp1).typ = ait_instruction) then
-                            if IsExitCode(hp1) and
-                               (taicpu(p).oper[1]^.typ = top_ref) and
-                               (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
-                               not(assigned(current_procinfo.procdef.funcretsym) and
-                                   (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
-                               (taicpu(p).oper[1]^.ref^.index = NR_NO) and
-                               (taicpu(p).oper[0]^.typ = top_reg) then
-                              begin
-                                asml.remove(p);
-                                p.free;
-                                p := hp1;
-                                RemoveLastDeallocForFuncRes(p);
-                              end
-                            else
-                              if (taicpu(p).oper[0]^.typ = top_reg) and
-                                  (taicpu(p).oper[1]^.typ = top_ref) and
-                                  (taicpu(p).opsize = taicpu(hp1).opsize) and
-                                  (taicpu(hp1).opcode = A_CMP) and
-                                  (taicpu(hp1).oper[1]^.typ = top_ref) and
-                                  RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
-                                {change "mov reg1, mem1; cmp x, mem1" to "mov reg, mem1; cmp x, reg1"}
-                                begin
-                                  taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
-                                  AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
-                                end;
-                    { Next instruction is also a MOV ? }
-                      if GetNextInstruction(p, hp1) and
-                        MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
-                        begin
-                          if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
-                             (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
-                              {mov reg1, mem1     or     mov mem1, reg1
-                              mov mem2, reg2            mov reg2, mem2}
-                            begin
-                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
-                                {mov reg1, mem1     or     mov mem1, reg1
-                                 mov mem2, reg1            mov reg2, mem1}
-                                begin
-                                  if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
-                                    { Removes the second statement from
-                                      mov reg1, mem1/reg2
-                                      mov mem1/reg2, reg1 }
-                                    begin
-                                      if (taicpu(p).oper[0]^.typ = top_reg) then
-                                        AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
-                                      asml.remove(hp1);
-                                      hp1.free;
-                                    end
-                                  else
-                                    begin
-                                      CopyUsedRegs(TmpUsedRegs);
-                                      UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
-                                      if (taicpu(p).oper[1]^.typ = top_ref) and
-                                        { mov reg1, mem1
-                                          mov mem2, reg1 }
-                                         (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
-                                         GetNextInstruction(hp1, hp2) and
-                                         MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
-                                         OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
-                                         OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
-                                         not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
-                                         { change                   to
-                                           mov reg1, mem1           mov reg1, mem1
-                                           mov mem2, reg1           cmp reg1, mem2
-                                           cmp mem1, reg1                          }
-                                        begin
-                                          asml.remove(hp2);
-                                          hp2.free;
-                                          taicpu(hp1).opcode := A_CMP;
-                                          taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
-                                          taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
-                                          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
-                                          DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
-                                        end;
-                                      ReleaseUsedRegs(TmpUsedRegs);
-                                    end;
-                                end
-                              else
-                                begin
-                                  CopyUsedRegs(TmpUsedRegs);
-                                  if GetNextInstruction(hp1, hp2) and
-                                     (taicpu(p).oper[0]^.typ = top_ref) and
-                                     (taicpu(p).oper[1]^.typ = top_reg) and
-                                     (taicpu(hp1).oper[0]^.typ = top_reg) and
-                                     (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
-                                     (taicpu(hp1).oper[1]^.typ = top_ref) and
-                                     MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
-                                     (taicpu(hp2).oper[1]^.typ = top_reg) and
-                                     (taicpu(hp2).oper[0]^.typ = top_ref) and
-                                     RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^)  then
-                                    if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
-                                       not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
-                                    {   mov mem1, %reg1
-                                        mov %reg1, mem2
-                                        mov mem2, reg2
-                                     to:
-                                        mov mem1, reg2
-                                        mov reg2, mem2}
-                                      begin
-                                        AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
-                                        taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
-                                        taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
-                                        asml.remove(hp2);
-                                        hp2.free;
-                                      end
-                                    else
-                                      if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
-                                         not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
-                                         not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
-                                         {   mov mem1, reg1         mov mem1, reg1
-                                             mov reg1, mem2         mov reg1, mem2
-                                             mov mem2, reg2         mov mem2, reg1
-                                          to:                    to:
-                                             mov mem1, reg1         mov mem1, reg1
-                                             mov mem1, reg2         mov reg1, mem2
-                                             mov reg1, mem2
-
-                                          or (if mem1 depends on reg1
-                                       and/or if mem2 depends on reg2)
-                                          to:
-                                              mov mem1, reg1
-                                              mov reg1, mem2
-                                              mov reg1, reg2
-                                         }
-                                        begin
-                                          taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
-                                          taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
-                                          taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
-                                          taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
-                                          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
-                                          if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
-                                             (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
-                                            AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
-                                          if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
-                                             (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
-                                            AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
-                                        end
-                                      else
-                                        if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
-                                          begin
-                                            taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
-                                            AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
-                                          end
-                                        else
-                                          begin
-                                            asml.remove(hp2);
-                                            hp2.free;
-                                          end;
-                                  ReleaseUsedRegs(TmpUsedRegs);
-                                end;
-                            end
-                          else
-(*                          {movl [mem1],reg1
-                            movl [mem1],reg2
-                            to:
-                              movl [mem1],reg1
-                              movl reg1,reg2 }
-                            if (taicpu(p).oper[0]^.typ = top_ref) and
-                              (taicpu(p).oper[1]^.typ = top_reg) and
-                              (taicpu(hp1).oper[0]^.typ = top_ref) and
-                              (taicpu(hp1).oper[1]^.typ = top_reg) and
-                              (taicpu(p).opsize = taicpu(hp1).opsize) and
-                              RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
-                              (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
-                              (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
-                              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
-                            else*)
-                            {   movl const1,[mem1]
-                                movl [mem1],reg1
-                            to:
-                                movl const1,reg1
-                                movl reg1,[mem1] }
-                              if (taicpu(p).oper[0]^.typ = top_const) and
-                                 (taicpu(p).oper[1]^.typ = top_ref) and
-                                 (taicpu(hp1).oper[0]^.typ = top_ref) and
-                                 (taicpu(hp1).oper[1]^.typ = top_reg) and
-                                 (taicpu(p).opsize = taicpu(hp1).opsize) and
-                                 RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
-                                 not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
-                                begin
-                                  allocregbetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
-                                  taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
-                                  taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
-                                  taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
-                                  taicpu(hp1).fileinfo := taicpu(p).fileinfo;
-                                end
-                        end;
-                      if GetNextInstruction(p, hp1) and
-                         MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
-                         GetNextInstruction(hp1, hp2) and
-                         MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
-                         MatchOperand(Taicpu(p).oper[0]^,0) and
-                         (Taicpu(p).oper[1]^.typ = top_reg) and
-                         MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
-                         MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
-                         {mov reg1,0
-                          bts reg1,operand1             -->      mov reg1,operand2
-                          or  reg1,operand2                      bts reg1,operand1}
-                        begin
-                          Taicpu(hp2).opcode:=A_MOV;
-                          asml.remove(hp1);
-                          insertllitem(hp2,hp2.next,hp1);
-                          asml.remove(p);
-                          p.free;
-                          p:=hp1;
-                        end;
-                      if GetNextInstruction(p, hp1) and
-                         MatchInstruction(hp1,A_LEA,[S_L]) and
-                         (Taicpu(p).oper[0]^.typ = top_ref) and
-                         (Taicpu(p).oper[1]^.typ = top_reg) and
-                         ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
-                           (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
-                          ) or
-                          (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
-                           (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
-                          )
-                         ) then
-                         {mov reg1,ref
-                          lea reg2,[reg1,reg2]          -->      add reg2,ref}
-                        begin
-                          CopyUsedRegs(TmpUsedRegs);
-                          { reg1 may not be used afterwards }
-                          if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
-                            begin
-                              Taicpu(hp1).opcode:=A_ADD;
-                              Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
-                              DebugMsg('Peephole MovLea2Add done',hp1);
-                              asml.remove(p);
-                              p.free;
-                              p:=hp1;
-                            end;
-                          ReleaseUsedRegs(TmpUsedRegs);
-                        end;
+                      If OptPass1MOV(p) then
+                        Continue;
                     end;
 
                   A_MOVSX,

+ 589 - 2
compiler/x86/aoptx86.pas

@@ -32,7 +32,7 @@ unit aoptx86;
       cpubase,
       aasmtai,
       cgbase,cgutils,
-      aopt;
+      aopt,aoptobj;
 
     type
       TX86AsmOptimizer = class(TAsmOptimizer)
@@ -41,6 +41,14 @@ unit aoptx86;
         procedure PostPeepholeOptMov(const p : tai);
         function OptPass1VMOVAP(var p : tai) : boolean;
         function OptPass1VOP(const p : tai) : boolean;
+
+        function OptPass1MOV(var p : tai) : boolean;
+
+        procedure DebugMsg(const s : string; p : tai);inline;
+
+        procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
+        function IsExitCode(p : tai) : boolean;
+        procedure RemoveLastDeallocForFuncRes(p : tai);
       end;
 
     function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
@@ -61,9 +69,11 @@ unit aoptx86;
   implementation
 
     uses
+      cutils,
       verbose,
       aasmcpu,
-      aoptobj;
+      procinfo,
+      symconst,symsym;
 
     function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
       begin
@@ -179,6 +189,107 @@ unit aoptx86;
       end;
 
 
+    { Debugging aid for the peephole optimizer: when the compiler is built with
+      DEBUG_AOPTCPU defined, the text s is inserted as an assembler comment in
+      front of p; without the define the routine does nothing. }
+    procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
+      begin
+{$ifdef DEBUG_AOPTCPU}
+        asml.insertbefore(tai_comment.Create(strpnew(s)), p);
+{$endif DEBUG_AOPTCPU}
+      end;
+
+
+    { Allocates register reg between (and including) instructions p1 and p2.
+
+      The type of p1 and p2 must not be in SkipInstr.
+      Note that this routine is called both from the peephole optimizer
+      (where optinfo is not yet initialised) and from the cse (where it is).
+
+      reg             - register whose allocation range is extended
+      p1, p2          - first and last instruction of the range
+      initialusedregs - registers in use at p1; reg is added to it when an
+                        allocation marker has to be inserted in front of p1
+
+      All alloc/dealloc markers for reg found inside the range are removed.
+      If the first removed marker was an alloc, a new alloc is placed in front
+      of the range; if the last removed marker was a dealloc, a new dealloc is
+      placed at the end of the range. }
+    procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
+      var
+        hp, start: tai;
+        removedsomething,
+        firstRemovedWasAlloc,
+        lastRemovedWasDealloc: boolean;
+      begin
+{$ifdef EXTDEBUG}
+{        if assigned(p1.optinfo) and
+           (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
+         internalerror(2004101010); }
+{$endif EXTDEBUG}
+        start := p1;
+        { the stack pointer and the frame pointer are never tracked; use the
+          target independent constant so the check also holds on x86-64 }
+       if (reg = NR_STACK_POINTER_REG) or
+          (reg = current_procinfo.framepointer) or
+           not(assigned(p1)) then
+          { this happens with registers which are loaded implicitly, outside the }
+          { current block (e.g. esi with self)                                   }
+          exit;
+        { make sure we allocate it for this instruction }
+        getnextinstruction(p2,p2);
+        lastRemovedWasDealloc := false;
+        removedSomething := false;
+        firstRemovedWasAlloc := false;
+{$ifdef allocregdebug}
+        { mark the range in the assembler output; reg is already a complete
+          register, so it is printed directly }
+        hp := tai_comment.Create(strpnew('allocating '+std_regname(reg)+
+          ' from here...'));
+        insertllitem(p1.previous,p1,hp);
+        if assigned(p2) then
+          begin
+            hp := tai_comment.Create(strpnew('allocated '+std_regname(reg)+
+              ' till here...'));
+            insertllitem(p2,p2.next,hp);
+          end;
+{$endif allocregdebug}
+        if not(RegInUsedRegs(reg,initialusedregs)) then
+          begin
+            { reg is not live at p1 yet: start its live range right before p1 }
+            hp := tai_regalloc.alloc(reg,nil);
+            insertllItem(p1.previous,p1,hp);
+            IncludeRegInUsedRegs(reg,initialusedregs);
+          end;
+        while assigned(p1) and
+              (p1 <> p2) do
+          begin
+            if assigned(p1.optinfo) then
+              internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
+            p1 := tai(p1.next);
+            repeat
+              { skip everything in SkipInstr except register allocation markers }
+              while assigned(p1) and
+                    (p1.typ in (SkipInstr-[ait_regalloc])) Do
+                p1 := tai(p1.next);
+
+              { remove all allocation/deallocation info about the register in between }
+              if assigned(p1) and
+                 (p1.typ = ait_regalloc) then
+                if tai_regalloc(p1).reg=reg then
+                  begin
+                    if not removedSomething then
+                      begin
+                        firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
+                        removedSomething := true;
+                      end;
+                    lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
+                    hp := tai(p1.Next);
+                    asml.Remove(p1);
+                    p1.free;
+                    p1 := hp;
+                  end
+                else p1 := tai(p1.next);
+            until not(assigned(p1)) or
+                  not(p1.typ in SkipInstr);
+          end;
+        if assigned(p1) then
+          begin
+            { re-create the markers that delimited the removed range }
+            if firstRemovedWasAlloc then
+              begin
+                hp := tai_regalloc.Alloc(reg,nil);
+                insertLLItem(start.previous,start,hp);
+              end;
+            if lastRemovedWasDealloc then
+              begin
+                hp := tai_regalloc.DeAlloc(reg,nil);
+                insertLLItem(p1.previous,p1,hp);
+              end;
+          end;
+      end;
+
+
     function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
       var
         p: taicpu;
@@ -217,6 +328,76 @@ unit aoptx86;
       end;
 
 
+    { Returns true if p is the first instruction of one of the following
+      procedure exit sequences:
+
+        ret
+
+        leave
+        ret
+
+        mov %frame_pointer, %stack_pointer
+        pop %frame_pointer
+        ret
+
+      The frame and stack pointer are referenced through the target
+      independent constants, so the check is not tied to the 32 bit register
+      names. }
+    function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
+      var
+        hp2,hp3 : tai;
+      begin
+        result:=(p.typ=ait_instruction) and
+        ((taicpu(p).opcode = A_RET) or
+         ((taicpu(p).opcode=A_LEAVE) and
+          GetNextInstruction(p,hp2) and
+          (hp2.typ=ait_instruction) and
+          (taicpu(hp2).opcode=A_RET)
+         ) or
+         ((taicpu(p).opcode=A_MOV) and
+          (taicpu(p).oper[0]^.typ=top_reg) and
+          (taicpu(p).oper[0]^.reg=NR_FRAME_POINTER_REG) and
+          (taicpu(p).oper[1]^.typ=top_reg) and
+          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG) and
+          GetNextInstruction(p,hp2) and
+          (hp2.typ=ait_instruction) and
+          (taicpu(hp2).opcode=A_POP) and
+          (taicpu(hp2).oper[0]^.typ=top_reg) and
+          (taicpu(hp2).oper[0]^.reg=NR_FRAME_POINTER_REG) and
+          GetNextInstruction(hp2,hp3) and
+          (hp3.typ=ait_instruction) and
+          (taicpu(hp3).opcode=A_RET)
+         )
+        );
+      end;
+
+
+    { Removes the last deallocation marker of the function result register(s)
+      that precedes p. Which registers are handled depends on the type of the
+      result of the current procedure. }
+    procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
+
+      { Walks backwards from p and deletes the first dealloc marker found for
+        the integer register supreg; the walk stops at the start of the list
+        or at the first item that uses the register. }
+      procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
+        var
+          cur: tai;
+          wholereg: tregister;
+        begin
+          wholereg := newreg(R_INTREGISTER,supreg,R_SUBWHOLE);
+          cur := tai(p.previous);
+          while assigned(cur) do
+            begin
+              if (cur.typ = ait_regalloc) and
+                 (tai_regalloc(cur).ratype=ra_dealloc) and
+                 (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
+                 (getsupreg(tai_regalloc(cur).reg) = supreg) then
+                begin
+                  asml.remove(cur);
+                  cur.free;
+                  exit;
+                end;
+              { the register is used here, so no earlier dealloc may be touched }
+              if regInInstruction(wholereg,cur) then
+                exit;
+              cur := tai(cur.previous);
+            end;
+        end;
+
+      begin
+          case current_procinfo.procdef.returndef.typ of
+            orddef:
+              begin
+                { ordinal results of size 0 are not handled at all }
+                if current_procinfo.procdef.returndef.size <> 0 then
+                  begin
+                    DoRemoveLastDeallocForFuncRes(RS_EAX);
+                    { for int64/qword }
+                    if current_procinfo.procdef.returndef.size = 8 then
+                      DoRemoveLastDeallocForFuncRes(RS_EDX);
+                  end;
+              end;
+            arraydef,recorddef,pointerdef,
+               stringdef,enumdef,procdef,objectdef,errordef,
+               filedef,setdef,procvardef,
+               classrefdef,forwarddef:
+              DoRemoveLastDeallocForFuncRes(RS_EAX);
+          end;
+      end;
+
+
     function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
       var
         TmpUsedRegs : TAllUsedRegs;
@@ -322,6 +503,412 @@ unit aoptx86;
       end;
 
 
+    { Pass 1 peephole optimizations for the MOV instruction p and the
+      instructions that follow it.
+
+      p is a var parameter: when p itself is removed, it is set to the
+      instruction that took its place.
+
+      Returns true when an optimization was applied that ended the function
+      early; the caller is then expected to look at p again instead of moving
+      on to the next instruction. In all other cases, including those where a
+      later pattern in this function changed the code, false is returned. }
+    function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
+      var
+        hp1, hp2: tai;
+        TmpUsedRegs : TAllUsedRegs;
+      begin
+        Result:=false;
+        if (taicpu(p).oper[1]^.typ = top_reg) and
+           (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
+           GetNextInstruction(p, hp1) and
+           MatchInstruction(hp1,A_MOV,[]) and
+           MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
+          begin
+            CopyUsedRegs(TmpUsedRegs);
+            { we have
+                mov x, %treg
+                mov %treg, y
+            }
+            if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
+               not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
+              { we've got
+
+                mov x, %treg
+                mov %treg, y
+
+                with %treg is not used after }
+              case taicpu(p).oper[0]^.typ Of
+                top_reg:
+                  begin
+                    { change
+                        mov %reg, %treg
+                        mov %treg, y
+
+                        to
+
+                        mov %reg, y
+                    }
+                    taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
+                    asml.remove(hp1);
+                    hp1.free;
+                    ReleaseUsedRegs(TmpUsedRegs);
+                    { the code changed: let the caller process p again }
+                    Result:=true;
+                    Exit;
+                  end;
+                top_ref:
+                  if (taicpu(hp1).oper[1]^.typ = top_reg) then
+                    begin
+                      { change
+                           mov mem, %treg
+                           mov %treg, %reg
+
+                           to
+
+                           mov mem, %reg
+                      }
+                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
+                      asml.remove(hp1);
+                      hp1.free;
+                      ReleaseUsedRegs(TmpUsedRegs);
+                      { the code changed: let the caller process p again }
+                      Result:=true;
+                      Exit;
+                    end;
+              end;
+            ReleaseUsedRegs(TmpUsedRegs);
+          end
+        else
+          { Change
+             mov %reg1, %reg2
+             xxx %reg2, ???
+
+             to
+
+             mov %reg1, %reg2
+             xxx %reg1, ???
+
+             to avoid a write/read penalty
+          }
+          if MatchOpType(taicpu(p),top_reg,top_reg) and
+             GetNextInstruction(p,hp1) and
+             (tai(hp1).typ = ait_instruction) and
+             (taicpu(hp1).ops >= 1) and
+             MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
+            { we have
+
+              mov %reg1, %reg2
+              XXX %reg2, ???
+            }
+            begin
+              if ((taicpu(hp1).opcode = A_OR) or
+                  (taicpu(hp1).opcode = A_TEST)) and
+                 (taicpu(hp1).oper[1]^.typ = top_reg) and
+                 (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
+                {  we have
+
+                   mov %reg1, %reg2
+                   test/or %reg2, %reg2
+                }
+                begin
+                  CopyUsedRegs(TmpUsedRegs);
+                  { reg1 will be used after the first instruction,
+                    so update the allocation info                  }
+                  AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
+                  if GetNextInstruction(hp1, hp2) and
+                     (hp2.typ = ait_instruction) and
+                     taicpu(hp2).is_jmp and
+                     not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
+                      { change
+
+                        mov %reg1, %reg2
+                        test/or %reg2, %reg2
+                        jxx
+
+                        to
+
+                        test %reg1, %reg1
+                        jxx
+                      }
+                      begin
+                        taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
+                        taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
+                        asml.remove(p);
+                        p.free;
+                        p := hp1;
+                        ReleaseUsedRegs(TmpUsedRegs);
+                        { p was replaced: let the caller process the new p }
+                        Result:=true;
+                        Exit;
+                      end
+                    else
+                      { change
+
+                        mov %reg1, %reg2
+                        test/or %reg2, %reg2
+
+                        to
+
+                        mov %reg1, %reg2
+                        test/or %reg1, %reg1
+
+                        }
+                      begin
+                        taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
+                        taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
+                      end;
+                  ReleaseUsedRegs(TmpUsedRegs);
+                end
+            end
+        else
+          { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
+            x >= RetOffset) as it doesn't do anything (it writes either to a
+            parameter or to the temporary storage room for the function
+            result)
+          }
+          if GetNextInstruction(p, hp1) and
+            (tai(hp1).typ = ait_instruction) then
+            begin
+              if IsExitCode(hp1) and
+                MatchOpType(p,top_reg,top_ref) and
+                (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
+                not(assigned(current_procinfo.procdef.funcretsym) and
+                   (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
+                (taicpu(p).oper[1]^.ref^.index = NR_NO) then
+                begin
+                  asml.remove(p);
+                  p.free;
+                  p := hp1;
+                  DebugMsg('Peephole removed deadstore before leave/ret',p);
+                  RemoveLastDeallocForFuncRes(p);
+                end
+              { change
+                  mov reg1, mem1
+                  cmp x, mem1
+
+                  to
+
+                  mov reg1, mem1
+                  cmp x, reg1
+              }
+              else if MatchOpType(p,top_reg,top_ref) and
+                  MatchInstruction(hp1,A_CMP,[taicpu(p).opsize]) and
+                  (taicpu(hp1).oper[1]^.typ = top_ref) and
+                   RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
+                  begin
+                    taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
+                    AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
+                  end;
+            end;
+
+        { Next instruction is also a MOV ? }
+        if GetNextInstruction(p, hp1) and
+          MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
+          begin
+            if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
+               (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
+                {  mov reg1, mem1     or     mov mem1, reg1
+                   mov mem2, reg2            mov reg2, mem2}
+              begin
+                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
+                  {mov reg1, mem1     or     mov mem1, reg1
+                   mov mem2, reg1            mov reg2, mem1}
+                  begin
+                    if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
+                      { Removes the second statement from
+                        mov reg1, mem1/reg2
+                        mov mem1/reg2, reg1 }
+                      begin
+                        if (taicpu(p).oper[0]^.typ = top_reg) then
+                          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
+                        asml.remove(hp1);
+                        hp1.free;
+                      end
+                    else
+                      begin
+                        CopyUsedRegs(TmpUsedRegs);
+                        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
+                        if (taicpu(p).oper[1]^.typ = top_ref) and
+                          { mov reg1, mem1
+                            mov mem2, reg1 }
+                           (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
+                           GetNextInstruction(hp1, hp2) and
+                           MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
+                           OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
+                           OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
+                           not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
+                           { change                   to
+                             mov reg1, mem1           mov reg1, mem1
+                             mov mem2, reg1           cmp reg1, mem2
+                             cmp mem1, reg1
+                           }
+                          begin
+                            asml.remove(hp2);
+                            hp2.free;
+                            taicpu(hp1).opcode := A_CMP;
+                            taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
+                            taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
+                            AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
+                            DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
+                          end;
+                        ReleaseUsedRegs(TmpUsedRegs);
+                      end;
+                  end
+                else
+                  begin
+                    CopyUsedRegs(TmpUsedRegs);
+                    if GetNextInstruction(hp1, hp2) and
+                      MatchOpType(taicpu(p),top_ref,top_reg) and
+                      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
+                      (taicpu(hp1).oper[1]^.typ = top_ref) and
+                      MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
+                      MatchOpType(taicpu(hp2),top_ref,top_reg) and
+                      RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^)  then
+                      if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
+                         not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
+                        {   mov mem1, %reg1
+                            mov %reg1, mem2
+                            mov mem2, reg2
+                         to:
+                            mov mem1, reg2
+                            mov reg2, mem2}
+                        begin
+                          AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
+                          taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
+                          taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
+                          asml.remove(hp2);
+                          hp2.free;
+                        end
+{$ifdef i386}
+                      { this is enabled for i386 only, as the rules to create the reg sets below
+                        are too complicated for x86-64, so this makes this code too error prone
+                        on x86-64
+                      }
+                      else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
+                        not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
+                        not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
+                        {   mov mem1, reg1         mov mem1, reg1
+                            mov reg1, mem2         mov reg1, mem2
+                            mov mem2, reg2         mov mem2, reg1
+                         to:                    to:
+                            mov mem1, reg1         mov mem1, reg1
+                            mov mem1, reg2         mov reg1, mem2
+                            mov reg1, mem2
+
+                         or (if mem1 depends on reg1
+                      and/or if mem2 depends on reg2)
+                         to:
+                             mov mem1, reg1
+                             mov reg1, mem2
+                             mov reg1, reg2
+                        }
+                        begin
+                          taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
+                          taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
+                          taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
+                          taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
+                          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
+                          if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
+                             (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
+                            AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
+                          if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
+                             (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
+                            AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
+                        end
+                      else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
+                        begin
+                          taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
+                          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
+                        end
+                      else
+                        begin
+                          asml.remove(hp2);
+                          hp2.free;
+                        end
+{$endif i386}
+                        ;
+                    ReleaseUsedRegs(TmpUsedRegs);
+                  end;
+              end
+(*          { movl [mem1],reg1
+              movl [mem1],reg2
+
+              to
+
+              movl [mem1],reg1
+              movl reg1,reg2
+             }
+             else if (taicpu(p).oper[0]^.typ = top_ref) and
+                (taicpu(p).oper[1]^.typ = top_reg) and
+                (taicpu(hp1).oper[0]^.typ = top_ref) and
+                (taicpu(hp1).oper[1]^.typ = top_reg) and
+                (taicpu(p).opsize = taicpu(hp1).opsize) and
+                RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
+                (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
+                (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
+                taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
+              else*)
+
+            {   movl const1,[mem1]
+                movl [mem1],reg1
+
+                to
+
+                movl const1,reg1
+                movl reg1,[mem1]
+            }
+            else if MatchOpType(Taicpu(p),top_const,top_ref) and
+                 MatchOpType(Taicpu(hp1),top_ref,top_reg) and
+                 (taicpu(p).opsize = taicpu(hp1).opsize) and
+                 RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
+                 not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
+              begin
+                allocregbetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
+                taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
+                taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
+                taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
+                taicpu(hp1).fileinfo := taicpu(p).fileinfo;
+              end
+          end;
+
+        if GetNextInstruction(p, hp1) and
+          MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
+          GetNextInstruction(hp1, hp2) and
+          MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
+          MatchOperand(Taicpu(p).oper[0]^,0) and
+          (Taicpu(p).oper[1]^.typ = top_reg) and
+          MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
+          MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
+          { mov reg1,0
+            bts reg1,operand1             -->      mov reg1,operand2
+            or  reg1,operand2                      bts reg1,operand1}
+          begin
+            Taicpu(hp2).opcode:=A_MOV;
+            asml.remove(hp1);
+            insertllitem(hp2,hp2.next,hp1);
+            asml.remove(p);
+            p.free;
+            p:=hp1;
+          end;
+
+        if GetNextInstruction(p, hp1) and
+           MatchInstruction(hp1,A_LEA,[S_L]) and
+           MatchOpType(Taicpu(p),top_ref,top_reg) and
+           ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
+             (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
+            ) or
+            (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
+             (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
+            )
+           ) then
+           { mov reg1,ref
+             lea reg2,[reg1,reg2]
+
+             to
+
+             add reg2,ref}
+          begin
+            CopyUsedRegs(TmpUsedRegs);
+            { reg1 may not be used afterwards }
+            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
+              begin
+                Taicpu(hp1).opcode:=A_ADD;
+                Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
+                DebugMsg('Peephole MovLea2Add done',hp1);
+                asml.remove(p);
+                p.free;
+                p:=hp1;
+              end;
+            ReleaseUsedRegs(TmpUsedRegs);
+          end;
+      end;
+
+
     procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
       begin
        if MatchOperand(taicpu(p).oper[0]^,0) and