| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411 | {    Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe    This unit contains the peephole optimizer for i386    This program is free software; you can redistribute it and/or modify    it under the terms of the GNU General Public License as published by    the Free Software Foundation; either version 2 of the License, or    (at your option) any later version.    This program is distributed in the hope that it will be useful,    but WITHOUT ANY WARRANTY; without even the implied warranty of    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the    GNU General Public License for more details.    You should have received a copy of the GNU General Public License    along with this program; if not, write to the Free Software    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
****************************************************************************}
unit aoptcpu;

{$i fpcdefs.inc}

{$ifdef EXTDEBUG}
{$define DEBUG_AOPTCPU}
{$endif EXTDEBUG}

  Interface

    uses
      cgbase,
      cpubase, aopt, aoptx86,
      Aasmbase,aasmtai,aasmdata;

    Type
      { CPU specific peephole optimizer.  Each method looks at the list entry
        p, dispatches on its opcode to a routine inherited from
        TX86AsmOptimizer (or to a small inline transformation) and returns
        True when something was changed. }
      TCpuAsmOptimizer = class(TX86AsmOptimizer)
        function PrePeepHoleOptsCpu(var p: tai): boolean; override;
        function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
        function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
        function PostPeepHoleOptsCpu(var p : tai) : boolean; override;
      end;

    Var
      AsmOptimizer : TCpuAsmOptimizer;

  Implementation

    uses
      verbose,globtype,globals,
      cpuinfo,
      aasmcpu,
      aoptutils,
      aasmcfi,
      procinfo,
      cgutils,
      { units we should get rid of: }
      symsym,symconst;


    { Returns True if reg is an integer register whose super register lies in
      the range RS_EAX..RS_EBX.  Only the register type and the super register
      are tested; the sub register (size) of reg is not inspected here. }
    function isgp32reg(reg: TRegister): boolean;
      begin
        { warnings are disabled around the range comparison, as in the
          original code }
        {$push}{$warnings off}
        isgp32reg:=(getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)>=RS_EAX) and (getsupreg(reg)<=RS_EBX);
        {$pop}
      end;


    { Returns True if at least one operand of p is a memory reference whose
      segment register is set (i.e. differs from NR_NO). }
    function InsContainsSegRef(p: taicpu): boolean;
      var
        i: longint;
      begin
        { assume a hit; the early exit below leaves Result at True }
        result:=true;
        for i:=0 to p.opercnt-1 do
          if (p.oper[i]^.typ=top_ref) and
             (p.oper[i]^.ref^.segment<>NR_NO) then
            exit;
        result:=false;
      end;


    { Pre-pass peephole optimizations.

      Instructions that carry a segment override are skipped: p is advanced to
      the next list entry and the check is repeated on that entry.  For all
      other instructions the opcode selects one of the PrePeepholeOpt*
      routines.  "xor reg,reg" is rewritten to "mov reg,0".

      p      : current list entry; may be advanced, and may be nil on return
               when the skipped instruction was the last entry of the list.
      Result : True if the instruction at p was changed. }
    function TCPUAsmOPtimizer.PrePeepHoleOptsCpu(var p: tai): boolean;
      begin
        repeat
          Result:=False;
          { p is advanced below when a segment-ref instruction is skipped; if
            that instruction was the last entry, p is now nil and must not be
            dereferenced }
          if not Assigned(p) then
            Break;
          case p.typ of
            ait_instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    { Nothing's actually changed, so no need to set Result to True,
                      but try again to see if an instruction immediately follows }
                    Continue;
                  end;
                case taicpu(p).opcode Of
                  A_IMUL:
                    Result:=PrePeepholeOptIMUL(p);
                  A_SAR,A_SHR:
                    Result:=PrePeepholeOptSxx(p);
                  A_AND:
                    Result:=PrePeepholeOptAND(p);
                  A_XOR:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_reg) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                       { temporarily change this to 'mov reg,0' to make it easier }
                       { for the CSE. Will be changed back in pass 2              }
                        begin
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).loadConst(0,0);
                          Result:=true;
                        end;
                    end;
                  else
                    { Do nothing };
                end;
            end;
          else
            { Do nothing };
          end;
          { the loop only repeats through the Continue above }
          Break;
        until False;
      end;


    { Pass 1 peephole optimizations.

      Sets current_filepos to the position of the instruction, leaves
      instructions with a segment override untouched (immediate exit) and
      otherwise dispatches on the opcode to the matching OptPass1* routine.
      Two consecutive 16 bit "push const" instructions are merged inline into
      a single 32 bit push.

      If aoc_ForceNewIteration is set in OptsToCheck, the flag is cleared and,
      when nothing was changed, p is advanced by one entry and True is
      returned so that the caller sees a change.

      Result : True if something was changed (or the forced-iteration case
               above applied). }
    function TCPUAsmOPtimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
      var
        hp1 : tai;
      begin
        result:=False;
        case p.Typ Of
          ait_instruction:
            begin
              current_filepos:=taicpu(p).fileinfo;
              if InsContainsSegRef(taicpu(p)) then
                exit;
              case taicpu(p).opcode Of
                A_ADD:
                  Result:=OptPass1ADD(p);
                A_AND:
                  Result:=OptPass1And(p);
                A_IMUL:
                  Result:=OptPass1Imul(p);
                A_CMP:
                  Result:=OptPass1Cmp(p);
                A_VPXORD,
                A_VPXORQ,
                A_VXORPS,
                A_VXORPD,
                A_VPXOR:
                  Result:=OptPass1VPXor(p);
                A_XORPS,
                A_XORPD,
                A_PXOR:
                  Result:=OptPass1PXor(p);
                A_FLD:
                  Result:=OptPass1FLD(p);
                A_FSTP,A_FISTP:
                  Result:=OptPass1FSTP(p);
                A_LEA:
                  Result:=OptPass1LEA(p);
                A_MOV:
                  Result:=OptPass1MOV(p);
                A_MOVSX,
                A_MOVZX :
                  Result:=OptPass1Movx(p);
                A_TEST:
                  Result:=OptPass1Test(p);
                A_PUSH:
                  begin
                    { "pushw const1; pushw const2" -> "pushl (const1 shl 16)+const2";
                      the second constant is truncated to 16 bits before it
                      is added }
                    if (taicpu(p).opsize = S_W) and
                       (taicpu(p).oper[0]^.typ = Top_Const) and
                       GetNextInstruction(p, hp1) and
                       (tai(hp1).typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_PUSH) and
                       (taicpu(hp1).oper[0]^.typ = Top_Const) and
                       (taicpu(hp1).opsize = S_W) then
                      begin
                        taicpu(p).changeopsize(S_L);
                        taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
                        asml.remove(hp1);
                        hp1.free;
                        Result:=true;
                      end;
                  end;
                A_SHL, A_SAL:
                  Result:=OptPass1SHLSAL(p);
                A_SHR:
                  Result:=OptPass1SHR(p);
                A_SUB:
                  Result:=OptPass1Sub(p);
                A_Jcc:
                  Result:=OptPass1Jcc(p);
                A_MOVDQA,
                A_MOVAPD,
                A_MOVAPS,
                A_MOVUPD,
                A_MOVUPS,
                A_VMOVAPS,
                A_VMOVAPD,
                A_VMOVUPS,
                A_VMOVUPD:
                  Result:=OptPass1_V_MOVAP(p);
                A_VDIVSD,
                A_VDIVSS,
                A_VSUBSD,
                A_VSUBSS,
                A_VMULSD,
                A_VMULSS,
                A_VADDSD,
                A_VADDSS,
                A_VANDPD,
                A_VANDPS,
                A_VORPD,
                A_VORPS:
                  Result:=OptPass1VOP(p);
                A_MULSD,
                A_MULSS,
                A_ADDSD,
                A_ADDSS:
                  Result:=OptPass1OP(p);
                A_VMOVSD,
                A_VMOVSS,
                A_MOVSD,
                A_MOVSS:
                  Result:=OptPass1MOVXX(p);
                A_SHRX,
                A_SHLX:
                  Result:=OptPass1SHXX(p);
                A_VMOVDQA,
                A_VMOVDQU:
                  Result:=OptPass1VMOVDQ(p);
                A_VCVTSS2SD,
                A_CVTSS2SD:
                  Result:=OptPass1_V_Cvtss2sd(p);
                else
                  ;
              end;
            end;
          else
            ;
        end;

        { If this flag is set, force another run of pass 1 even if p wasn't
          changed }
        if aoc_ForceNewIteration in OptsToCheck then
          begin
            Exclude(OptsToCheck, aoc_ForceNewIteration);

            if not Result then
              begin
                if (p.typ in SkipInstr) then
                  UpdateUsedRegs(p);

                p := tai(p.Next);
                Result := True;
              end;
          end;
      end;


    { Pass 2 peephole optimizations.

      Instructions with a segment override are left untouched; all others are
      dispatched on their opcode to the matching OptPass2* routine.  Note that
      FSTP/FISTP are handled by the pass 1 routine OptPass1FSTP here as well.

      Result : True if something was changed. }
    function TCPUAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
      begin
        Result:=false;
        case p.Typ Of
          Ait_Instruction:
            begin
              if InsContainsSegRef(taicpu(p)) then
                exit;
              case taicpu(p).opcode Of
                A_ADD:
                  Result:=OptPass2ADD(p);
                A_Jcc:
                  Result:=OptPass2Jcc(p);
                A_Lea:
                  Result:=OptPass2Lea(p);
                A_FSTP,A_FISTP:
                  Result:=OptPass1FSTP(p);
                A_IMUL:
                  Result:=OptPass2Imul(p);
                A_JMP:
                  Result:=OptPass2Jmp(p);
                A_MOV:
                  Result:=OptPass2MOV(p);
                A_MOVZX:
                  Result:=OptPass2Movx(p);
                A_SUB:
                  Result:=OptPass2SUB(p);
                A_SETcc:
                  Result:=OptPass2SETcc(p);
                else
                  ;
              end;
            end;
          else
            ;
        end;
      end;


    { Post peephole optimizations.

      Instructions with a segment override are left untouched; all others are
      dispatched on their opcode to the matching PostPeephole* routine.  For
      MOVZX an additional Pentium specific rewrite of "movzbl" into
      "xorl; movb" is tried when PostPeepholeOptMovzx did not change anything.
      If nothing was changed at all, OptimizeRefs is called on the
      instruction.

      Result : True if something was changed. }
    function TCPUAsmOptimizer.PostPeepHoleOptsCpu(var p : tai) : boolean;

      { Rewrites the "movzbl src,%reg" at p into "xorl %reg,%reg; movb src,%reg8":
        a 32 bit xor of the destination with itself is inserted in front of p,
        and p becomes a byte sized mov into the R_SUBL sub register of the
        destination.  The caller has to make sure that src does not depend
        on %reg. }
      procedure ReplaceMovzblWithXorMov;
        var
          hp1: tai;
        begin
          { build the xor while oper[1] still names the full register }
          hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                      taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
          InsertLLItem(p.previous, p, hp1);
          taicpu(p).opcode := A_MOV;
          taicpu(p).changeopsize(S_B);
          setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
        end;

      begin
        Result:=false;
        case p.Typ Of
          Ait_Instruction:
            begin
              if InsContainsSegRef(taicpu(p)) then
                Exit;
              case taicpu(p).opcode Of
                A_CALL:
                  Result:=PostPeepHoleOptCall(p);
                A_LEA:
                  Result:=PostPeepholeOptLea(p);
                A_CMP:
                  Result:=PostPeepholeOptCmp(p);
                A_MOV:
                  Result:=PostPeepholeOptMov(p);
                A_MOVZX:
                  { if register vars are on, it's possible there is code like }
                  {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                  { so we can't safely replace the movzx then with xor/mov,   }
                  { since that would change the flags (JM)                    }
                  if PostPeepholeOptMovzx(p) then
                    Result := True
                  else if not(cs_opt_regvar in current_settings.optimizerswitches) then
                    begin
                      if (taicpu(p).oper[1]^.typ = top_reg) then
                        if (taicpu(p).oper[0]^.typ = top_reg)
                          then
                            case taicpu(p).opsize of
                              S_BL:
                                begin
                                  { Source and destination must not share a
                                    super register: for "movzbl %al,%eax" the
                                    inserted xor would clear the source before
                                    the mov reads it }
                                  if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                     (getsupreg(taicpu(p).oper[0]^.reg) <> getsupreg(taicpu(p).oper[1]^.reg)) and
                                     not(cs_opt_size in current_settings.optimizerswitches) and
                                     (current_settings.optimizecputype = cpu_Pentium) then
                                      {Change "movzbl %reg1, %reg2" to
                                       "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                       PentiumMMX}
                                    begin
                                      ReplaceMovzblWithXorMov;
                                      Result := True;
                                    end;
                                end;
                              else
                                ;
                            end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                              (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                              (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                              not(cs_opt_size in current_settings.optimizerswitches) and
                              IsGP32Reg(taicpu(p).oper[1]^.reg) and
                              (current_settings.optimizecputype = cpu_Pentium) and
                              (taicpu(p).opsize = S_BL) then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                              Pentium and PentiumMMX}
                            begin
                              ReplaceMovzblWithXorMov;
                              Result := True;
                            end;
                   end;
                A_TEST, A_OR:
                  Result:=PostPeepholeOptTestOr(p);
                A_AND:
                  Result:=PostPeepholeOptAnd(p);
                A_MOVSX:
                  Result:=PostPeepholeOptMOVSX(p);
                A_SHR:
                  Result:=PostPeepholeOptShr(p);
                A_ADD,
                A_SUB:
                  Result:=PostPeepholeOptADDSUB(p);
                A_XOR:
                  Result:=PostPeepholeOptXor(p);
                A_VPXOR:
                  Result:=PostPeepholeOptVPXOR(p);
                else
                  ;
              end;

              { Optimise any reference-type operands (if Result is True, the
                instruction will be checked on the next iteration) }
              if not Result then
                OptimizeRefs(taicpu(p));
            end;
          else
            ;
        end;
      end;


begin
  { register this class as the optimizer class to use }
  casmoptimizer:=TCpuAsmOptimizer;
end.
 |