|
@@ -68,6 +68,7 @@ unit aoptx86;
|
|
|
function OptPass1OP(const p : tai) : boolean;
|
|
|
function OptPass1LEA(var p : tai) : boolean;
|
|
|
function OptPass1Sub(var p : tai) : boolean;
|
|
|
+ function OptPass1SHLSAL(var p : tai) : boolean;
|
|
|
|
|
|
function OptPass2MOV(var p : tai) : boolean;
|
|
|
function OptPass2Imul(var p : tai) : boolean;
|
|
@@ -238,6 +239,25 @@ unit aoptx86;
|
|
|
end;
|
|
|
|
|
|
|
|
|
{ Reports whether the list entry p has to be treated as a reader of the
  CPU flags.

  Returns true when
    - p is an instruction whose InsProp change set contains any of the
      "read" or "read/write" flag markers listed below (or Ch_All), or
    - p is a label (NOTE(review): presumably a conservative answer because
      the code reached through a label is not inspected here - confirm).
  Returns false for every other entry type and for instructions without
  any of these markers. }
function InstrReadsFlags(p: tai): boolean;
  begin
    { default answer for the early exits below }
    InstrReadsFlags := true;
    case p.typ of
      ait_instruction:
        if InsProp[taicpu(p).opcode].Ch*
           [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
            Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
            Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[] then
          exit;
      ait_label:
        exit;
    end;
    { neither a flag-reading instruction nor a label }
    InstrReadsFlags := false;
  end;
|
|
|
+
|
|
|
+
|
|
|
function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
|
|
|
begin
|
|
|
Result:=RegReadByInstruction(reg,hp);
|
|
@@ -2054,6 +2074,125 @@ unit aoptx86;
|
|
|
end;
|
|
|
|
|
|
|
|
|
{ Pass 1 peephole optimization for a SHL/SAL instruction at p.

  Two rewrites are attempted:

  1. "shl const,%reg32" with const <= 3, followed by any run of
     add/sub const,%reg32 / add %other,%reg32 / inc %reg32 / dec %reg32,
     is collapsed into a single
       lea offset(%other,%reg32,1 shl const),%reg32
     An add/sub/inc/dec is only absorbed when the instruction after it
     does not read the flags (see InstrReadsFlags), because lea does not
     set them.  If nothing could be absorbed, the shift is still replaced
     by "add %reg,%reg" (const = 1) or a lea when optimizing for speed on
     a CPU older than the Pentium II.

  2. Any other "shl $1,%reg" (i.e. a non 32 bit operand) becomes
     "add %reg,%reg" on CPUs older than the Pentium II.

  p is updated to the replacement instruction when a rewrite happens.
  The function result is always false, exactly as before this change.

  NOTE(review): when the shift itself is turned into a lea (no add/sub
  absorbed) nothing verifies that the flags written by the shift are
  unused afterwards - confirm that callers can rely on this. }
function TX86AsmOptimizer.OptPass1SHLSAL(var p : tai) : boolean;
  var
    TmpBool1,TmpBool2 : Boolean;
    tmpref : treference;
    hp1,hp2: tai;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_const,top_reg) and
       (taicpu(p).opsize = S_L) and
       (taicpu(p).oper[0]^.val <= 3) then
      { Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement }
      begin
        { should we check the next instruction? }
        TmpBool1 := True;
        { have we found an add/sub which could be
          integrated in the lea? }
        TmpBool2 := False;
        { start with "(,%reg,1 shl const)" and extend it below }
        reference_reset(tmpref,2,[]);
        TmpRef.index := taicpu(p).oper[1]^.reg;
        TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
        while TmpBool1 and
              GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              ((((taicpu(hp1).opcode = A_ADD) or
                 (taicpu(hp1).opcode = A_SUB)) and
                (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
               (((taicpu(hp1).opcode = A_INC) or
                 (taicpu(hp1).opcode = A_DEC)) and
                (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
              (not GetNextInstruction(hp1,hp2) or
               not instrReadsFlags(hp2)) do
          begin
            { stop scanning unless hp1 gets absorbed below }
            TmpBool1 := False;
            if (taicpu(hp1).oper[0]^.typ = Top_Const) then
              begin
                { add/sub const,%reg -> fold into the displacement }
                TmpBool1 := True;
                TmpBool2 := True;
                case taicpu(hp1).opcode of
                  A_ADD:
                    inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  A_SUB:
                    dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                end;
                asml.remove(hp1);
                hp1.free;
              end
            else
              if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                 (((taicpu(hp1).opcode = A_ADD) and
                   (TmpRef.base = NR_NO) and
                   { "add %reg,%reg" on the shifted register itself adds the
                     already shifted value; as lea base it would add the
                     unshifted one and give a wrong result, so leave it }
                   (taicpu(hp1).oper[0]^.reg <> taicpu(p).oper[1]^.reg)) or
                  (taicpu(hp1).opcode = A_INC) or
                  (taicpu(hp1).opcode = A_DEC)) then
                begin
                  TmpBool1 := True;
                  TmpBool2 := True;
                  case taicpu(hp1).opcode of
                    A_ADD:
                      { only one base register is available in a reference }
                      TmpRef.base := taicpu(hp1).oper[0]^.reg;
                    A_INC:
                      inc(TmpRef.offset);
                    A_DEC:
                      dec(TmpRef.offset);
                  end;
                  asml.remove(hp1);
                  hp1.free;
                end;
          end;
        { the shift count is known to be <= 3 here, see the enclosing if }
        if TmpBool2 or
           ((current_settings.optimizecputype < cpu_Pentium2) and
            not(cs_opt_size in current_settings.optimizerswitches)) then
          begin
            if not(TmpBool2) and
               (taicpu(p).oper[0]^.val = 1) then
              begin
                { plain "shl $1,%reg" -> "add %reg,%reg" }
                hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
              end
            else
              hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                taicpu(p).oper[1]^.reg);
            { put hp1 in the place of p and continue with it }
            InsertLLItem(p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end;
      end
    else if (current_settings.optimizecputype < cpu_Pentium2) and
      MatchOpType(taicpu(p),top_const,top_reg) and
      (taicpu(p).oper[0]^.val=1) then
      begin
        { changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
          but faster on a 486, and pairable in both U and V pipes on the Pentium
          (unlike shl, which is only pairable in the U pipe).
          32 bit shifts by up to 3 never get here, they are handled by the
          first branch, so only the "add" rewrite is needed. }
        hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
        InsertLLItem(p.previous, p.next, hp1);
        p.free;
        p := hp1;
      end;
  end;
|
|
|
+
|
|
|
+
|
|
|
function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
|
|
|
var
|
|
|
TmpUsedRegs : TAllUsedRegs;
|