aoptx86.pas 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,
  25. cgbase,cgutils,
  26. aopt;
  27. type
  { Assembler optimizer class for x86 targets, derived from TAsmOptimizer.
    It provides the peephole helpers implemented in this unit. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { Returns true when instruction hp writes a new value to reg, i.e. a
      value that is not computed from the previous contents of reg. }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  protected
    { Post-peephole pass: rewrites "mov $0,%reg" into "xor %reg,%reg" when
      the flags register is not in use. }
    procedure PostPeepholeOptMov(const p : tai);
    { Pass 1 optimizations for register-to-register vmovaps/vmovapd;
      returns true when p was replaced or modified. }
    function OptPass1VMOVAP(var p : tai) : boolean;
  end;
  { MatchInstruction: true when instr is an instruction whose opcode is one
    of the given opcodes and whose operand size is in opsize; an empty
    opsize set accepts every operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  { MatchOperand: true when the operand is the given register, the given
    constant, or equal to a second operand of the same kind. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  { RefsEqual: field-by-field comparison of two memory references. }
  function RefsEqual(const r1, r2: treference): boolean;
  { MatchReference: true for a plain reference (no offset, segment or
    symbols, scale factor 0 or 1) with the given base and index register;
    NR_INVALID for base or index skips that comparison. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  { MatchOpType: true when instr has exactly two operands of the types
    ot0 and ot1 (in that order). }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  44. implementation
  45. uses
  46. verbose,
  47. aasmcpu,
  48. aoptobj;
  { Checks whether instr is a CPU instruction with opcode op.

    instr  : list entry to test
    op     : opcode that must match
    opsize : accepted operand sizes; the empty set accepts every size

    Returns true only if all three conditions hold. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      { anything that is not an instruction can never match }
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Checks whether instr is a CPU instruction whose opcode is op1 or op2.

    instr   : list entry to test
    op1,op2 : accepted opcodes
    opsize  : accepted operand sizes; the empty set accepts every size }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    var
      opc : TAsmOp;
    begin
      result:=false;
      if instr.typ=ait_instruction then
        begin
          opc:=taicpu(instr).opcode;
          if (opc=op1) or (opc=op2) then
            result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
        end;
    end;
  { Checks whether instr is a CPU instruction whose opcode is op1, op2 or op3.

    instr       : list entry to test
    op1,op2,op3 : accepted opcodes
    opsize      : accepted operand sizes; the empty set accepts every size }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    var
      opc : TAsmOp;
    begin
      result:=false;
      if instr.typ=ait_instruction then
        begin
          opc:=taicpu(instr).opcode;
          if (opc=op1) or (opc=op2) or (opc=op3) then
            result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
        end;
    end;
  { Checks whether instr is a CPU instruction whose opcode occurs in ops.

    instr  : list entry to test
    ops    : accepted opcodes; an empty array never matches
    opsize : accepted operand sizes; the empty set accepts every size

    Returns true at the first opcode in ops that matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      i : longint;
    begin
      for i:=low(ops) to high(ops) do
        if (instr.typ=ait_instruction) and
           (taicpu(instr).opcode=ops[i]) and
           ((opsize=[]) or (taicpu(instr).opsize in opsize)) then
          begin
            result:=true;
            exit;
          end;
      { no opcode matched }
      result:=false;
    end;
  { True when oper is a register operand holding exactly the register reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ<>top_reg then
        result:=false
      else
        result:=oper.reg=reg;
    end;
  { True when oper is a constant operand whose value equals a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ<>top_const then
        result:=false
      else
        result:=oper.val=a;
    end;
  { Compares two operands for equality.

    Operands of different kinds never match. Constants are compared by
    value, registers by register, references with RefsEqual. Any other
    operand kind (when both operands share it) triggers
    internalerror(2013102801). }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result:=false;
      if oper1.typ=oper2.typ then
        case oper1.typ of
          top_ref:
            result:=RefsEqual(oper1.ref^,oper2.ref^);
          top_reg:
            result:=oper1.reg=oper2.reg;
          top_const:
            result:=oper1.val=oper2.val;
          else
            internalerror(2013102801);
        end;
    end;
  { True when the two references agree in every compared field: segment,
    base, index, scale factor, offset, symbol, relsymbol and refaddr. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      { register part of the address }
      result:=
        (r1.segment=r2.segment) and
        (r1.base=r2.base) and
        (r1.index=r2.index) and
        (r1.scalefactor=r2.scalefactor);
      { displacement and symbolic part }
      if result then
        result:=
          (r1.offset=r2.offset) and
          (r1.symbol=r2.symbol) and
          (r1.relsymbol=r2.relsymbol) and
          (r1.refaddr=r2.refaddr);
    end;
  { Checks that ref is a plain register reference.

    The reference must have offset 0, a scale factor of 0 or 1, no segment
    register and neither symbol nor relsymbol. base and index are compared
    with the corresponding fields of ref; passing NR_INVALID for either of
    them skips that comparison. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      if (ref.offset<>0) or not(ref.scalefactor in [0,1]) then
        exit;
      if (ref.segment<>NR_NO) or (ref.symbol<>nil) or (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { True when instr has exactly two operands, the first of type ot0 and
    the second of type ot1. instr is accessed as a taicpu without checking
    its typ field, so the caller has to pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    var
      insn : taicpu;
    begin
      insn:=taicpu(instr);
      Result:=false;
      if insn.ops=2 then
        Result:=(insn.oper[0]^.typ=ot0) and (insn.oper[1]^.typ=ot1);
    end;
  { Tells whether instruction hp loads reg with a new value, i.e. a value
    that does not depend on the previous contents of reg.

    reg : register of interest
    hp  : list entry to inspect; nil or a non-instruction yields false

    Recognized are two-operand move-like instructions (mov, movzx, movsx,
    lea and the listed SSE/AVX moves) whose destination is reg and whose
    source does not involve reg, and "pop reg".

    Compared with the earlier version this is stricter in three places, all
    of which can only turn a former "true" into "false":
      - the operand count is checked before operands are accessed; several
        of the AVX opcodes also exist in a three-operand form where oper[1]
        is not the destination
      - the pop operand must be a register before its reg field is read
      - the register type must match as well as the super register number,
        so that a write to an integer register is not taken for a write to
        the vector register with the same number }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result:=false;
      if not assigned(hp) or
         (hp.typ<>ait_instruction) then
        exit;
      p:=taicpu(hp);
      case p.opcode of
        A_MOV,
        A_MOVZX,
        A_MOVSX,
        A_LEA,
        A_VMOVSS,
        A_VMOVSD,
        A_VMOVAPD,
        A_VMOVAPS,
        A_VMOVQ,
        A_MOVSS,
        A_MOVSD,
        A_MOVQ,
        A_MOVAPD,
        A_MOVAPS:
          Result:=
            (p.ops=2) and
            { destination is reg ... }
            (p.oper[1]^.typ=top_reg) and
            (getregtype(p.oper[1]^.reg)=getregtype(reg)) and
            (getsupreg(p.oper[1]^.reg)=getsupreg(reg)) and
            { ... and the source does not read reg }
            ((p.oper[0]^.typ=top_const) or
             ((p.oper[0]^.typ=top_reg) and
              (getsupreg(p.oper[0]^.reg)<>getsupreg(reg))) or
             ((p.oper[0]^.typ=top_ref) and
              not RegInRef(reg,p.oper[0]^.ref^)));
        A_POP:
          Result:=
            (p.ops=1) and
            { pop may also target memory; reg is only valid for top_reg }
            (p.oper[0]^.typ=top_reg) and
            (getregtype(p.oper[0]^.reg)=getregtype(reg)) and
            (getsupreg(p.oper[0]^.reg)=getsupreg(reg));
        else
          Result:=false;
      end;
    end;
  { Pass 1 peephole optimizations for vmovaps/vmovapd.

    p : the vmova* instruction to optimize. It is only examined when both
        of its operands are registers. On return p may point to a different
        instruction when the original one was removed.

    Returns true when the "vmova* reg,reg" removal or one of the two
    vmova*/vmova* folds was applied; the FMA fold replaces p but, as before,
    leaves the result false.

    Changes compared with the earlier version:
      - the FMA pattern now also requires the trailing vmova* to be a
        register-to-register move that reads reg2; previously any vmova*
        writing reg1 was accepted and removed, whatever its source was
      - the temporary register sets created by CopyUsedRegs are released
        again.
        NOTE(review): this relies on ReleaseUsedRegs being available from
        the ancestor optimizer class as the counterpart of CopyUsedRegs -
        confirm against aoptobj. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                 MatchOpType(taicpu(hp1),top_reg,top_reg) and
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                 { we mix single and double operations here because we assume that the compiler
                   generates vmovapd only after double operations and vmovaps only after single operations }
                 (taicpu(hp1).ops=3) and
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                 GetNextInstruction(hp1,hp2) and
                 MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                 { the trailing move must copy reg2 back into reg1, otherwise
                   removing it would lose an unrelated assignment to reg1 }
                 MatchOpType(taicpu(hp2),top_reg,top_reg) and
                 MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                 MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova* reg1,reg2
                    vfma*  ...,reg2
                    vmova* reg2,reg1
                    dealloc reg2
                    =>
                    vfma*  ...,reg1 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Post-peephole rewrite of a register clear.

    p : instruction to inspect; it is accessed as a taicpu with at least
        two operands, which the caller has to guarantee.

    Changes "mov $0, %reg" into "xor %reg, %reg". The rewrite is skipped
    while the flags register is recorded as in use, because xor modifies
    the flags and mov does not. }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    var
      insn : taicpu;
    begin
      insn:=taicpu(p);
      { source must be the constant 0 }
      if not MatchOperand(insn.oper[0]^,0) then
        exit;
      { destination must be a register }
      if insn.oper[1]^.typ<>top_reg then
        exit;
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;
      insn.opcode:=A_XOR;
      insn.loadReg(0,insn.oper[1]^.reg);
    end;
  265. end.