aoptx86.pas 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,
  25. cgbase,cgutils,
  26. aopt;
  27. type
  { Peephole optimizer routines shared by the x86 targets; extends the
    generic TAsmOptimizer. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { Returns true when instruction hp is one of a fixed set of move-like
      opcodes (or a pop) that writes reg from a source which does not involve
      reg itself. Returns false for nil or non-instruction entries. }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  protected
    { Rewrites "mov $0,%reg" into "xor %reg,%reg" while the flags register is
      not marked as used. }
    procedure PostPeepholeOptMov(const p : tai);
    { Pass 1 optimization of a register-to-register vmova* instruction;
      may remove or replace p. Returns true when the code was changed. }
    function OptPass1VMOVAP(var p : tai) : boolean;
    { Pass 1 optimization that folds a vmova* following p into p's third
      operand. Returns true when the code was changed. }
    function OptPass1VOP(const p : tai) : boolean;
  end;
  35. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  36. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  37. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  38. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  39. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  40. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  41. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  42. function RefsEqual(const r1, r2: treference): boolean;
  43. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  44. function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
  45. implementation
  46. uses
  47. verbose,
  48. aasmcpu,
  49. aoptobj;
  { Returns true when instr is an instruction whose opcode equals op and whose
    operand size is contained in opsize. An empty opsize set accepts any size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      { list entries that are not instructions can never match }
      if instr.typ <> ait_instruction then
        exit;
      if taicpu(instr).opcode <> op then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Two-opcode variant: returns true when instr is an instruction whose opcode
    is op1 or op2 and whose operand size is contained in opsize. An empty
    opsize set accepts any size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      { reject as soon as neither candidate opcode fits }
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Three-opcode variant: returns true when instr is an instruction whose
    opcode is op1, op2 or op3 and whose operand size is contained in opsize.
    An empty opsize set accepts any size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      { reject as soon as none of the candidate opcodes fits }
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) and
         (taicpu(instr).opcode <> op3) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { Open-array variant: returns true when instr is an instruction whose opcode
    equals any element of ops and whose operand size is contained in opsize.
    An empty opsize set accepts any size; an empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      idx : longint;
    begin
      result:=false;
      { all tests stay inside the loop so that instr is only inspected when
        there is at least one candidate opcode }
      for idx:=low(ops) to high(ops) do
        if (instr.typ = ait_instruction) and
           (taicpu(instr).opcode = ops[idx]) and
           ((opsize = []) or (taicpu(instr).opsize in opsize)) then
          begin
            result:=true;
            break;
          end;
    end;
  { Returns true when oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := oper.reg = reg
      else
        result := false;
    end;
  { Returns true when oper is a constant operand whose value equals a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := oper.val = a
      else
        result := false;
    end;
  { Returns true when both operands have the same kind and the same contents.
    Constants are compared by value, registers by register, references with
    RefsEqual. Two operands of any other (identical) kind raise
    internalerror 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result := oper1.typ = oper2.typ;
      { operands of different kinds never match }
      if not result then
        exit;
      case oper1.typ of
        top_reg:
          result := oper1.reg = oper2.reg;
        top_ref:
          result := RefsEqual(oper1.ref^, oper2.ref^);
        top_const:
          result := oper1.val = oper2.val;
        else
          internalerror(2013102801);
      end;
    end;
  { Returns true when the two references agree in offset, segment, base,
    index, scale factor, symbol, relative symbol and address kind. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      { equal means: none of the compared fields differs }
      result := not (
        (r1.offset <> r2.offset) or
        (r1.segment <> r2.segment) or
        (r1.base <> r2.base) or
        (r1.index <> r2.index) or
        (r1.scalefactor <> r2.scalefactor) or
        (r1.symbol <> r2.symbol) or
        (r1.refaddr <> r2.refaddr) or
        (r1.relsymbol <> r2.relsymbol));
    end;
  { Returns true when ref has no offset, no segment, no symbol, no relative
    symbol and a scale factor of 0 or 1, and its base/index registers equal
    the given ones. Passing NR_INVALID for base or index skips the check of
    that register. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      { the reference must not carry anything besides base and index }
      if (ref.offset<>0) or
         not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      { NR_INVALID acts as a wildcard for either register }
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      if (index<>NR_INVALID) and (ref.index<>index) then
        exit;
      Result:=true;
    end;
  { Returns true when instr has exactly two operands, the first of kind ot0
    and the second of kind ot1. instr is cast to taicpu without a type check,
    so the caller has to pass an instruction. }
  function MatchOpType(const instr : tai;ot0,ot1 : toptype) : Boolean;
    var
      ins : taicpu;
    begin
      ins:=taicpu(instr);
      Result:=false;
      { operands are only inspected once the count is known to be two }
      if ins.ops<>2 then
        exit;
      Result:=(ins.oper[0]^.typ=ot0) and
        (ins.oper[1]^.typ=ot1);
    end;
  { Returns true when instruction hp loads reg with a value that does not
    depend on the previous contents of reg.

    Recognised are two-operand move-like instructions (mov, movzx, movsx, lea
    and the listed SSE/AVX moves) whose destination operand is reg and whose
    source is a constant, a different register, or a memory reference that
    does not use reg, as well as "pop reg". Registers are compared by super
    register only.

    Returns false when hp is nil or not an instruction. }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      Result := false;
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_MOV,
        A_MOVZX,
        A_MOVSX,
        A_LEA,
        A_VMOVSS,
        A_VMOVSD,
        A_VMOVAPD,
        A_VMOVAPS,
        A_VMOVQ,
        A_MOVSS,
        A_MOVSD,
        A_MOVQ,
        A_MOVAPD,
        A_MOVAPS:
          Result :=
            { oper[1] is only the destination of a two-operand form: the
              three-operand vmovss/vmovsd keep their destination in oper[2],
              and the operand-less string movsd has no oper[1] at all }
            (p.ops = 2) and
            (p.oper[1]^.typ = top_reg) and
            (getsupreg(p.oper[1]^.reg) = getsupreg(reg)) and
            ((p.oper[0]^.typ = top_const) or
             ((p.oper[0]^.typ = top_reg) and
              (getsupreg(p.oper[0]^.reg) <> getsupreg(reg))) or
             ((p.oper[0]^.typ = top_ref) and
              not RegInRef(reg,p.oper[0]^.ref^)));
        A_POP:
          Result :=
            (p.ops = 1) and
            { pop may also target memory; the reg field is only meaningful
              for a register operand }
            (p.oper[0]^.typ = top_reg) and
            (getsupreg(p.oper[0]^.reg) = getsupreg(reg));
      end;
    end;
  { Pass 1 peephole optimization for a vmovaps/vmovapd instruction p.

    Only register-to-register moves are handled. Three patterns are rewritten:
      1. a move of a register onto itself is removed;
      2. two consecutive moves reg1->reg2->reg3 are merged, or the second move
         is dropped when it merely copies reg2 back to reg1;
      3. move reg1->reg2, FMA with destination reg2, move reg2->reg1 is
         reduced to the FMA working directly on reg1.

    p may be freed and replaced by a following instruction (pattern 1 and 3).
    Returns true when the instruction list was changed. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
      Reg2StillUsed : boolean;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            nothing }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  Reg2StillUsed:=RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs);
                  { the copy made by CopyUsedRegs has to be handed back,
                    whichever branch is taken below }
                  ReleaseUsedRegs(TmpUsedRegs);
                  if not Reg2StillUsed then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                (taicpu(hp1).ops=3) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                MatchOpType(taicpu(hp2),top_reg,top_reg) and
                { the trailing move must copy reg2 back into reg1; a move
                  from any other register into reg1 must not be removed }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  { vmova* reg1,reg2
                    vfma*  x,y,reg2
                    vmova* reg2,reg1
                    dealloc reg2
                    =>
                    vfma*  x,y,reg1 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  Reg2StillUsed:=RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs);
                  ReleaseUsedRegs(TmpUsedRegs);
                  if not Reg2StillUsed then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                      { the list was modified, report it like the other patterns }
                      result:=true;
                    end;
                end;
            end;
        end;
    end;
  { Pass 1 peephole optimization for a three-operand AVX instruction p that is
    followed by a register-to-register vmovaps/vmovapd copying p's third
    operand:

      vop    x,y,reg1
      vmova* reg1,reg2
      dealloc reg1
      =>
      vop    x,y,reg2

    The rewrite is only done when reg1 is not used after the move.
    Returns true when the instruction list was changed. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
      Reg1StillUsed : boolean;
    begin
      result:=false;
      { oper[2] is only valid for a three-operand instruction }
      if (taicpu(p).ops=3) and
        GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        { both operands of the move have to be registers: the source register
          is read below and the destination becomes p's new third operand }
        MatchOpType(taicpu(hp1),top_reg,top_reg) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          Reg1StillUsed:=RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs);
          { hand back the copy made by CopyUsedRegs }
          ReleaseUsedRegs(TmpUsedRegs);
          if not Reg1StillUsed then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
        end;
    end;
  { Post peephole optimization for a mov instruction p:
    changes "mov $0, %reg" into "xor %reg, %reg".
    Nothing is changed while NR_DEFAULTFLAGS is marked as used in UsedRegs
    (presumably because xor alters the flags, unlike mov). }
  procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
    var
      ins : taicpu;
    begin
      ins:=taicpu(p);
      { the source has to be the constant 0 ... }
      if not MatchOperand(ins.oper[0]^,0) then
        exit;
      { ... the destination a register ... }
      if ins.oper[1]^.typ <> Top_Reg then
        exit;
      { ... and the flags must currently be free }
      if RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs) then
        exit;
      ins.opcode := A_XOR;
      ins.loadReg(0,ins.oper[1]^.reg);
    end;
  291. end.