aoptcpu.pas 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
  3. This unit calls the optimization procedures to optimize the assembler
  4. code for x86-64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses cgbase, cpubase, aasmtai, aopt, aoptx86, aoptcpub;
  type
    { CPU-specific assembler optimizer built on top of TX86AsmOptimizer.
      It overrides the three per-instruction peephole hooks; each one receives
      the current list entry in p (passed by reference, so a hook may replace
      it) and returns a boolean. }
    TCpuAsmOptimizer = class(TX86AsmOptimizer)
      { first peephole pass }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
      { second peephole pass }
      function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
      { post-peephole pass }
      function PostPeepHoleOptsCpu(var p: tai): boolean; override;
    end;
  28. implementation
  29. uses
  30. globtype, globals,
  31. cutils,
  32. verbose,
  33. cgutils,
  34. aoptobj,
  35. aasmbase, aasmdata, aasmcpu,
  36. itcpugas;
  { First peephole pass for a single list entry.

    Only entries of type ait_instruction are looked at; the work is dispatched
    on the opcode:
      - A_AND, A_MOV, A_VMOVAPS/A_VMOVAPD and the scalar AVX add/sub/mul/div
        opcodes are forwarded to OptPass1AND, OptPass1MOV, OptPass1VMOVAP and
        OptPass1VOP (not defined in this unit, presumably inherited from
        TX86AsmOptimizer) and their result becomes the function result;
      - A_MOVSX/A_MOVZX sequences are rewritten directly in this routine.
        These rewrites do not touch Result, so it stays False for them.

    p is the entry to inspect.  When the movsx/movzx folding fires, the
    original p is removed from the list and freed, and p is set to the
    arithmetic instruction that followed it. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp1, hp2: tai;
    begin
      Result := False;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_AND:
                Result:=OptPass1AND(p);
              A_MOV:
                Result:=OptPass1MOV(p);
              A_MOVSX,
              A_MOVZX:
                begin
                  if (taicpu(p).oper[1]^.typ = top_reg) and
                     GetNextInstruction(p, hp1) and
                     (hp1.typ = ait_instruction) and
                     IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) and
                     GetNextInstruction(hp1, hp2) and
                     (hp2.typ = ait_instruction) and
                     (taicpu(hp2).opcode = A_MOV) and
                     (taicpu(hp2).oper[0]^.typ = top_reg) and
                     { the value written back by the mov must come from the
                       register loaded by the movsX/movzX (reg2); without this
                       check a mov that stores an unrelated register would be
                       dropped and replaced by the folded arithmetic op }
                     (getsupreg(taicpu(hp2).oper[0]^.reg) =
                      getsupreg(taicpu(p).oper[1]^.reg)) and
                     OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) then
                    { change   movsX/movzX  reg/ref, reg2
                               add/sub/or/... reg3/const, reg2
                               mov          reg2, reg/ref
                      to       add/sub/or/... reg3/const, reg/ref }
                    { NOTE(review): nothing here verifies that reg2 is unused
                      after the mov; the folding leaves reg2 without the
                      computed value - confirm whether a liveness check is
                      needed. }
                    begin
                      { by example:
                          movswl  %si,%eax        movswl  %si,%eax      p
                          decl    %eax            addl    %edx,%eax     hp1
                          movw    %ax,%si         movw    %ax,%si       hp2
                        ->
                          movswl  %si,%eax        movswl  %si,%eax      p
                          decw    %eax            addw    %edx,%eax     hp1
                          movw    %ax,%si         movw    %ax,%si       hp2
                      }
                      taicpu(hp1).changeopsize(taicpu(hp2).opsize);
                      {
                        ->
                          movswl  %si,%eax        movswl  %si,%eax      p
                          decw    %si             addw    %dx,%si       hp1
                          movw    %ax,%si         movw    %ax,%si       hp2
                      }
                      case taicpu(hp1).ops of
                        1:
                          taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                        2:
                          begin
                            taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                            { a register source must be narrowed to the same
                              sub-register size as the one the mov stored }
                            if (taicpu(hp1).oper[0]^.typ = top_reg) then
                              setsubreg(taicpu(hp1).oper[0]^.reg,
                                getsubreg(taicpu(hp2).oper[0]^.reg));
                          end;
                        else
                          internalerror(2008042701);
                      end;
                      {
                        ->
                          decw    %si             addw    %dx,%si       p
                      }
                      if (cs_asm_source in current_settings.globalswitches) then
                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var3')),p);
                      asml.remove(p);
                      asml.remove(hp2);
                      p.Free;
                      hp2.Free;
                      p := hp1;
                    end
                  { removes superfluous And's after movzx's }
                  else if taicpu(p).opcode = A_MOVZX then
                    begin
                      if (taicpu(p).oper[1]^.typ = top_reg) and
                         GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_AND) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        begin
                          { an "and" whose mask equals the full width of the
                            zero-extended source changes nothing: drop it }
                          case taicpu(p).opsize of
                            S_BL, S_BW, S_BQ:
                              if (taicpu(hp1).oper[0]^.val = $ff) then
                                begin
                                  if (cs_asm_source in current_settings.globalswitches) then
                                    asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var4')),p);
                                  asml.remove(hp1);
                                  hp1.Free;
                                end;
                            S_WL, S_WQ:
                              if (taicpu(hp1).oper[0]^.val = $ffff) then
                                begin
                                  if (cs_asm_source in current_settings.globalswitches) then
                                    asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var5')),p);
                                  asml.remove(hp1);
                                  hp1.Free;
                                end;
                            S_LQ:
                              if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                                begin
                                  if (cs_asm_source in current_settings.globalswitches) then
                                    asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
                                  asml.remove(hp1);
                                  hp1.Free;
                                end;
                          end;
                        end;
                      { changes some movzx constructs to faster synonyms (all
                        examples are given with eax/ax, but are also valid for
                        other registers) }
                      if (taicpu(p).oper[1]^.typ = top_reg) then
                        begin
                          if (taicpu(p).oper[0]^.typ = top_reg) then
                            case taicpu(p).opsize of
                              S_BW:
                                begin
                                  if (getsupreg(taicpu(p).oper[0]^.reg) =
                                      getsupreg(taicpu(p).oper[1]^.reg)) and not
                                     (cs_opt_size in current_settings.optimizerswitches) then
                                    { Change "movzbw %al, %ax" to "andw $0x0ffh, %ax" }
                                    begin
                                      taicpu(p).opcode := A_AND;
                                      taicpu(p).changeopsize(S_W);
                                      taicpu(p).loadConst(0, $ff);
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var7')),p);
                                    end
                                  else if GetNextInstruction(p, hp1) and
                                     (tai(hp1).typ = ait_instruction) and
                                     (taicpu(hp1).opcode = A_AND) and
                                     (taicpu(hp1).oper[0]^.typ = top_const) and
                                     (taicpu(hp1).oper[1]^.typ = top_reg) and
                                     (taicpu(hp1).oper[1]^.reg =
                                      taicpu(p).oper[1]^.reg) then
                                    { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                                      to "movw %reg1, reg2; andw $(const1 and $ff), %reg2" }
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var8')),p);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_W);
                                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBW);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ff);
                                    end;
                                end;
                              S_BL:
                                begin
                                  if (getsupreg(taicpu(p).oper[0]^.reg) =
                                      getsupreg(taicpu(p).oper[1]^.reg)) and not
                                     (cs_opt_size in current_settings.optimizerswitches) then
                                    { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var9')),p);
                                      taicpu(p).opcode := A_AND;
                                      taicpu(p).changeopsize(S_L);
                                      taicpu(p).loadConst(0, $ff);
                                    end
                                  else if GetNextInstruction(p, hp1) and
                                     (tai(hp1).typ = ait_instruction) and
                                     (taicpu(hp1).opcode = A_AND) and
                                     (taicpu(hp1).oper[0]^.typ = top_const) and
                                     (taicpu(hp1).oper[1]^.typ = top_reg) and
                                     (taicpu(hp1).oper[1]^.reg =
                                      taicpu(p).oper[1]^.reg) then
                                    { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                                      to "movl %reg1, reg2; andl $(const1 and $ff), %reg2" }
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var10')),p);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_L);
                                      { do not use R_SUBWHOLE
                                        as movl %rdx,%eax
                                        is invalid in assembler PM }
                                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ff);
                                    end;
                                end;
                              S_WL:
                                begin
                                  if (getsupreg(taicpu(p).oper[0]^.reg) =
                                      getsupreg(taicpu(p).oper[1]^.reg)) and not
                                     (cs_opt_size in current_settings.optimizerswitches) then
                                    { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var11')),p);
                                      taicpu(p).opcode := A_AND;
                                      taicpu(p).changeopsize(S_L);
                                      taicpu(p).loadConst(0, $ffff);
                                    end
                                  else if GetNextInstruction(p, hp1) and
                                     (tai(hp1).typ = ait_instruction) and
                                     (taicpu(hp1).opcode = A_AND) and
                                     (taicpu(hp1).oper[0]^.typ = top_const) and
                                     (taicpu(hp1).oper[1]^.typ = top_reg) and
                                     (taicpu(hp1).oper[1]^.reg =
                                      taicpu(p).oper[1]^.reg) then
                                    { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                                      to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2" }
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var12')),p);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_L);
                                      { do not use R_SUBWHOLE
                                        as movl %rdx,%eax
                                        is invalid in assembler PM }
                                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ffff);
                                    end;
                                end;
                            end
                          else if (taicpu(p).oper[0]^.typ = top_ref) then
                            begin
                              { movzx from memory followed by "and const" on the
                                same register: turn the movzx into a plain mov
                                of the destination size and fold the implied
                                zero-extension into the and-mask }
                              if GetNextInstruction(p, hp1) and
                                 (tai(hp1).typ = ait_instruction) and
                                 (taicpu(hp1).opcode = A_AND) and
                                 MatchOpType(taicpu(hp1),top_const,top_reg) and
                                 (taicpu(hp1).oper[1]^.reg =
                                  taicpu(p).oper[1]^.reg) then
                                begin
                                  taicpu(p).opcode := A_MOV;
                                  case taicpu(p).opsize of
                                    S_BL:
                                      begin
                                        if (cs_asm_source in current_settings.globalswitches) then
                                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var13')),p);
                                        taicpu(p).changeopsize(S_L);
                                        taicpu(hp1).loadConst(
                                          0, taicpu(hp1).oper[0]^.val and $ff);
                                      end;
                                    S_WL:
                                      begin
                                        if (cs_asm_source in current_settings.globalswitches) then
                                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var14')),p);
                                        taicpu(p).changeopsize(S_L);
                                        taicpu(hp1).loadConst(
                                          0, taicpu(hp1).oper[0]^.val and $ffff);
                                      end;
                                    S_BW:
                                      begin
                                        if (cs_asm_source in current_settings.globalswitches) then
                                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var15')),p);
                                        taicpu(p).changeopsize(S_W);
                                        taicpu(hp1).loadConst(
                                          0, taicpu(hp1).oper[0]^.val and $ff);
                                      end;
                                    S_BQ:
                                      begin
                                        if (cs_asm_source in current_settings.globalswitches) then
                                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var16')),p);
                                        taicpu(p).changeopsize(S_Q);
                                        taicpu(hp1).loadConst(
                                          0, taicpu(hp1).oper[0]^.val and $ff);
                                      end;
                                    S_WQ:
                                      begin
                                        if (cs_asm_source in current_settings.globalswitches) then
                                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var17')),p);
                                        taicpu(p).changeopsize(S_Q);
                                        taicpu(hp1).loadConst(
                                          0, taicpu(hp1).oper[0]^.val and $ffff);
                                      end;
                                    S_LQ:
                                      begin
                                        if (cs_asm_source in current_settings.globalswitches) then
                                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var18')),p);
                                        taicpu(p).changeopsize(S_Q);
                                        taicpu(hp1).loadConst(
                                          0, taicpu(hp1).oper[0]^.val and $ffffffff);
                                      end;
                                  end;
                                end;
                            end;
                        end;
                    end;
                end;
              A_VMOVAPS,
              A_VMOVAPD:
                result:=OptPass1VMOVAP(p);
              A_VDIVSD,
              A_VDIVSS,
              A_VSUBSD,
              A_VSUBSS,
              A_VMULSD,
              A_VMULSS,
              A_VADDSD,
              A_VADDSS:
                result:=OptPass1VOP(p);
            end;
          end;
      end;
    end;
  { Second peephole pass for a single list entry.
    Entries that are not instructions are ignored.  A_MOV is handed to
    OptPass2MOV and A_IMUL to OptPass2Imul, and the helper's result is
    returned; every other opcode yields False. }
  function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p : tai) : boolean;
    begin
      Result := False;
      { nothing to do for labels, comments, data, ... }
      if p.typ <> ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_MOV:
          Result := OptPass2MOV(p);
        A_IMUL:
          Result := OptPass2Imul(p);
      end;
    end;
  { Post-peephole pass for a single list entry.
    Calls PostPeepholeOptMov for A_MOV instructions and does nothing for any
    other entry.  The function result is always False. }
  function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
    begin
      result := false;
      if p.typ = ait_instruction then
        if taicpu(p).opcode = A_MOV then
          PostPeepholeOptMov(p);
    end;
  { Unit start-up code: store this unit's optimizer class in casmoptimizer
    (declared outside this unit). }
  initialization
    casmoptimizer := TCpuAsmOptimizer;
  end.