aoptcpu.pas 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
  3. This unit calls the optimization procedures to optimize the assembler
  4. code for x86-64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses cgbase, cpubase, aasmtai, aopt, aoptx86, aoptcpub;
  22. type
  { CPU specific assembler optimizer of this unit.  It derives from
    TX86AsmOptimizer and overrides the three per-entry peephole hooks; each
    hook receives the current list entry in p (by reference) and returns a
    boolean. }
  TCpuAsmOptimizer = class(TX86AsmOptimizer)
    { first peephole pass: handles AND, MOV, MOVSX/MOVZX, VMOVAPS/VMOVAPD
      and the scalar AVX add/sub/mul/div opcodes }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    { second peephole pass: handles MOV only }
    function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
    { post peephole step: handles MOV only }
    function PostPeepHoleOptsCpu(var p: tai): boolean; override;
  end;
  28. implementation
  29. uses
  30. globtype, globals,
  31. cutils,
  32. verbose,
  33. cgutils,
  34. aoptobj,
  35. aasmbase, aasmdata, aasmcpu,
  36. itcpugas;
  { First-pass peephole optimizations for one entry of the assembler list.

    p       the entry being examined.  It is passed by reference because the
            movsx/movzx folding below removes p from the list, frees it and
            re-points p at the instruction that followed it.
    Result  for AND, MOV, VMOVAPS/VMOVAPD and the scalar AVX arithmetic
            opcodes: whatever the matching OptPass1* helper returns.
            For MOVSX/MOVZX the rewrites are done in place and Result stays
            False.  For everything else: False.

    Changes compared to the previous revision of this routine:
      - the movsx/movzx folding now verifies that the register stored by the
        trailing mov is really the destination register of the movsx/movzx
        (same super register); before, any register source was accepted and
        an unrelated store could be deleted;
      - two unused local variables were removed. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp1, hp2: tai;
    begin
      Result := False;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_AND:
                Result := OptPass1AND(p);
              A_MOV:
                Result := OptPass1MOV(p);
              A_MOVSX,
              A_MOVZX:
                begin
                  if (taicpu(p).oper[1]^.typ = top_reg) and
                     GetNextInstruction(p, hp1) and
                     (hp1.typ = ait_instruction) and
                     IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) and
                     GetNextInstruction(hp1, hp2) and
                     (hp2.typ = ait_instruction) and
                     (taicpu(hp2).opcode = A_MOV) and
                     (taicpu(hp2).oper[0]^.typ = top_reg) and
                     { the register written back must be (a sub register of)
                       reg2, otherwise hp2 is an unrelated store that must
                       not be removed }
                     (getsupreg(taicpu(hp2).oper[0]^.reg) =
                      getsupreg(taicpu(p).oper[1]^.reg)) and
                     OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) then
                    { change   movsX/movzX  reg/ref, reg2
                               add/sub/or/... reg3/$const, reg2
                               mov          reg2, reg/ref
                      to       add/sub/or/... reg3/$const, reg/ref }
                    { NOTE(review): nothing in this routine checks whether
                      reg2 is still read after hp2 - confirm that this is
                      guaranteed elsewhere. }
                    begin
                      { by example:
                          movswl  %si,%eax        movswl  %si,%eax      p
                          decl    %eax            addl    %edx,%eax     hp1
                          movw    %ax,%si         movw    %ax,%si       hp2
                        ->
                          movswl  %si,%eax        movswl  %si,%eax      p
                          decw    %eax            addw    %edx,%eax     hp1
                          movw    %ax,%si         movw    %ax,%si       hp2
                      }
                      taicpu(hp1).changeopsize(taicpu(hp2).opsize);
                      {
                        ->
                          movswl  %si,%eax        movswl  %si,%eax      p
                          decw    %si             addw    %dx,%si       hp1
                          movw    %ax,%si         movw    %ax,%si       hp2
                      }
                      case taicpu(hp1).ops of
                        1:
                          taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                        2:
                          begin
                            taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                            { a register source has to be narrowed to the
                              size that hp2 stored }
                            if (taicpu(hp1).oper[0]^.typ = top_reg) then
                              setsubreg(taicpu(hp1).oper[0]^.reg,
                                getsubreg(taicpu(hp2).oper[0]^.reg));
                          end;
                        else
                          internalerror(2008042701);
                      end;
                      {
                        ->
                          decw    %si             addw    %dx,%si       p
                      }
                      if (cs_asm_source in current_settings.globalswitches) then
                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var3')),p);
                      asml.remove(p);
                      asml.remove(hp2);
                      p.Free;
                      hp2.Free;
                      p := hp1;
                    end
                  { removes superfluous And's after movzx's }
                  else if taicpu(p).opcode = A_MOVZX then
                    begin
                      if (taicpu(p).oper[1]^.typ = top_reg) and
                         GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_AND) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        begin
                          { an "and" whose mask equals the full width of the
                            zero extended source is dropped }
                          case taicpu(p).opsize of
                            S_BL, S_BW, S_BQ:
                              if (taicpu(hp1).oper[0]^.val = $ff) then
                                begin
                                  if (cs_asm_source in current_settings.globalswitches) then
                                    asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var4')),p);
                                  asml.remove(hp1);
                                  hp1.Free;
                                end;
                            S_WL, S_WQ:
                              if (taicpu(hp1).oper[0]^.val = $ffff) then
                                begin
                                  if (cs_asm_source in current_settings.globalswitches) then
                                    asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var5')),p);
                                  asml.remove(hp1);
                                  hp1.Free;
                                end;
                            S_LQ:
                              if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                                begin
                                  if (cs_asm_source in current_settings.globalswitches) then
                                    asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
                                  asml.remove(hp1);
                                  hp1.Free;
                                end;
                          end;
                        end;
                      { changes some movzx constructs to faster synonyms (all examples
                        are given with eax/ax, but are also valid for other registers) }
                      if (taicpu(p).oper[1]^.typ = top_reg) then
                        if (taicpu(p).oper[0]^.typ = top_reg) then
                          case taicpu(p).opsize of
                            S_BW:
                              begin
                                if (getsupreg(taicpu(p).oper[0]^.reg) =
                                    getsupreg(taicpu(p).oper[1]^.reg)) and not
                                   (cs_opt_size in current_settings.optimizerswitches) then
                                  { Change "movzbw %al, %ax" to "andw $0x0ffh, %ax" }
                                  begin
                                    taicpu(p).opcode := A_AND;
                                    taicpu(p).changeopsize(S_W);
                                    taicpu(p).loadConst(0, $ff);
                                    if (cs_asm_source in current_settings.globalswitches) then
                                      asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var7')),p);
                                  end
                                else if GetNextInstruction(p, hp1) and
                                        (tai(hp1).typ = ait_instruction) and
                                        (taicpu(hp1).opcode = A_AND) and
                                        (taicpu(hp1).oper[0]^.typ = top_const) and
                                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                                        (taicpu(hp1).oper[1]^.reg =
                                         taicpu(p).oper[1]^.reg) then
                                  { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                                    to "movw %reg1, reg2; andw $(const1 and $ff), %reg2" }
                                  begin
                                    if (cs_asm_source in current_settings.globalswitches) then
                                      asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var8')),p);
                                    taicpu(p).opcode := A_MOV;
                                    taicpu(p).changeopsize(S_W);
                                    setsubreg(taicpu(p).oper[0]^.reg, R_SUBW);
                                    taicpu(hp1).loadConst(
                                      0, taicpu(hp1).oper[0]^.val and $ff);
                                  end;
                              end;
                            S_BL:
                              begin
                                if (getsupreg(taicpu(p).oper[0]^.reg) =
                                    getsupreg(taicpu(p).oper[1]^.reg)) and not
                                   (cs_opt_size in current_settings.optimizerswitches) then
                                  { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                                  begin
                                    if (cs_asm_source in current_settings.globalswitches) then
                                      asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var9')),p);
                                    taicpu(p).opcode := A_AND;
                                    taicpu(p).changeopsize(S_L);
                                    taicpu(p).loadConst(0, $ff);
                                  end
                                else if GetNextInstruction(p, hp1) and
                                        (tai(hp1).typ = ait_instruction) and
                                        (taicpu(hp1).opcode = A_AND) and
                                        (taicpu(hp1).oper[0]^.typ = top_const) and
                                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                                        (taicpu(hp1).oper[1]^.reg =
                                         taicpu(p).oper[1]^.reg) then
                                  { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                                    to "movl %reg1, reg2; andl $(const1 and $ff), %reg2" }
                                  begin
                                    if (cs_asm_source in current_settings.globalswitches) then
                                      asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var10')),p);
                                    taicpu(p).opcode := A_MOV;
                                    taicpu(p).changeopsize(S_L);
                                    { do not use R_SUBWHOLE
                                      as movl %rdx,%eax
                                      is invalid in assembler PM }
                                    setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                                    taicpu(hp1).loadConst(
                                      0, taicpu(hp1).oper[0]^.val and $ff);
                                  end;
                              end;
                            S_WL:
                              begin
                                if (getsupreg(taicpu(p).oper[0]^.reg) =
                                    getsupreg(taicpu(p).oper[1]^.reg)) and not
                                   (cs_opt_size in current_settings.optimizerswitches) then
                                  { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                                  begin
                                    if (cs_asm_source in current_settings.globalswitches) then
                                      asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var11')),p);
                                    taicpu(p).opcode := A_AND;
                                    taicpu(p).changeopsize(S_L);
                                    taicpu(p).loadConst(0, $ffff);
                                  end
                                else if GetNextInstruction(p, hp1) and
                                        (tai(hp1).typ = ait_instruction) and
                                        (taicpu(hp1).opcode = A_AND) and
                                        (taicpu(hp1).oper[0]^.typ = top_const) and
                                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                                        (taicpu(hp1).oper[1]^.reg =
                                         taicpu(p).oper[1]^.reg) then
                                  { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                                    to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2" }
                                  begin
                                    if (cs_asm_source in current_settings.globalswitches) then
                                      asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var12')),p);
                                    taicpu(p).opcode := A_MOV;
                                    taicpu(p).changeopsize(S_L);
                                    { do not use R_SUBWHOLE
                                      as movl %rdx,%eax
                                      is invalid in assembler PM }
                                    setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                                    taicpu(hp1).loadConst(
                                      0, taicpu(hp1).oper[0]^.val and $ffff);
                                  end;
                              end;
                          end
                        else if (taicpu(p).oper[0]^.typ = top_ref) then
                          begin
                            { movzx from memory followed by an "and" on the
                              same register: turn the movzx into a plain mov
                              of the destination size and fold the zero
                              extension into the and-mask }
                            if GetNextInstruction(p, hp1) and
                               (tai(hp1).typ = ait_instruction) and
                               (taicpu(hp1).opcode = A_AND) and
                               MatchOpType(taicpu(hp1),top_const,top_reg) and
                               (taicpu(hp1).oper[1]^.reg =
                                taicpu(p).oper[1]^.reg) then
                              begin
                                taicpu(p).opcode := A_MOV;
                                case taicpu(p).opsize of
                                  S_BL:
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var13')),p);
                                      taicpu(p).changeopsize(S_L);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ff);
                                    end;
                                  S_WL:
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var14')),p);
                                      taicpu(p).changeopsize(S_L);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ffff);
                                    end;
                                  S_BW:
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var15')),p);
                                      taicpu(p).changeopsize(S_W);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ff);
                                    end;
                                  S_BQ:
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var16')),p);
                                      taicpu(p).changeopsize(S_Q);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ff);
                                    end;
                                  S_WQ:
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var17')),p);
                                      taicpu(p).changeopsize(S_Q);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ffff);
                                    end;
                                  S_LQ:
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var18')),p);
                                      taicpu(p).changeopsize(S_Q);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ffffffff);
                                    end;
                                end;
                              end;
                          end;
                    end;
                end;
              A_VMOVAPS,
              A_VMOVAPD:
                Result := OptPass1VMOVAP(p);
              A_VDIVSD,
              A_VDIVSS,
              A_VSUBSD,
              A_VSUBSS,
              A_VMULSD,
              A_VMULSS,
              A_VADDSD,
              A_VADDSS:
                Result := OptPass1VOP(p);
            end;
          end;
      end;
    end;
  { Second-pass peephole hook.  Only MOV instructions are handled: for them
    the result of OptPass2MOV is returned.  Any other list entry, and any
    other opcode, yields False and leaves p untouched. }
  function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p : tai) : boolean;
    begin
      Result := False;
      { nothing to do for entries that are not instructions }
      if p.typ <> ait_instruction then
        Exit;
      if taicpu(p).opcode = A_MOV then
        Result := OptPass2MOV(p);
    end;
  { Post-peephole hook.  For a MOV instruction PostPeepholeOptMov is invoked
    on p; every other entry is ignored.  The function itself always returns
    False - the helper is called as a plain statement and nothing it may
    produce is propagated to the caller. }
  function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
    begin
      Result := False;
      if p.typ = ait_instruction then
        begin
          if taicpu(p).opcode = A_MOV then
            PostPeepholeOptMov(p);
        end;
    end;
  { Unit start-up code: stores this unit's optimizer class in casmoptimizer
    (declared outside this unit). }
  initialization
    casmoptimizer := TCpuAsmOptimizer;
  end.