aoptcpu.pas 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
  3. This unit calls the optimization procedures to optimize the assembler
  4. code for x86-64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses cpubase, aasmtai, aopt, aoptcpub;
type
  { CPU-specific assembler optimizer. It derives from TAsmOptimizer and
    overrides only the first peephole pass hook; this class is registered as
    the active optimizer class in the unit's initialization code. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { Examines the list entry p and, when it starts one of the recognised
      instruction patterns, rewrites that pattern in place. p is a var
      parameter because a rewrite may free the entry it pointed to and make
      it refer to a following instruction instead. }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  end;
  26. implementation
  27. uses
  28. globtype, globals,
  29. cutils,
  30. verbose,
  31. cgbase, cgutils,
  32. aoptobj,
  33. aasmbase, aasmdata, aasmcpu;
  34. function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  35. begin
  36. isFoldableArithOp := False;
  37. case hp1.opcode of
  38. A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
  39. isFoldableArithOp :=
  40. (taicpu(hp1).oper[1]^.typ = top_reg) and
  41. (taicpu(hp1).oper[1]^.reg = reg) and
  42. ((taicpu(hp1).oper[0]^.typ = top_const) or
  43. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  44. (taicpu(hp1).oper[0]^.reg<>reg)));
  45. A_INC, A_DEC:
  46. isFoldableArithOp :=
  47. (taicpu(hp1).oper[0]^.typ = top_reg) and
  48. (taicpu(hp1).oper[0]^.reg = reg);
  49. end;
  50. end;
  51. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  52. var
  53. next1: tai;
  54. hp1, hp2: tai;
  55. GetNextIntruction_p : boolean;
  56. TmpUsedRegs : TAllUsedRegs;
  57. begin
  58. Result := False;
  59. case p.typ of
  60. ait_instruction:
  61. begin
  62. case taicpu(p).opcode of
  63. A_AND:
  64. begin
  65. if (taicpu(p).oper[0]^.typ = top_const) and
  66. (taicpu(p).oper[1]^.typ = top_reg) and
  67. GetNextInstruction(p, hp1) and
  68. (tai(hp1).typ = ait_instruction) and
  69. (taicpu(hp1).opcode = A_AND) and
  70. (taicpu(hp1).oper[0]^.typ = top_const) and
  71. (taicpu(hp1).oper[1]^.typ = top_reg) and
  72. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(
  73. taicpu(hp1).oper[1]^.reg)) and
  74. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(
  75. taicpu(hp1).oper[1]^.reg)) and
  76. (abs(taicpu(p).oper[0]^.val and
  77. taicpu(hp1).oper[0]^.val)<$80000000) then
  78. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  79. begin
  80. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and
  81. taicpu(hp1).oper[0]^.val);
  82. asml.remove(p);
  83. p.Free;
  84. p:=hp1;
  85. end;
  86. (* else
  87. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  88. jump, but only if it's a conditional jump (PFV) }
  89. if (taicpu(p).oper[1]^.typ = top_reg) and
  90. GetNextInstruction(p, hp1) and
  91. (hp1.typ = ait_instruction) and
  92. (taicpu(hp1).is_jmp) and
  93. (taicpu(hp1).opcode<>A_JMP) and
  94. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  95. taicpu(p).opcode := A_TEST;*)
  96. end;
  97. A_MOV:
  98. { removes superfluous And's after mov's }
  99. begin
  100. if not(cs_opt_level3 in current_settings.optimizerswitches) then
  101. exit;
  102. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  103. if (taicpu(p).oper[1]^.typ = top_reg) and
  104. GetNextIntruction_p and
  105. (tai(hp1).typ = ait_instruction) and
  106. (taicpu(hp1).opcode = A_AND) and
  107. (taicpu(hp1).oper[0]^.typ = top_const) and
  108. (taicpu(hp1).oper[1]^.typ = top_reg) and
  109. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  110. case taicpu(p).opsize Of
  111. S_L:
  112. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  113. begin
  114. asml.remove(hp1);
  115. hp1.free;
  116. end;
  117. end
  118. else if (taicpu(p).oper[1]^.typ = top_reg) and
  119. GetNextIntruction_p and
  120. (hp1.typ = ait_instruction) and
  121. GetNextInstruction(hp1, hp2) and
  122. (hp2.typ = ait_instruction) and
  123. (taicpu(hp2).opcode = A_MOV) and
  124. (taicpu(hp2).oper[0]^.typ = top_reg) and
  125. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  126. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  127. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  128. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  129. ) then
  130. { change movsX/movzX reg/ref, reg2 }
  131. { add/sub/or/... reg3/$const, reg2 }
  132. { mov reg2 reg/ref }
  133. { to add/sub/or/... reg3/$const, reg/ref }
  134. begin
  135. CopyUsedRegs(TmpUsedRegs);
  136. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  137. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  138. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  139. begin
  140. { by example:
  141. movswl %si,%eax movswl %si,%eax p
  142. decl %eax addl %edx,%eax hp1
  143. movw %ax,%si movw %ax,%si hp2
  144. ->
  145. movswl %si,%eax movswl %si,%eax p
  146. decw %eax addw %edx,%eax hp1
  147. movw %ax,%si movw %ax,%si hp2
  148. }
  149. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  150. {
  151. ->
  152. movswl %si,%eax movswl %si,%eax p
  153. decw %si addw %dx,%si hp1
  154. movw %ax,%si movw %ax,%si hp2
  155. }
  156. case taicpu(hp1).ops of
  157. 1:
  158. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  159. 2:
  160. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  161. else
  162. internalerror(2008042701);
  163. end;
  164. {
  165. ->
  166. decw %si addw %dx,%si p
  167. }
  168. asml.remove(p);
  169. asml.remove(hp2);
  170. p.Free;
  171. hp2.Free;
  172. p := hp1;
  173. end;
  174. ReleaseUsedRegs(TmpUsedRegs);
  175. end
  176. end;
  177. A_MOVSX,
  178. A_MOVZX:
  179. begin
  180. if (taicpu(p).oper[1]^.typ = top_reg) and
  181. GetNextInstruction(p, hp1) and
  182. (hp1.typ = ait_instruction) and
  183. IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) and
  184. GetNextInstruction(hp1, hp2) and
  185. (hp2.typ = ait_instruction) and
  186. (taicpu(hp2).opcode = A_MOV) and
  187. (taicpu(hp2).oper[0]^.typ = top_reg) and
  188. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) then
  189. { change movsX/movzX reg/ref, reg2 }
  190. { add/sub/or/... reg3/$const, reg2 }
  191. { mov reg2 reg/ref }
  192. { to add/sub/or/... reg3/$const, reg/ref }
  193. begin
  194. { by example:
  195. movswl %si,%eax movswl %si,%eax p
  196. decl %eax addl %edx,%eax hp1
  197. movw %ax,%si movw %ax,%si hp2
  198. ->
  199. movswl %si,%eax movswl %si,%eax p
  200. decw %eax addw %edx,%eax hp1
  201. movw %ax,%si movw %ax,%si hp2
  202. }
  203. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  204. {
  205. ->
  206. movswl %si,%eax movswl %si,%eax p
  207. decw %si addw %dx,%si hp1
  208. movw %ax,%si movw %ax,%si hp2
  209. }
  210. case taicpu(hp1).ops of
  211. 1:
  212. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  213. 2:
  214. begin
  215. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  216. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  217. setsubreg(taicpu(hp1).oper[0]^.reg,
  218. getsubreg(taicpu(hp2).oper[0]^.reg));
  219. end;
  220. else
  221. internalerror(2008042701);
  222. end;
  223. {
  224. ->
  225. decw %si addw %dx,%si p
  226. }
  227. asml.remove(p);
  228. asml.remove(hp2);
  229. p.Free;
  230. hp2.Free;
  231. p := hp1;
  232. end
  233. { removes superfluous And's after movzx's }
  234. else if taicpu(p).opcode = A_MOVZX then
  235. begin
  236. if (taicpu(p).oper[1]^.typ = top_reg) and
  237. GetNextInstruction(p, hp1) and
  238. (tai(hp1).typ = ait_instruction) and
  239. (taicpu(hp1).opcode = A_AND) and
  240. (taicpu(hp1).oper[0]^.typ = top_const) and
  241. (taicpu(hp1).oper[1]^.typ = top_reg) and
  242. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  243. begin
  244. case taicpu(p).opsize of
  245. S_BL, S_BW, S_BQ:
  246. if (taicpu(hp1).oper[0]^.val = $ff) then
  247. begin
  248. asml.remove(hp1);
  249. hp1.Free;
  250. end;
  251. S_WL, S_WQ:
  252. if (taicpu(hp1).oper[0]^.val = $ffff) then
  253. begin
  254. asml.remove(hp1);
  255. hp1.Free;
  256. end;
  257. S_LQ:
  258. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  259. begin
  260. asml.remove(hp1);
  261. hp1.Free;
  262. end;
  263. end;
  264. end;
  265. { changes some movzx constructs to faster synonims (all examples
  266. are given with eax/ax, but are also valid for other registers)}
  267. if (taicpu(p).oper[1]^.typ = top_reg) then
  268. if (taicpu(p).oper[0]^.typ = top_reg) then
  269. case taicpu(p).opsize of
  270. S_BW:
  271. begin
  272. if (getsupreg(taicpu(p).oper[0]^.reg) =
  273. getsupreg(taicpu(p).oper[1]^.reg)) and not
  274. (cs_opt_size in current_settings.optimizerswitches) then
  275. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  276. begin
  277. taicpu(p).opcode := A_AND;
  278. taicpu(p).changeopsize(S_W);
  279. taicpu(p).loadConst(0, $ff);
  280. end
  281. else if GetNextInstruction(p, hp1) and
  282. (tai(hp1).typ = ait_instruction) and
  283. (taicpu(hp1).opcode = A_AND) and
  284. (taicpu(hp1).oper[0]^.typ = top_const) and
  285. (taicpu(hp1).oper[1]^.typ = top_reg) and
  286. (taicpu(hp1).oper[1]^.reg =
  287. taicpu(p).oper[1]^.reg) then
  288. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  289. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  290. begin
  291. taicpu(p).opcode := A_MOV;
  292. taicpu(p).changeopsize(S_W);
  293. setsubreg(taicpu(p).oper[0]^.reg, R_SUBW);
  294. taicpu(hp1).loadConst(
  295. 0, taicpu(hp1).oper[0]^.val and $ff);
  296. end;
  297. end;
  298. S_BL:
  299. begin
  300. if (getsupreg(taicpu(p).oper[0]^.reg) =
  301. getsupreg(taicpu(p).oper[1]^.reg)) and not
  302. (cs_opt_size in current_settings.optimizerswitches) then
  303. { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  304. begin
  305. taicpu(p).opcode := A_AND;
  306. taicpu(p).changeopsize(S_L);
  307. taicpu(p).loadConst(0, $ff);
  308. end
  309. else if GetNextInstruction(p, hp1) and
  310. (tai(hp1).typ = ait_instruction) and
  311. (taicpu(hp1).opcode = A_AND) and
  312. (taicpu(hp1).oper[0]^.typ = top_const) and
  313. (taicpu(hp1).oper[1]^.typ = top_reg) and
  314. (taicpu(hp1).oper[1]^.reg =
  315. taicpu(p).oper[1]^.reg) then
  316. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  317. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  318. begin
  319. taicpu(p).opcode := A_MOV;
  320. taicpu(p).changeopsize(S_L);
  321. setsubreg(taicpu(p).oper[0]^.reg, R_SUBWHOLE);
  322. taicpu(hp1).loadConst(
  323. 0, taicpu(hp1).oper[0]^.val and $ff);
  324. end;
  325. end;
  326. S_WL:
  327. begin
  328. if (getsupreg(taicpu(p).oper[0]^.reg) =
  329. getsupreg(taicpu(p).oper[1]^.reg)) and not
  330. (cs_opt_size in current_settings.optimizerswitches) then
  331. { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
  332. begin
  333. taicpu(p).opcode := A_AND;
  334. taicpu(p).changeopsize(S_L);
  335. taicpu(p).loadConst(0, $ffff);
  336. end
  337. else if GetNextInstruction(p, hp1) and
  338. (tai(hp1).typ = ait_instruction) and
  339. (taicpu(hp1).opcode = A_AND) and
  340. (taicpu(hp1).oper[0]^.typ = top_const) and
  341. (taicpu(hp1).oper[1]^.typ = top_reg) and
  342. (taicpu(hp1).oper[1]^.reg =
  343. taicpu(p).oper[1]^.reg) then
  344. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  345. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  346. begin
  347. taicpu(p).opcode := A_MOV;
  348. taicpu(p).changeopsize(S_L);
  349. setsubreg(taicpu(p).oper[0]^.reg, R_SUBWHOLE);
  350. taicpu(hp1).loadConst(
  351. 0, taicpu(hp1).oper[0]^.val and $ffff);
  352. end;
  353. end;
  354. end
  355. else if (taicpu(p).oper[0]^.typ = top_ref) then
  356. begin
  357. if GetNextInstruction(p, hp1) and
  358. (tai(hp1).typ = ait_instruction) and
  359. (taicpu(hp1).opcode = A_AND) and
  360. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  361. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  362. (taicpu(hp1).oper[1]^.reg =
  363. taicpu(p).oper[1]^.reg) then
  364. begin
  365. taicpu(p).opcode := A_MOV;
  366. case taicpu(p).opsize of
  367. S_BL:
  368. begin
  369. taicpu(p).changeopsize(S_L);
  370. taicpu(hp1).loadConst(
  371. 0, taicpu(hp1).oper[0]^.val and $ff);
  372. end;
  373. S_WL:
  374. begin
  375. taicpu(p).changeopsize(S_L);
  376. taicpu(hp1).loadConst(
  377. 0, taicpu(hp1).oper[0]^.val and $ffff);
  378. end;
  379. S_BW:
  380. begin
  381. taicpu(p).changeopsize(S_W);
  382. taicpu(hp1).loadConst(
  383. 0, taicpu(hp1).oper[0]^.val and $ff);
  384. end;
  385. S_BQ:
  386. begin
  387. taicpu(p).changeopsize(S_Q);
  388. taicpu(hp1).loadConst(
  389. 0, taicpu(hp1).oper[0]^.val and $ff);
  390. end;
  391. S_WQ:
  392. begin
  393. taicpu(p).changeopsize(S_Q);
  394. taicpu(hp1).loadConst(
  395. 0, taicpu(hp1).oper[0]^.val and $ffff);
  396. end;
  397. S_LQ:
  398. begin
  399. taicpu(p).changeopsize(S_Q);
  400. taicpu(hp1).loadConst(
  401. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  402. end;
  403. end;
  404. end;
  405. end;
  406. end;
  407. end;
  408. end;
  409. end;
  410. end;
  411. end;
  412. begin
  413. casmoptimizer := TCpuAsmOptimizer;
  414. end.