aoptcpu.pas 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
  This unit calls the optimization procedures to optimize the assembler
  code for x86-64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses cpubase, aasmtai, aopt, aoptcpub;
  type
    { Assembler optimizer for this target. It derives from TAsmOptimizer and
      only overrides the CPU-specific hook of the first peephole pass. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { Inspects the list entry p and, when p is an AND, MOV, MOVSX or MOVZX
        instruction, tries to simplify it together with the instructions that
        follow it. p is a var parameter because the implementation may free
        the entry and move p on to the next instruction. }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    end;
  26. implementation
  27. uses
  28. globtype, globals,
  29. cutils,
  30. verbose,
  31. cgbase, cgutils,
  32. aoptobj,
  33. aasmbase, aasmdata, aasmcpu;
  { Tells whether hp1 is an arithmetic, logic or shift instruction that
    modifies the register reg in place:

      - ADD/SUB/OR/XOR/AND/SHL/SHR/SAR qualify when the second operand is
        reg and the first operand is either a constant or a register other
        than reg;
      - INC/DEC qualify when their single operand is reg.

    Every other opcode yields False. }
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      case hp1.opcode of
        A_INC, A_DEC:
          Result := (hp1.oper[0]^.typ = top_reg) and
                    (hp1.oper[0]^.reg = reg);
        A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
          begin
            Result := False;
            { the destination has to be exactly the requested register }
            if (hp1.oper[1]^.typ = top_reg) and
               (hp1.oper[1]^.reg = reg) then
              case hp1.oper[0]^.typ of
                top_const:
                  Result := True;
                top_reg:
                  { the source must not be the destination itself }
                  Result := hp1.oper[0]^.reg <> reg;
              end;
          end;
        else
          Result := False;
      end;
    end;
  { CPU-specific part of the first peephole pass.

    p is the assembler list entry under inspection. Only instructions are
    handled, and of those only AND, MOV, MOVSX and MOVZX:

      A_AND          merges two consecutive "and const, reg" on the same register.
      A_MOV          (only with cs_opt_level3) drops a redundant "and $ffffffff"
                     after a 32 bit mov, or folds "mov x,reg; op ..,reg; mov reg,x"
                     into a single "op ..,x".
      A_MOVSX/MOVZX  the same folding for extending loads; for MOVZX also
                     removes redundant AND masks and rewrites the movzx into a
                     plain mov/and where a following AND makes that possible.

    When a rule deletes p itself, p is set to the instruction that followed it
    (hp1). The function result stays False on every path; no rule in this
    routine sets it to True. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp1, hp2: tai;
      HasNextInstruction, CanFold: boolean;
      TmpUsedRegs: TAllUsedRegs;
    begin
      Result := False;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_AND:
                begin
                  if (taicpu(p).oper[0]^.typ = top_const) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     GetNextInstruction(p, hp1) and
                     (tai(hp1).typ = ait_instruction) and
                     (taicpu(hp1).opcode = A_AND) and
                     (taicpu(hp1).oper[0]^.typ = top_const) and
                     (taicpu(hp1).oper[1]^.typ = top_reg) and
                     (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(
                       taicpu(hp1).oper[1]^.reg)) and
                     (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(
                       taicpu(hp1).oper[1]^.reg)) and
                     (abs(taicpu(p).oper[0]^.val and
                       taicpu(hp1).oper[0]^.val)<$80000000) then
                    { change "and const1, reg; and const2, reg" to "and (const1 and const2), reg" }
                    begin
                      taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and
                        taicpu(hp1).oper[0]^.val);
                      if (cs_asm_source in current_settings.globalswitches) then
                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var1')),p);
                      asml.remove(p);
                      p.Free;
                      p:=hp1;
                    end;
  (*                else
                    {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
                     jump, but only if it's a conditional jump (PFV) }
                    if (taicpu(p).oper[1]^.typ = top_reg) and
                       GetNextInstruction(p, hp1) and
                       (hp1.typ = ait_instruction) and
                       (taicpu(hp1).is_jmp) and
                       (taicpu(hp1).opcode<>A_JMP) and
                       not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
                      taicpu(p).opcode := A_TEST;*)
                end;
              A_MOV:
                begin
                  { both MOV rules are only applied at optimization level 3 }
                  if not(cs_opt_level3 in current_settings.optimizerswitches) then
                    exit;
                  HasNextInstruction:=GetNextInstruction(p, hp1);
                  { removes superfluous And's after mov's }
                  if (taicpu(p).oper[1]^.typ = top_reg) and
                     HasNextInstruction and
                     (tai(hp1).typ = ait_instruction) and
                     (taicpu(hp1).opcode = A_AND) and
                     (taicpu(hp1).oper[0]^.typ = top_const) and
                     (taicpu(hp1).oper[1]^.typ = top_reg) and
                     (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    case taicpu(p).opsize Of
                      S_L:
                        if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                          begin
                            if (cs_asm_source in current_settings.globalswitches) then
                              asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2a')),p);
                            asml.remove(hp1);
                            hp1.free;
                          end;
                    end
                  else if (taicpu(p).oper[1]^.typ = top_reg) and
                     HasNextInstruction and
                     (hp1.typ = ait_instruction) and
                     GetNextInstruction(hp1, hp2) and
                     (hp2.typ = ait_instruction) and
                     (taicpu(hp2).opcode = A_MOV) and
                     (taicpu(hp2).oper[0]^.typ = top_reg) and
                     { the value written back has to come from the register that
                       p loaded and hp1 modified; a store of any other register
                       to the same location must not be folded away }
                     (getsupreg(taicpu(hp2).oper[0]^.reg) =
                       getsupreg(taicpu(p).oper[1]^.reg)) and
                     OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
                     (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
                      ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
                       IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
                     ) then
                    { change   movsX/movzX    reg/ref, reg2 }
                    {          add/sub/or/... reg3/$const, reg2 }
                    {          mov            reg2 reg/ref }
                    { to       add/sub/or/... reg3/$const, reg/ref }
                    begin
                      CopyUsedRegs(TmpUsedRegs);
                      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                      UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                      { the load may only disappear if nobody reads reg2 later on }
                      If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                        begin
                          { by example:
                              movswl  %si,%eax        movswl  %si,%eax      p
                              decl    %eax            addl    %edx,%eax     hp1
                              movw    %ax,%si         movw    %ax,%si       hp2
                            ->
                              movswl  %si,%eax        movswl  %si,%eax      p
                              decw    %eax            addw    %edx,%eax     hp1
                              movw    %ax,%si         movw    %ax,%si       hp2
                          }
                          if (cs_asm_source in current_settings.globalswitches) then
                            begin
                              asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2')),p);
                              asml.insertbefore(tai_comment.create(strpnew('P='+std_op2str[taicpu(p).opcode])),p);
                              asml.insertbefore(tai_comment.create(strpnew('HP1='+std_op2str[taicpu(hp1).opcode])),p);
                              asml.insertbefore(tai_comment.create(strpnew('HP2='+std_op2str[taicpu(hp2).opcode])),p);
                            end;
                          taicpu(hp1).changeopsize(taicpu(hp2).opsize);
                          {
                            ->
                              movswl  %si,%eax        movswl  %si,%eax      p
                              decw    %si             addw    %dx,%si       hp1
                              movw    %ax,%si         movw    %ax,%si       hp2
                          }
                          case taicpu(hp1).ops of
                            1:
                              taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                            2:
                              taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                            else
                              internalerror(2008042701);
                          end;
                          {
                            ->
                              decw    %si             addw    %dx,%si       p
                          }
                          if (cs_asm_source in current_settings.globalswitches) then
                            asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2')),p);
                          asml.remove(p);
                          asml.remove(hp2);
                          p.Free;
                          hp2.Free;
                          p := hp1;
                        end;
                      ReleaseUsedRegs(TmpUsedRegs);
                    end
                end;
              A_MOVSX,
              A_MOVZX:
                begin
                  { candidate for
                      change   movsX/movzX    reg/ref, reg2
                               add/sub/or/... reg3/$const, reg2
                               mov            reg2 reg/ref
                      to       add/sub/or/... reg3/$const, reg/ref }
                  CanFold :=
                    (taicpu(p).oper[1]^.typ = top_reg) and
                    GetNextInstruction(p, hp1) and
                    (hp1.typ = ait_instruction) and
                    IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) and
                    GetNextInstruction(hp1, hp2) and
                    (hp2.typ = ait_instruction) and
                    (taicpu(hp2).opcode = A_MOV) and
                    (taicpu(hp2).oper[0]^.typ = top_reg) and
                    { the store has to write back reg2 and not some other register }
                    (getsupreg(taicpu(hp2).oper[0]^.reg) =
                      getsupreg(taicpu(p).oper[1]^.reg)) and
                    OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^);
                  if CanFold then
                    begin
                      { the fold removes the load into reg2, so reg2 must be dead
                        after hp2 (same check as in the A_MOV rule). If it is still
                        live, fall through to the remaining movzx rules below. }
                      CopyUsedRegs(TmpUsedRegs);
                      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                      UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                      CanFold := not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs));
                      ReleaseUsedRegs(TmpUsedRegs);
                    end;
                  { NOTE(review): folding SHR/SAR to the narrower store size looks
                    questionable after a sign-extending load, because the bits
                    shifted in from above the narrow value would differ. Confirm
                    before relying on it. }
                  if CanFold then
                    begin
                      { by example:
                          movswl  %si,%eax        movswl  %si,%eax      p
                          decl    %eax            addl    %edx,%eax     hp1
                          movw    %ax,%si         movw    %ax,%si       hp2
                        ->
                          movswl  %si,%eax        movswl  %si,%eax      p
                          decw    %eax            addw    %edx,%eax     hp1
                          movw    %ax,%si         movw    %ax,%si       hp2
                      }
                      taicpu(hp1).changeopsize(taicpu(hp2).opsize);
                      {
                        ->
                          movswl  %si,%eax        movswl  %si,%eax      p
                          decw    %si             addw    %dx,%si       hp1
                          movw    %ax,%si         movw    %ax,%si       hp2
                      }
                      case taicpu(hp1).ops of
                        1:
                          taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                        2:
                          begin
                            taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                            { shrink the source register to the new operand size,
                              except for shifts: their register operand is the
                              shift count, which x86 only accepts in %cl }
                            if (taicpu(hp1).oper[0]^.typ = top_reg) and
                               (taicpu(hp1).opcode <> A_SHL) and
                               (taicpu(hp1).opcode <> A_SHR) and
                               (taicpu(hp1).opcode <> A_SAR) then
                              setsubreg(taicpu(hp1).oper[0]^.reg,
                                getsubreg(taicpu(hp2).oper[0]^.reg));
                          end;
                        else
                          internalerror(2008042701);
                      end;
                      {
                        ->
                          decw    %si             addw    %dx,%si       p
                      }
                      if (cs_asm_source in current_settings.globalswitches) then
                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var3')),p);
                      asml.remove(p);
                      asml.remove(hp2);
                      p.Free;
                      hp2.Free;
                      p := hp1;
                    end
                  { removes superfluous And's after movzx's }
                  else if taicpu(p).opcode = A_MOVZX then
                    begin
                      if (taicpu(p).oper[1]^.typ = top_reg) and
                         GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_AND) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        begin
                          { an AND with exactly the mask the zero extension already
                            guarantees is a no-op and can go }
                          case taicpu(p).opsize of
                            S_BL, S_BW, S_BQ:
                              if (taicpu(hp1).oper[0]^.val = $ff) then
                                begin
                                  if (cs_asm_source in current_settings.globalswitches) then
                                    asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var4')),p);
                                  asml.remove(hp1);
                                  hp1.Free;
                                end;
                            S_WL, S_WQ:
                              if (taicpu(hp1).oper[0]^.val = $ffff) then
                                begin
                                  if (cs_asm_source in current_settings.globalswitches) then
                                    asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var5')),p);
                                  asml.remove(hp1);
                                  hp1.Free;
                                end;
                            S_LQ:
                              if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                                begin
                                  if (cs_asm_source in current_settings.globalswitches) then
                                    asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
                                  asml.remove(hp1);
                                  hp1.Free;
                                end;
                          end;
                        end;
                      { changes some movzx constructs to faster synonyms (all examples
                        are given with eax/ax, but are also valid for other registers)}
                      if (taicpu(p).oper[1]^.typ = top_reg) then
                        if (taicpu(p).oper[0]^.typ = top_reg) then
                          case taicpu(p).opsize of
                            S_BW:
                              begin
                                if (getsupreg(taicpu(p).oper[0]^.reg) =
                                    getsupreg(taicpu(p).oper[1]^.reg)) and not
                                   (cs_opt_size in current_settings.optimizerswitches) then
                                  {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                                  begin
                                    taicpu(p).opcode := A_AND;
                                    taicpu(p).changeopsize(S_W);
                                    taicpu(p).loadConst(0, $ff);
                                    if (cs_asm_source in current_settings.globalswitches) then
                                      asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var7')),p);
                                  end
                                else if GetNextInstruction(p, hp1) and
                                   (tai(hp1).typ = ait_instruction) and
                                   (taicpu(hp1).opcode = A_AND) and
                                   (taicpu(hp1).oper[0]^.typ = top_const) and
                                   (taicpu(hp1).oper[1]^.typ = top_reg) and
                                   (taicpu(hp1).oper[1]^.reg =
                                     taicpu(p).oper[1]^.reg) then
                                  { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                                    to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                                  begin
                                    if (cs_asm_source in current_settings.globalswitches) then
                                      asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var8')),p);
                                    taicpu(p).opcode := A_MOV;
                                    taicpu(p).changeopsize(S_W);
                                    setsubreg(taicpu(p).oper[0]^.reg, R_SUBW);
                                    taicpu(hp1).loadConst(
                                      0, taicpu(hp1).oper[0]^.val and $ff);
                                  end;
                              end;
                            S_BL:
                              begin
                                if (getsupreg(taicpu(p).oper[0]^.reg) =
                                    getsupreg(taicpu(p).oper[1]^.reg)) and not
                                   (cs_opt_size in current_settings.optimizerswitches) then
                                  { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
                                  begin
                                    if (cs_asm_source in current_settings.globalswitches) then
                                      asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var9')),p);
                                    taicpu(p).opcode := A_AND;
                                    taicpu(p).changeopsize(S_L);
                                    taicpu(p).loadConst(0, $ff);
                                  end
                                else if GetNextInstruction(p, hp1) and
                                   (tai(hp1).typ = ait_instruction) and
                                   (taicpu(hp1).opcode = A_AND) and
                                   (taicpu(hp1).oper[0]^.typ = top_const) and
                                   (taicpu(hp1).oper[1]^.typ = top_reg) and
                                   (taicpu(hp1).oper[1]^.reg =
                                     taicpu(p).oper[1]^.reg) then
                                  { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                                    to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                                  begin
                                    if (cs_asm_source in current_settings.globalswitches) then
                                      asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var10')),p);
                                    taicpu(p).opcode := A_MOV;
                                    taicpu(p).changeopsize(S_L);
                                    { do not use R_SUBWHOLE
                                      as movl %rdx,%eax
                                      is invalid in assembler PM }
                                    setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                                    taicpu(hp1).loadConst(
                                      0, taicpu(hp1).oper[0]^.val and $ff);
                                  end;
                              end;
                            S_WL:
                              begin
                                if (getsupreg(taicpu(p).oper[0]^.reg) =
                                    getsupreg(taicpu(p).oper[1]^.reg)) and not
                                   (cs_opt_size in current_settings.optimizerswitches) then
                                  { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                                  begin
                                    if (cs_asm_source in current_settings.globalswitches) then
                                      asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var11')),p);
                                    taicpu(p).opcode := A_AND;
                                    taicpu(p).changeopsize(S_L);
                                    taicpu(p).loadConst(0, $ffff);
                                  end
                                else if GetNextInstruction(p, hp1) and
                                   (tai(hp1).typ = ait_instruction) and
                                   (taicpu(hp1).opcode = A_AND) and
                                   (taicpu(hp1).oper[0]^.typ = top_const) and
                                   (taicpu(hp1).oper[1]^.typ = top_reg) and
                                   (taicpu(hp1).oper[1]^.reg =
                                     taicpu(p).oper[1]^.reg) then
                                  { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                                    to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                                  begin
                                    if (cs_asm_source in current_settings.globalswitches) then
                                      asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var12')),p);
                                    taicpu(p).opcode := A_MOV;
                                    taicpu(p).changeopsize(S_L);
                                    { do not use R_SUBWHOLE
                                      as movl %rdx,%eax
                                      is invalid in assembler PM }
                                    setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                                    taicpu(hp1).loadConst(
                                      0, taicpu(hp1).oper[0]^.val and $ffff);
                                  end;
                              end;
                          end
                        else if (taicpu(p).oper[0]^.typ = top_ref) then
                          begin
                            { movzx from memory followed by an AND on the destination:
                              turn the movzx into a plain mov of the destination size
                              and let the AND, restricted to the bits of the original
                              source width, do the zero extension }
                            if GetNextInstruction(p, hp1) and
                               (tai(hp1).typ = ait_instruction) and
                               (taicpu(hp1).opcode = A_AND) and
                               (taicpu(hp1).oper[0]^.typ = Top_Const) and
                               (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                               (taicpu(hp1).oper[1]^.reg =
                                 taicpu(p).oper[1]^.reg) then
                              begin
                                taicpu(p).opcode := A_MOV;
                                case taicpu(p).opsize of
                                  S_BL:
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var13')),p);
                                      taicpu(p).changeopsize(S_L);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ff);
                                    end;
                                  S_WL:
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var14')),p);
                                      taicpu(p).changeopsize(S_L);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ffff);
                                    end;
                                  S_BW:
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var15')),p);
                                      taicpu(p).changeopsize(S_W);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ff);
                                    end;
                                  S_BQ:
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var16')),p);
                                      taicpu(p).changeopsize(S_Q);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ff);
                                    end;
                                  S_WQ:
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var17')),p);
                                      taicpu(p).changeopsize(S_Q);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ffff);
                                    end;
                                  S_LQ:
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var18')),p);
                                      taicpu(p).changeopsize(S_Q);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ffffffff);
                                    end;
                                end;
                              end;
                          end;
                    end;
                end;
            end;
          end;
      end;
    end;
  initialization
    { make this optimizer class the one referenced by casmoptimizer }
    casmoptimizer := TCpuAsmOptimizer;
  end.