aoptcpu.pas 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493
  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
  3. This unit calls the optimization procedures to optimize the assembler
  4. code for x86-64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses cpubase, aasmtai, aopt, aoptcpub;
  type
    { CPU-specific assembler optimizer. It derives from TAsmOptimizer and
      overrides only the CPU hook of the first peephole pass. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { Inspects the list entry p and may rewrite or remove it and the
        instructions that follow it; p may be changed to point at another
        entry. }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
    end;
  26. implementation
  27. uses
  28. globtype, globals,
  29. cutils,
  30. verbose,
  31. cgbase, cgutils,
  32. aoptobj,
  33. aasmbase, aasmdata, aasmcpu,
  34. itcpugas;
  { Tells whether hp1 is an arithmetic/logic instruction that modifies reg
    in place and can therefore be retargeted at another destination.

    hp1 : the instruction to examine
    reg : the register that must be the destination of hp1

    Returns True when
      - hp1 is ADD/SUB/OR/XOR/AND/SHL/SHR/SAR with exactly two operands,
        its destination (operand 1) is reg, and its source (operand 0) is
        either a constant or a register that does not alias reg, or
      - hp1 is INC/DEC with exactly one operand, which is reg.
    Returns False for every other instruction. }
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      isFoldableArithOp := False;
      case hp1.opcode of
        A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
          isFoldableArithOp :=
            { guard the operand count before touching operand 1 }
            (hp1.ops = 2) and
            (hp1.oper[1]^.typ = top_reg) and
            (hp1.oper[1]^.reg = reg) and
            ((hp1.oper[0]^.typ = top_const) or
             ((hp1.oper[0]^.typ = top_reg) and
              { compare super registers: a source such as %cl in
                "shl %cl,%ecx" is a part of the destination, so its value
                depends on the load that a fold would remove }
              (getsupreg(hp1.oper[0]^.reg) <> getsupreg(reg))));
        A_INC, A_DEC:
          isFoldableArithOp :=
            (hp1.ops = 1) and
            (hp1.oper[0]^.typ = top_reg) and
            (hp1.oper[0]^.reg = reg);
      end;
    end;
  { CPU-specific part of the first peephole pass.

    p : the list entry to examine. Only ait_instruction entries with the
        opcodes AND, MOV, MOVSX and MOVZX are handled. When p itself is
        removed, p is set to the instruction that took over its role.

    Handled patterns:
      AND          two consecutive "and const,reg" on the same register are
                   merged into one
      MOV          (only with cs_opt_level3) a following "and $ffffffff"
                   after a 32 bit mov is removed, and
                   "mov x,reg; op ..,reg; mov reg,x" is folded to "op ..,x"
      MOVSX/MOVZX  "movXX x,reg; op ..,reg; mov reg,x" is folded to
                   "op ..,x"
      MOVZX        redundant "and" masks after the movzx are removed and
                   some movzx forms are replaced by and/mov equivalents

    Returns False in every case; Result is never set to True here. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      hp1, hp2: tai;
      GetNextIntruction_p : boolean;
      FoldIntoSource : boolean;
      TmpUsedRegs : TAllUsedRegs;
    begin
      Result := False;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_AND:
                begin
                  if (taicpu(p).oper[0]^.typ = top_const) and
                    (taicpu(p).oper[1]^.typ = top_reg) and
                    GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(
                      taicpu(hp1).oper[1]^.reg)) and
                    (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(
                      taicpu(hp1).oper[1]^.reg)) and
                    (abs(taicpu(p).oper[0]^.val and
                      taicpu(hp1).oper[0]^.val)<$80000000) then
                    {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
                    begin
                      taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and
                        taicpu(hp1).oper[0]^.val);
                      if (cs_asm_source in current_settings.globalswitches) then
                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var1')),p);
                      asml.remove(p);
                      p.Free;
                      p:=hp1;
                    end;
                  (* else
                  {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
                  jump, but only if it's a conditional jump (PFV) }
                  if (taicpu(p).oper[1]^.typ = top_reg) and
                  GetNextInstruction(p, hp1) and
                  (hp1.typ = ait_instruction) and
                  (taicpu(hp1).is_jmp) and
                  (taicpu(hp1).opcode<>A_JMP) and
                  not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
                  taicpu(p).opcode := A_TEST;*)
                end;
              A_MOV:
                { removes superfluous And's after mov's }
                begin
                  if not(cs_opt_level3 in current_settings.optimizerswitches) then
                    exit;
                  { fetched once; both patterns below start at the same hp1 }
                  GetNextIntruction_p:=GetNextInstruction(p, hp1);
                  if (taicpu(p).oper[1]^.typ = top_reg) and
                    GetNextIntruction_p and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    case taicpu(p).opsize Of
                      S_L:
                        if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                          begin
                            if (cs_asm_source in current_settings.globalswitches) then
                              asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2a')),p);
                            asml.remove(hp1);
                            hp1.free;
                          end;
                    end
                  else if (taicpu(p).oper[1]^.typ = top_reg) and
                    GetNextIntruction_p and
                    (hp1.typ = ait_instruction) and
                    GetNextInstruction(hp1, hp2) and
                    (hp2.typ = ait_instruction) and
                    (taicpu(hp2).opcode = A_MOV) and
                    (taicpu(hp2).oper[0]^.typ = top_reg) and
                    { the value stored by hp2 must be the register that was
                      loaded by p and modified by hp1; otherwise the fold
                      would drop a store of an unrelated register }
                    (getsupreg(taicpu(hp2).oper[0]^.reg) =
                      getsupreg(taicpu(p).oper[1]^.reg)) and
                    OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
                    (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
                     ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
                      IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
                    ) then
                    { change movsX/movzX reg/ref, reg2 }
                    { add/sub/or/... reg3/$const, reg2 }
                    { mov reg2 reg/ref }
                    { to add/sub/or/... reg3/$const, reg/ref }
                    { NOTE(review): a memory operand whose address uses reg2
                      itself is not excluded here - confirm whether that can
                      occur }
                    begin
                      CopyUsedRegs(TmpUsedRegs);
                      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                      UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                      { the load is deleted below, so reg2 must not be read
                        after hp2 }
                      If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
                        begin
                          { by example:
                          movswl %si,%eax movswl %si,%eax p
                          decl %eax addl %edx,%eax hp1
                          movw %ax,%si movw %ax,%si hp2
                          ->
                          movswl %si,%eax movswl %si,%eax p
                          decw %eax addw %edx,%eax hp1
                          movw %ax,%si movw %ax,%si hp2
                          }
                          if (cs_asm_source in current_settings.globalswitches) then
                            begin
                              asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2')),p);
                              asml.insertbefore(tai_comment.create(strpnew('P='+std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize])),p);
                              asml.insertbefore(tai_comment.create(strpnew('HP1='+std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize])),p);
                              asml.insertbefore(tai_comment.create(strpnew('HP2='+std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize])),p);
                            end;
                          taicpu(hp1).changeopsize(taicpu(p).opsize);
                          {
                          ->
                          movswl %si,%eax movswl %si,%eax p
                          decw %si addw %dx,%si hp1
                          movw %ax,%si movw %ax,%si hp2
                          }
                          case taicpu(hp1).ops of
                            1:
                              begin
                                taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                                if taicpu(hp1).oper[0]^.typ=top_reg then
                                  setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(p).oper[1]^.reg));
                              end;
                            2:
                              begin
                                taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                                { the count register of a shift keeps its size }
                                if (taicpu(hp1).oper[0]^.typ=top_reg) and
                                  (taicpu(hp1).opcode<>A_SHL) and
                                  (taicpu(hp1).opcode<>A_SHR) and
                                  (taicpu(hp1).opcode<>A_SAR) then
                                  setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(p).oper[1]^.reg));
                              end;
                            else
                              internalerror(2008042701);
                          end;
                          {
                          ->
                          decw %si addw %dx,%si p
                          }
                          if (cs_asm_source in current_settings.globalswitches) then
                            asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2')),p);
                          asml.remove(p);
                          asml.remove(hp2);
                          p.Free;
                          hp2.Free;
                          p := hp1;
                        end;
                      ReleaseUsedRegs(TmpUsedRegs);
                    end
                end;
              A_MOVSX,
              A_MOVZX:
                begin
                  { Decide first whether "movXX x,reg2; op ..,reg2; mov reg2,x"
                    may be folded into "op ..,x". When it may not, the MOVZX
                    specific rewrites below still get their chance. }
                  FoldIntoSource := False;
                  if (taicpu(p).oper[1]^.typ = top_reg) and
                    GetNextInstruction(p, hp1) and
                    (hp1.typ = ait_instruction) and
                    IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) and
                    GetNextInstruction(hp1, hp2) and
                    (hp2.typ = ait_instruction) and
                    (taicpu(hp2).opcode = A_MOV) and
                    (taicpu(hp2).oper[0]^.typ = top_reg) and
                    { hp2 must write back the register computed by hp1 }
                    (getsupreg(taicpu(hp2).oper[0]^.reg) =
                      getsupreg(taicpu(p).oper[1]^.reg)) and
                    OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) then
                    begin
                      { same liveness test as in the A_MOV branch: the load
                        into reg2 disappears, so reg2 must be dead after hp2 }
                      CopyUsedRegs(TmpUsedRegs);
                      UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                      UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                      FoldIntoSource :=
                        not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs));
                      ReleaseUsedRegs(TmpUsedRegs);
                    end;
                  if FoldIntoSource then
                    { change movsX/movzX reg/ref, reg2 }
                    { add/sub/or/... reg3/$const, reg2 }
                    { mov reg2 reg/ref }
                    { to add/sub/or/... reg3/$const, reg/ref }
                    begin
                      { by example:
                      movswl %si,%eax movswl %si,%eax p
                      decl %eax addl %edx,%eax hp1
                      movw %ax,%si movw %ax,%si hp2
                      ->
                      movswl %si,%eax movswl %si,%eax p
                      decw %eax addw %edx,%eax hp1
                      movw %ax,%si movw %ax,%si hp2
                      }
                      taicpu(hp1).changeopsize(taicpu(hp2).opsize);
                      {
                      ->
                      movswl %si,%eax movswl %si,%eax p
                      decw %si addw %dx,%si hp1
                      movw %ax,%si movw %ax,%si hp2
                      }
                      case taicpu(hp1).ops of
                        1:
                          taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                        2:
                          begin
                            taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                            { as in the A_MOV branch, the count register of
                              a shift must not be resized }
                            if (taicpu(hp1).oper[0]^.typ = top_reg) and
                              (taicpu(hp1).opcode<>A_SHL) and
                              (taicpu(hp1).opcode<>A_SHR) and
                              (taicpu(hp1).opcode<>A_SAR) then
                              setsubreg(taicpu(hp1).oper[0]^.reg,
                                getsubreg(taicpu(hp2).oper[0]^.reg));
                          end;
                        else
                          internalerror(2008042701);
                      end;
                      {
                      ->
                      decw %si addw %dx,%si p
                      }
                      if (cs_asm_source in current_settings.globalswitches) then
                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var3')),p);
                      asml.remove(p);
                      asml.remove(hp2);
                      p.Free;
                      hp2.Free;
                      p := hp1;
                    end
                  { removes superfluous And's after movzx's }
                  else if taicpu(p).opcode = A_MOVZX then
                    begin
                      if (taicpu(p).oper[1]^.typ = top_reg) and
                        GetNextInstruction(p, hp1) and
                        (tai(hp1).typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_AND) and
                        (taicpu(hp1).oper[0]^.typ = top_const) and
                        (taicpu(hp1).oper[1]^.typ = top_reg) and
                        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        begin
                          { the mask equals the range already guaranteed by
                            the zero extension, so the and is dropped }
                          case taicpu(p).opsize of
                            S_BL, S_BW, S_BQ:
                              if (taicpu(hp1).oper[0]^.val = $ff) then
                                begin
                                  if (cs_asm_source in current_settings.globalswitches) then
                                    asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var4')),p);
                                  asml.remove(hp1);
                                  hp1.Free;
                                end;
                            S_WL, S_WQ:
                              if (taicpu(hp1).oper[0]^.val = $ffff) then
                                begin
                                  if (cs_asm_source in current_settings.globalswitches) then
                                    asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var5')),p);
                                  asml.remove(hp1);
                                  hp1.Free;
                                end;
                            S_LQ:
                              if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                                begin
                                  if (cs_asm_source in current_settings.globalswitches) then
                                    asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
                                  asml.remove(hp1);
                                  hp1.Free;
                                end;
                          end;
                        end;
                      { changes some movzx constructs to faster synonyms (all examples
                        are given with eax/ax, but are also valid for other registers)}
                      if (taicpu(p).oper[1]^.typ = top_reg) then
                        begin
                          if (taicpu(p).oper[0]^.typ = top_reg) then
                            case taicpu(p).opsize of
                              S_BW:
                                begin
                                  if (getsupreg(taicpu(p).oper[0]^.reg) =
                                    getsupreg(taicpu(p).oper[1]^.reg)) and not
                                    (cs_opt_size in current_settings.optimizerswitches) then
                                    {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                                    begin
                                      taicpu(p).opcode := A_AND;
                                      taicpu(p).changeopsize(S_W);
                                      taicpu(p).loadConst(0, $ff);
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var7')),p);
                                    end
                                  else if GetNextInstruction(p, hp1) and
                                    (tai(hp1).typ = ait_instruction) and
                                    (taicpu(hp1).opcode = A_AND) and
                                    (taicpu(hp1).oper[0]^.typ = top_const) and
                                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                                    (taicpu(hp1).oper[1]^.reg =
                                      taicpu(p).oper[1]^.reg) then
                                    { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                                      to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var8')),p);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_W);
                                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBW);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ff);
                                    end;
                                end;
                              S_BL:
                                begin
                                  if (getsupreg(taicpu(p).oper[0]^.reg) =
                                    getsupreg(taicpu(p).oper[1]^.reg)) and not
                                    (cs_opt_size in current_settings.optimizerswitches) then
                                    { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var9')),p);
                                      taicpu(p).opcode := A_AND;
                                      taicpu(p).changeopsize(S_L);
                                      taicpu(p).loadConst(0, $ff);
                                    end
                                  else if GetNextInstruction(p, hp1) and
                                    (tai(hp1).typ = ait_instruction) and
                                    (taicpu(hp1).opcode = A_AND) and
                                    (taicpu(hp1).oper[0]^.typ = top_const) and
                                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                                    (taicpu(hp1).oper[1]^.reg =
                                      taicpu(p).oper[1]^.reg) then
                                    { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                                      to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var10')),p);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_L);
                                      { do not use R_SUBWHOLE
                                        as movl %rdx,%eax
                                        is invalid in assembler PM }
                                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ff);
                                    end;
                                end;
                              S_WL:
                                begin
                                  if (getsupreg(taicpu(p).oper[0]^.reg) =
                                    getsupreg(taicpu(p).oper[1]^.reg)) and not
                                    (cs_opt_size in current_settings.optimizerswitches) then
                                    { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var11')),p);
                                      taicpu(p).opcode := A_AND;
                                      taicpu(p).changeopsize(S_L);
                                      taicpu(p).loadConst(0, $ffff);
                                    end
                                  else if GetNextInstruction(p, hp1) and
                                    (tai(hp1).typ = ait_instruction) and
                                    (taicpu(hp1).opcode = A_AND) and
                                    (taicpu(hp1).oper[0]^.typ = top_const) and
                                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                                    (taicpu(hp1).oper[1]^.reg =
                                      taicpu(p).oper[1]^.reg) then
                                    { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                                      to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                                    begin
                                      if (cs_asm_source in current_settings.globalswitches) then
                                        asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var12')),p);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_L);
                                      { do not use R_SUBWHOLE
                                        as movl %rdx,%eax
                                        is invalid in assembler PM }
                                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                                      taicpu(hp1).loadConst(
                                        0, taicpu(hp1).oper[0]^.val and $ffff);
                                    end;
                                end;
                            end
                          else if (taicpu(p).oper[0]^.typ = top_ref) then
                            begin
                              if GetNextInstruction(p, hp1) and
                                (tai(hp1).typ = ait_instruction) and
                                (taicpu(hp1).opcode = A_AND) and
                                (taicpu(hp1).oper[0]^.typ = Top_Const) and
                                (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                                (taicpu(hp1).oper[1]^.reg =
                                  taicpu(p).oper[1]^.reg) then
                                begin
                                  { "movzXX mem,reg; and $const,reg" becomes a
                                    plain mov of the destination size; the
                                    narrowed and-mask then clears the bits the
                                    zero extension would have cleared }
                                  taicpu(p).opcode := A_MOV;
                                  case taicpu(p).opsize of
                                    S_BL:
                                      begin
                                        if (cs_asm_source in current_settings.globalswitches) then
                                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var13')),p);
                                        taicpu(p).changeopsize(S_L);
                                        taicpu(hp1).loadConst(
                                          0, taicpu(hp1).oper[0]^.val and $ff);
                                      end;
                                    S_WL:
                                      begin
                                        if (cs_asm_source in current_settings.globalswitches) then
                                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var14')),p);
                                        taicpu(p).changeopsize(S_L);
                                        taicpu(hp1).loadConst(
                                          0, taicpu(hp1).oper[0]^.val and $ffff);
                                      end;
                                    S_BW:
                                      begin
                                        if (cs_asm_source in current_settings.globalswitches) then
                                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var15')),p);
                                        taicpu(p).changeopsize(S_W);
                                        taicpu(hp1).loadConst(
                                          0, taicpu(hp1).oper[0]^.val and $ff);
                                      end;
                                    S_BQ:
                                      begin
                                        if (cs_asm_source in current_settings.globalswitches) then
                                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var16')),p);
                                        taicpu(p).changeopsize(S_Q);
                                        taicpu(hp1).loadConst(
                                          0, taicpu(hp1).oper[0]^.val and $ff);
                                      end;
                                    S_WQ:
                                      begin
                                        if (cs_asm_source in current_settings.globalswitches) then
                                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var17')),p);
                                        taicpu(p).changeopsize(S_Q);
                                        taicpu(hp1).loadConst(
                                          0, taicpu(hp1).oper[0]^.val and $ffff);
                                      end;
                                    S_LQ:
                                      begin
                                        if (cs_asm_source in current_settings.globalswitches) then
                                          asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var18')),p);
                                        taicpu(p).changeopsize(S_Q);
                                        taicpu(hp1).loadConst(
                                          0, taicpu(hp1).oper[0]^.val and $ffffffff);
                                      end;
                                  end;
                                end;
                            end;
                        end;
                    end;
                end;
            end;
          end;
      end;
    end;
  { Unit initialization: assigns this unit's optimizer class to
    casmoptimizer. }
  begin
    casmoptimizer := TCpuAsmOptimizer;
  end.