{
Copyright (c) 1998-2004 by Jonas Maebe
This unit calls the optimization procedures to optimize the assembler
code for x86-64
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
unit aoptcpu;
{$i fpcdefs.inc}
interface
uses cgbase, cpubase, aasmtai, aopt, aoptx86, aoptcpub;
type
TCpuAsmOptimizer = class(TX86AsmOptimizer)
function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
function PostPeepHoleOptsCpu(var p : tai) : boolean; override;
end;
implementation
uses
globtype, globals,
cutils,
verbose,
cgutils,
aoptobj,
aasmbase, aasmdata, aasmcpu,
itcpugas;
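{ Returns true if hp1 is an ADD/SUB/OR/XOR/AND/SHL/SHR/SAR whose destination
  operand is reg (with a constant or a different register as source), or an
  INC/DEC of reg; such instructions can have their result redirected when a
  following MOV is folded away. }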
function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
begin
isFoldableArithOp := False;
case hp1.opcode of
A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
isFoldableArithOp :=
(taicpu(hp1).oper[1]^.typ = top_reg) and
(taicpu(hp1).oper[1]^.reg = reg) and
((taicpu(hp1).oper[0]^.typ = top_const) or
((taicpu(hp1).oper[0]^.typ = top_reg) and
(taicpu(hp1).oper[0]^.reg<>reg)));
A_INC, A_DEC:
isFoldableArithOp :=
(taicpu(hp1).oper[0]^.typ = top_reg) and
(taicpu(hp1).oper[0]^.reg = reg);
end;
end;
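{ First CPU specific peephole pass: handles AND, MOV, MOVSX/MOVZX,
  VMOVAPS/VMOVAPD and scalar AVX arithmetic followed by an aligned vector move. }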
function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
var
hp1, hp2: tai;
GetNextInstruction_p : boolean;
TmpUsedRegs : TAllUsedRegs;
begin
Result := False;
case p.typ of
ait_instruction:
begin
case taicpu(p).opcode of
A_AND:
begin
if (taicpu(p).oper[0]^.typ = top_const) and
(taicpu(p).oper[1]^.typ = top_reg) and
GetNextInstruction(p, hp1) and
(tai(hp1).typ = ait_instruction) and
(taicpu(hp1).opcode = A_AND) and
(taicpu(hp1).oper[0]^.typ = top_const) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
(getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(
taicpu(hp1).oper[1]^.reg)) and
(getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(
taicpu(hp1).oper[1]^.reg)) and
(abs(taicpu(p).oper[0]^.val and
taicpu(hp1).oper[0]^.val)<$80000000) then
{change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
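{ e.g. (registers and constants purely illustrative):
  andl $0x0000ff00,%eax
  andl $0x00000ff0,%eax   ->   andl $0x00000f00,%eax }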
begin
taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and
taicpu(hp1).oper[0]^.val);
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var1')),p);
asml.remove(p);
p.Free;
p:=hp1;
end
else if (taicpu(p).oper[0]^.typ = top_const) and
(taicpu(p).oper[1]^.typ = top_reg) and
GetNextInstruction(p, hp1) and
MatchInstruction(hp1,A_MOVZX,[]) and
(taicpu(hp1).oper[0]^.typ = top_reg) and
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
(getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
(((taicpu(p).opsize=S_W) and
(taicpu(hp1).opsize=S_BW)) or
((taicpu(p).opsize=S_L) and
(taicpu(hp1).opsize in [S_WL,S_BL])) or
((taicpu(p).opsize=S_Q) and
(taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
) then
begin
if (((taicpu(hp1).opsize) in [S_BW,S_BL,S_BQ]) and
((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)) or
(((taicpu(hp1).opsize) in [S_WL,S_WQ]) and
((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val)) or
(((taicpu(hp1).opsize)=S_LQ) and
((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
) then
begin
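{ the and already restricts the value to the source size of the movzx, so the
  zero extension is redundant, e.g. (illustrative):
  andl $0xff,%eax ; movzbl %al,%eax -> andl $0xff,%eax }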
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,AndMovzToAnd')),p);
asml.remove(hp1);
hp1.free;
end;
end
else if (taicpu(p).oper[0]^.typ = top_const) and
(taicpu(p).oper[1]^.typ = top_reg) and
GetNextInstruction(p, hp1) and
MatchInstruction(hp1,A_MOVSX,A_MOVSXD,[]) and
(taicpu(hp1).oper[0]^.typ = top_reg) and
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
(getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
(((taicpu(p).opsize=S_W) and
(taicpu(hp1).opsize=S_BW)) or
((taicpu(p).opsize=S_L) and
(taicpu(hp1).opsize in [S_WL,S_BL])) or
((taicpu(p).opsize=S_Q) and
(taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
) then
begin
if (((taicpu(hp1).opsize) in [S_BW,S_BL,S_BQ]) and
((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)) or
(((taicpu(hp1).opsize) in [S_WL,S_WQ]) and
((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val)) or
(((taicpu(hp1).opsize)=S_LQ) and
((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
) then
begin
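{ the and clears the sign bit of the movsx source size, so the sign extension
  cannot change the value, e.g. (illustrative):
  andl $0x7f,%eax ; movsbl %al,%eax -> andl $0x7f,%eax }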
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,AndMovsxToAnd')),p);
asml.remove(hp1);
hp1.free;
end;
end;
(* else
{change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
jump, but only if it's a conditional jump (PFV) }
if (taicpu(p).oper[1]^.typ = top_reg) and
GetNextInstruction(p, hp1) and
(hp1.typ = ait_instruction) and
(taicpu(hp1).is_jmp) and
(taicpu(hp1).opcode<>A_JMP) and
not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
taicpu(p).opcode := A_TEST;*)
end;
A_MOV:
{ removes superfluous And's after mov's }
begin
if not(cs_opt_level3 in current_settings.optimizerswitches) then
exit;
GetNextInstruction_p:=GetNextInstruction(p, hp1);
if (taicpu(p).oper[1]^.typ = top_reg) and
GetNextInstruction_p and
(tai(hp1).typ = ait_instruction) and
(taicpu(hp1).opcode = A_AND) and
(taicpu(hp1).oper[0]^.typ = top_const) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
(taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
case taicpu(p).opsize Of
S_L:
if (taicpu(hp1).oper[0]^.val = $ffffffff) then
begin
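{ an and with $ffffffff does not change a register that was just written by a
  32 bit mov, e.g. (illustrative):
  movl %edx,%eax ; andl $0xffffffff,%eax -> movl %edx,%eax }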
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2a')),p);
asml.remove(hp1);
hp1.free;
end;
end
{ Next instruction is also a MOV ? }
else if GetNextInstruction_p and
MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
begin
if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
(taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
{mov reg1, mem1 or mov mem1, reg1
mov mem2, reg2 mov reg2, mem2}
begin
if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
{mov reg1, mem1 or mov mem1, reg1
mov mem2, reg1 mov reg2, mem1}
begin
if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
{ Removes the second statement from
mov reg1, mem1/reg2
mov mem1/reg2, reg1 }
begin
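{ e.g. (illustrative): movl %eax,(%rdx) ; movl (%rdx),%eax -> movl %eax,(%rdx) }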
{ if (taicpu(p).oper[0]^.typ = top_reg) then
AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs); }
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,MovMov2Mov1')),p);
asml.remove(hp1);
hp1.free;
end;
end
else if (taicpu(p).oper[1]^.typ=top_ref) and
OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
begin
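{ forward the stored register instead of reloading it from memory, e.g.
  (illustrative): movl %eax,(%rdx) ; movl (%rdx),%ecx
               -> movl %eax,(%rdx) ; movl %eax,%ecx }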
taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,MovMov2MovMov1')),p);
end;
end
end
else if (taicpu(p).oper[1]^.typ = top_reg) and
GetNextInstruction_p and
(hp1.typ = ait_instruction) and
GetNextInstruction(hp1, hp2) and
(hp2.typ = ait_instruction) and
(taicpu(hp2).opcode = A_MOV) and
(taicpu(hp2).oper[0]^.typ = top_reg) and
OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
(IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
) then
{ change movsX/movzX reg/ref, reg2 }
{ add/sub/or/... reg3/$const, reg2 }
{ mov reg2 reg/ref }
{ to add/sub/or/... reg3/$const, reg/ref }
begin
CopyUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
begin
{ by example:
movswl %si,%eax movswl %si,%eax p
decl %eax addl %edx,%eax hp1
movw %ax,%si movw %ax,%si hp2
->
movswl %si,%eax movswl %si,%eax p
decw %eax addw %edx,%eax hp1
movw %ax,%si movw %ax,%si hp2
}
if (cs_asm_source in current_settings.globalswitches) then
begin
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2')),p);
asml.insertbefore(tai_comment.create(strpnew('P='+std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize])),p);
asml.insertbefore(tai_comment.create(strpnew('HP1='+std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize])),p);
asml.insertbefore(tai_comment.create(strpnew('HP2='+std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize])),p);
end;
taicpu(hp1).changeopsize(taicpu(p).opsize);
{
->
movswl %si,%eax movswl %si,%eax p
decw %si addw %dx,%si hp1
movw %ax,%si movw %ax,%si hp2
}
case taicpu(hp1).ops of
1:
begin
taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
if taicpu(hp1).oper[0]^.typ=top_reg then
setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(p).oper[1]^.reg));
end;
2:
begin
taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
if (taicpu(hp1).oper[0]^.typ=top_reg) and
(taicpu(hp1).opcode<>A_SHL) and
(taicpu(hp1).opcode<>A_SHR) and
(taicpu(hp1).opcode<>A_SAR) then
setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(p).oper[1]^.reg));
end;
else
internalerror(2008042701);
end;
{
->
decw %si addw %dx,%si p
}
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2')),p);
asml.remove(p);
asml.remove(hp2);
p.Free;
hp2.Free;
p := hp1;
end;
ReleaseUsedRegs(TmpUsedRegs);
end
end;
A_MOVSX,
A_MOVZX:
begin
if (taicpu(p).oper[1]^.typ = top_reg) and
GetNextInstruction(p, hp1) and
(hp1.typ = ait_instruction) and
IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) and
GetNextInstruction(hp1, hp2) and
(hp2.typ = ait_instruction) and
(taicpu(hp2).opcode = A_MOV) and
(taicpu(hp2).oper[0]^.typ = top_reg) and
OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) then
{ change movsX/movzX reg/ref, reg2 }
{ add/sub/or/... reg3/$const, reg2 }
{ mov reg2 reg/ref }
{ to add/sub/or/... reg3/$const, reg/ref }
begin
{ by example:
movswl %si,%eax movswl %si,%eax p
decl %eax addl %edx,%eax hp1
movw %ax,%si movw %ax,%si hp2
->
movswl %si,%eax movswl %si,%eax p
decw %eax addw %edx,%eax hp1
movw %ax,%si movw %ax,%si hp2
}
taicpu(hp1).changeopsize(taicpu(hp2).opsize);
{
->
movswl %si,%eax movswl %si,%eax p
decw %si addw %dx,%si hp1
movw %ax,%si movw %ax,%si hp2
}
case taicpu(hp1).ops of
1:
taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
2:
begin
taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
if (taicpu(hp1).oper[0]^.typ = top_reg) then
setsubreg(taicpu(hp1).oper[0]^.reg,
getsubreg(taicpu(hp2).oper[0]^.reg));
end;
else
internalerror(2008042701);
end;
{
->
decw %si addw %dx,%si p
}
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var3')),p);
asml.remove(p);
asml.remove(hp2);
p.Free;
hp2.Free;
p := hp1;
end
{ removes superfluous And's after movzx's }
else if taicpu(p).opcode = A_MOVZX then
begin
if (taicpu(p).oper[1]^.typ = top_reg) and
GetNextInstruction(p, hp1) and
(tai(hp1).typ = ait_instruction) and
(taicpu(hp1).opcode = A_AND) and
(taicpu(hp1).oper[0]^.typ = top_const) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
(taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
begin
case taicpu(p).opsize of
S_BL, S_BW, S_BQ:
if (taicpu(hp1).oper[0]^.val = $ff) then
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var4')),p);
asml.remove(hp1);
hp1.Free;
end;
S_WL, S_WQ:
if (taicpu(hp1).oper[0]^.val = $ffff) then
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var5')),p);
asml.remove(hp1);
hp1.Free;
end;
S_LQ:
if (taicpu(hp1).oper[0]^.val = $ffffffff) then
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
asml.remove(hp1);
hp1.Free;
end;
end;
end;
{ changes some movzx constructs to faster synonyms (all examples
are given with eax/ax, but are also valid for other registers)}
if (taicpu(p).oper[1]^.typ = top_reg) then
if (taicpu(p).oper[0]^.typ = top_reg) then
case taicpu(p).opsize of
S_BW:
begin
if (getsupreg(taicpu(p).oper[0]^.reg) =
getsupreg(taicpu(p).oper[1]^.reg)) and not
(cs_opt_size in current_settings.optimizerswitches) then
{Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
begin
taicpu(p).opcode := A_AND;
taicpu(p).changeopsize(S_W);
taicpu(p).loadConst(0, $ff);
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var7')),p);
end
else if GetNextInstruction(p, hp1) and
(tai(hp1).typ = ait_instruction) and
(taicpu(hp1).opcode = A_AND) and
(taicpu(hp1).oper[0]^.typ = top_const) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
(taicpu(hp1).oper[1]^.reg =
taicpu(p).oper[1]^.reg) then
{ Change "movzbw %reg1, %reg2; andw $const, %reg2"
to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var8')),p);
taicpu(p).opcode := A_MOV;
taicpu(p).changeopsize(S_W);
setsubreg(taicpu(p).oper[0]^.reg, R_SUBW);
taicpu(hp1).loadConst(
0, taicpu(hp1).oper[0]^.val and $ff);
end;
end;
S_BL:
begin
if (getsupreg(taicpu(p).oper[0]^.reg) =
getsupreg(taicpu(p).oper[1]^.reg)) and not
(cs_opt_size in current_settings.optimizerswitches) then
{ Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var9')),p);
taicpu(p).opcode := A_AND;
taicpu(p).changeopsize(S_L);
taicpu(p).loadConst(0, $ff);
end
else if GetNextInstruction(p, hp1) and
(tai(hp1).typ = ait_instruction) and
(taicpu(hp1).opcode = A_AND) and
(taicpu(hp1).oper[0]^.typ = top_const) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
(taicpu(hp1).oper[1]^.reg =
taicpu(p).oper[1]^.reg) then
{ Change "movzbl %reg1, %reg2; andl $const, %reg2"
to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var10')),p);
taicpu(p).opcode := A_MOV;
taicpu(p).changeopsize(S_L);
{ do not use R_SUBWHOLE
as movl %rdx,%eax
is invalid in assembler PM }
setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
taicpu(hp1).loadConst(
0, taicpu(hp1).oper[0]^.val and $ff);
end;
end;
S_WL:
begin
if (getsupreg(taicpu(p).oper[0]^.reg) =
getsupreg(taicpu(p).oper[1]^.reg)) and not
(cs_opt_size in current_settings.optimizerswitches) then
{ Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var11')),p);
taicpu(p).opcode := A_AND;
taicpu(p).changeopsize(S_L);
taicpu(p).loadConst(0, $ffff);
end
else if GetNextInstruction(p, hp1) and
(tai(hp1).typ = ait_instruction) and
(taicpu(hp1).opcode = A_AND) and
(taicpu(hp1).oper[0]^.typ = top_const) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
(taicpu(hp1).oper[1]^.reg =
taicpu(p).oper[1]^.reg) then
{ Change "movzwl %reg1, %reg2; andl $const, %reg2"
to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var12')),p);
taicpu(p).opcode := A_MOV;
taicpu(p).changeopsize(S_L);
{ do not use R_SUBWHOLE
as movl %rdx,%eax
is invalid in assembler PM }
setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
taicpu(hp1).loadConst(
0, taicpu(hp1).oper[0]^.val and $ffff);
end;
end;
end
else if (taicpu(p).oper[0]^.typ = top_ref) then
begin
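{ movzx from memory followed by an and on the same register: turn the load
  into a plain mov and mask the and constant to the original source size
  instead, e.g. (illustrative):
  movzbl (%rdx),%eax ; andl $0x1ff,%eax -> movl (%rdx),%eax ; andl $0xff,%eax }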
if GetNextInstruction(p, hp1) and
(tai(hp1).typ = ait_instruction) and
(taicpu(hp1).opcode = A_AND) and
MatchOpType(taicpu(hp1),top_const,top_reg) and
(taicpu(hp1).oper[1]^.reg =
taicpu(p).oper[1]^.reg) then
begin
taicpu(p).opcode := A_MOV;
case taicpu(p).opsize of
S_BL:
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var13')),p);
taicpu(p).changeopsize(S_L);
taicpu(hp1).loadConst(
0, taicpu(hp1).oper[0]^.val and $ff);
end;
S_WL:
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var14')),p);
taicpu(p).changeopsize(S_L);
taicpu(hp1).loadConst(
0, taicpu(hp1).oper[0]^.val and $ffff);
end;
S_BW:
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var15')),p);
taicpu(p).changeopsize(S_W);
taicpu(hp1).loadConst(
0, taicpu(hp1).oper[0]^.val and $ff);
end;
S_BQ:
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var16')),p);
taicpu(p).changeopsize(S_Q);
taicpu(hp1).loadConst(
0, taicpu(hp1).oper[0]^.val and $ff);
end;
S_WQ:
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var17')),p);
taicpu(p).changeopsize(S_Q);
taicpu(hp1).loadConst(
0, taicpu(hp1).oper[0]^.val and $ffff);
end;
S_LQ:
begin
if (cs_asm_source in current_settings.globalswitches) then
asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var18')),p);
taicpu(p).changeopsize(S_Q);
taicpu(hp1).loadConst(
0, taicpu(hp1).oper[0]^.val and $ffffffff);
end;
end;
end;
end;
end;
end;
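{ aligned vector register moves are delegated to OptPass1VMOVAP }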
A_VMOVAPS,
A_VMOVAPD:
result:=OptPass1VMOVAP(p);
A_VDIVSD,
A_VDIVSS,
A_VSUBSD,
A_VSUBSS,
A_VMULSD,
A_VMULSS,
A_VADDSD,
A_VADDSS:
begin
if GetNextInstruction(p,hp1) and
{ we mix single and double operations here because we assume that the compiler
generates vmovapd only after double operations and vmovaps only after single operations }
MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
(taicpu(hp1).oper[1]^.typ=top_reg) then
begin
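{ retarget the arithmetic result to the vmovap destination when the
  intermediate register is not used afterwards, e.g. (illustrative):
  vaddsd %xmm1,%xmm0,%xmm2 ; vmovapd %xmm2,%xmm3 -> vaddsd %xmm1,%xmm0,%xmm3 }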
CopyUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
If not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
begin
taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
asml.Remove(hp1);
hp1.Free;
end;
end;
end;
end;
end;
end;
end;
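{ Post peephole pass: MOV instructions get a final cleanup via PostPeepholeOptMov. }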
function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
begin
result := false;
case p.typ of
ait_instruction:
begin
case taicpu(p).opcode of
A_MOV:
PostPeepholeOptMov(p);
end;
end;
end;
end;
begin
casmoptimizer := TCpuAsmOptimizer;
  611. end.