aoptcpu.pas 29 KB

  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
  3. This unit calls the optimization procedures to optimize the assembler
  4. code for x86
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses cgbase, cpubase, aasmtai, aopt, aoptx86, aoptcpub;
  22. type
  23. TCpuAsmOptimizer = class(TX86AsmOptimizer)
  24. function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  25. end;
  26. implementation
  27. uses
  28. globtype, globals,
  29. cutils,
  30. verbose,
  31. cgutils,
  32. aoptobj,
  33. aasmbase, aasmdata, aasmcpu,
  34. itcpugas;
  35. function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  36. begin
  37. isFoldableArithOp := False;
  38. case hp1.opcode of
  39. A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
  40. isFoldableArithOp :=
  41. (taicpu(hp1).oper[1]^.typ = top_reg) and
  42. (taicpu(hp1).oper[1]^.reg = reg) and
  43. ((taicpu(hp1).oper[0]^.typ = top_const) or
  44. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  45. (taicpu(hp1).oper[0]^.reg<>reg)));
  46. A_INC, A_DEC:
  47. isFoldableArithOp :=
  48. (taicpu(hp1).oper[0]^.typ = top_reg) and
  49. (taicpu(hp1).oper[0]^.reg = reg);
  50. end;
  51. end;
  52. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  53. var
  54. hp1, hp2: tai;
  55. GetNextIntruction_p : boolean;
  56. TmpUsedRegs : TAllUsedRegs;
  57. begin
  58. Result := False;
  59. case p.typ of
  60. ait_instruction:
  61. begin
  62. case taicpu(p).opcode of
  63. A_AND:
  64. begin
  65. if (taicpu(p).oper[0]^.typ = top_const) and
  66. (taicpu(p).oper[1]^.typ = top_reg) and
  67. GetNextInstruction(p, hp1) and
  68. (tai(hp1).typ = ait_instruction) and
  69. (taicpu(hp1).opcode = A_AND) and
  70. (taicpu(hp1).oper[0]^.typ = top_const) and
  71. (taicpu(hp1).oper[1]^.typ = top_reg) and
  72. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(
  73. taicpu(hp1).oper[1]^.reg)) and
  74. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(
  75. taicpu(hp1).oper[1]^.reg)) and
  76. (abs(taicpu(p).oper[0]^.val and
  77. taicpu(hp1).oper[0]^.val)<$80000000) then
  78. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  79. begin
  80. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and
  81. taicpu(hp1).oper[0]^.val);
  82. if (cs_asm_source in current_settings.globalswitches) then
  83. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var1')),p);
  84. asml.remove(p);
  85. p.Free;
  86. p:=hp1;
  87. end
  88. else if (taicpu(p).oper[0]^.typ = top_const) and
  89. (taicpu(p).oper[1]^.typ = top_reg) and
  90. GetNextInstruction(p, hp1) and
  91. MatchInstruction(hp1,A_MOVZX,[]) and
  92. (taicpu(hp1).oper[0]^.typ = top_reg) and
  93. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  94. (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  95. (((taicpu(p).opsize=S_W) and
  96. (taicpu(hp1).opsize=S_BW)) or
  97. ((taicpu(p).opsize=S_L) and
  98. (taicpu(hp1).opsize in [S_WL,S_BL])) or
  99. ((taicpu(p).opsize=S_Q) and
  100. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  101. ) then
  102. begin
  103. if (((taicpu(hp1).opsize) in [S_BW,S_BL,S_BQ]) and
  104. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)) or
  105. (((taicpu(hp1).opsize) in [S_WL,S_WQ]) and
  106. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val)) or
  107. (((taicpu(hp1).opsize)=S_LQ) and
  108. ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
  109. ) then
  110. begin
  111. if (cs_asm_source in current_settings.globalswitches) then
  112. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,AndMovzToAnd')),p);
  113. asml.remove(hp1);
  114. hp1.free;
  115. end;
  116. end
  117. else if (taicpu(p).oper[0]^.typ = top_const) and
  118. (taicpu(p).oper[1]^.typ = top_reg) and
  119. GetNextInstruction(p, hp1) and
  120. MatchInstruction(hp1,A_MOVSX,A_MOVSXD,[]) and
  121. (taicpu(hp1).oper[0]^.typ = top_reg) and
  122. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  123. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  124. (((taicpu(p).opsize=S_W) and
  125. (taicpu(hp1).opsize=S_BW)) or
  126. ((taicpu(p).opsize=S_L) and
  127. (taicpu(hp1).opsize in [S_WL,S_BL])) or
  128. ((taicpu(p).opsize=S_Q) and
  129. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  130. ) then
  131. begin
  132. if (((taicpu(hp1).opsize) in [S_BW,S_BL,S_BQ]) and
  133. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)) or
  134. (((taicpu(hp1).opsize) in [S_WL,S_WQ]) and
  135. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val)) or
  136. (((taicpu(hp1).opsize)=S_LQ) and
  137. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  138. ) then
  139. begin
  140. if (cs_asm_source in current_settings.globalswitches) then
  141. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,AndMovsxToAnd')),p);
  142. asml.remove(hp1);
  143. hp1.free;
  144. end;
  145. end;
  146. (* else
  147. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  148. jump, but only if it's a conditional jump (PFV) }
  149. if (taicpu(p).oper[1]^.typ = top_reg) and
  150. GetNextInstruction(p, hp1) and
  151. (hp1.typ = ait_instruction) and
  152. (taicpu(hp1).is_jmp) and
  153. (taicpu(hp1).opcode<>A_JMP) and
  154. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  155. taicpu(p).opcode := A_TEST;*)
  156. end;
  157. A_MOV:
  158. { removes superfluous And's after mov's }
  159. begin
  160. if not(cs_opt_level3 in current_settings.optimizerswitches) then
  161. exit;
  162. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  163. if (taicpu(p).oper[1]^.typ = top_reg) and
  164. GetNextIntruction_p and
  165. (tai(hp1).typ = ait_instruction) and
  166. (taicpu(hp1).opcode = A_AND) and
  167. (taicpu(hp1).oper[0]^.typ = top_const) and
  168. (taicpu(hp1).oper[1]^.typ = top_reg) and
  169. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  170. case taicpu(p).opsize Of
  171. S_L:
  172. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  173. begin
  174. if (cs_asm_source in current_settings.globalswitches) then
  175. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2a')),p);
  176. asml.remove(hp1);
  177. hp1.free;
  178. end;
  179. end
  180. { Next instruction is also a MOV ? }
  181. else if GetNextIntruction_p and
  182. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  183. begin
  184. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  185. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  186. {mov reg1, mem1 or mov mem1, reg1
  187. mov mem2, reg2 mov reg2, mem2}
  188. begin
  189. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  190. {mov reg1, mem1 or mov mem1, reg1
  191. mov mem2, reg1 mov reg2, mem1}
  192. begin
  193. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  194. { Removes the second statement from
  195. mov reg1, mem1/reg2
  196. mov mem1/reg2, reg1 }
  197. begin
  198. { if (taicpu(p).oper[0]^.typ = top_reg) then
  199. AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs); }
  200. if (cs_asm_source in current_settings.globalswitches) then
  201. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,MovMov2Mov1')),p);
  202. asml.remove(hp1);
  203. hp1.free;
  204. end;
  205. end
  206. else if (taicpu(p).oper[1]^.typ=top_ref) and
  207. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  208. begin
  209. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  210. if (cs_asm_source in current_settings.globalswitches) then
  211. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,MovMov2MovMov1')),p);
  212. end;
  213. end
  214. end
  215. else if (taicpu(p).oper[1]^.typ = top_reg) and
  216. GetNextIntruction_p and
  217. (hp1.typ = ait_instruction) and
  218. GetNextInstruction(hp1, hp2) and
  219. (hp2.typ = ait_instruction) and
  220. (taicpu(hp2).opcode = A_MOV) and
  221. (taicpu(hp2).oper[0]^.typ = top_reg) and
  222. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  223. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  224. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  225. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  226. ) then
  227. { change movsX/movzX reg/ref, reg2 }
  228. { add/sub/or/... reg3/$const, reg2 }
  229. { mov reg2 reg/ref }
  230. { to add/sub/or/... reg3/$const, reg/ref }
  231. begin
  232. CopyUsedRegs(TmpUsedRegs);
  233. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  234. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  235. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  236. begin
  237. { by example:
  238. movswl %si,%eax movswl %si,%eax p
  239. decl %eax addl %edx,%eax hp1
  240. movw %ax,%si movw %ax,%si hp2
  241. ->
  242. movswl %si,%eax movswl %si,%eax p
  243. decw %eax addw %edx,%eax hp1
  244. movw %ax,%si movw %ax,%si hp2
  245. }
  246. if (cs_asm_source in current_settings.globalswitches) then
  247. begin
  248. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2')),p);
  249. asml.insertbefore(tai_comment.create(strpnew('P='+std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize])),p);
  250. asml.insertbefore(tai_comment.create(strpnew('HP1='+std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize])),p);
  251. asml.insertbefore(tai_comment.create(strpnew('HP2='+std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize])),p);
  252. end;
  253. taicpu(hp1).changeopsize(taicpu(p).opsize);
  254. {
  255. ->
  256. movswl %si,%eax movswl %si,%eax p
  257. decw %si addw %dx,%si hp1
  258. movw %ax,%si movw %ax,%si hp2
  259. }
  260. case taicpu(hp1).ops of
  261. 1:
  262. begin
  263. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  264. if taicpu(hp1).oper[0]^.typ=top_reg then
  265. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(p).oper[1]^.reg));
  266. end;
  267. 2:
  268. begin
  269. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  270. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  271. (taicpu(hp1).opcode<>A_SHL) and
  272. (taicpu(hp1).opcode<>A_SHR) and
  273. (taicpu(hp1).opcode<>A_SAR) then
  274. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(p).oper[1]^.reg));
  275. end;
  276. else
  277. internalerror(2008042701);
  278. end;
  279. {
  280. ->
  281. decw %si addw %dx,%si p
  282. }
  283. if (cs_asm_source in current_settings.globalswitches) then
  284. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2')),p);
  285. asml.remove(p);
  286. asml.remove(hp2);
  287. p.Free;
  288. hp2.Free;
  289. p := hp1;
  290. end;
  291. ReleaseUsedRegs(TmpUsedRegs);
  292. end
  293. end;
  294. A_MOVSX,
  295. A_MOVZX:
  296. begin
  297. if (taicpu(p).oper[1]^.typ = top_reg) and
  298. GetNextInstruction(p, hp1) and
  299. (hp1.typ = ait_instruction) and
  300. IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) and
  301. GetNextInstruction(hp1, hp2) and
  302. (hp2.typ = ait_instruction) and
  303. (taicpu(hp2).opcode = A_MOV) and
  304. (taicpu(hp2).oper[0]^.typ = top_reg) and
  305. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) then
  306. { change movsX/movzX reg/ref, reg2 }
  307. { add/sub/or/... reg3/$const, reg2 }
  308. { mov reg2 reg/ref }
  309. { to add/sub/or/... reg3/$const, reg/ref }
  310. begin
  311. { by example:
  312. movswl %si,%eax movswl %si,%eax p
  313. decl %eax addl %edx,%eax hp1
  314. movw %ax,%si movw %ax,%si hp2
  315. ->
  316. movswl %si,%eax movswl %si,%eax p
  317. decw %eax addw %edx,%eax hp1
  318. movw %ax,%si movw %ax,%si hp2
  319. }
  320. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  321. {
  322. ->
  323. movswl %si,%eax movswl %si,%eax p
  324. decw %si addw %dx,%si hp1
  325. movw %ax,%si movw %ax,%si hp2
  326. }
  327. case taicpu(hp1).ops of
  328. 1:
  329. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  330. 2:
  331. begin
  332. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  333. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  334. setsubreg(taicpu(hp1).oper[0]^.reg,
  335. getsubreg(taicpu(hp2).oper[0]^.reg));
  336. end;
  337. else
  338. internalerror(2008042701);
  339. end;
  340. {
  341. ->
  342. decw %si addw %dx,%si p
  343. }
  344. if (cs_asm_source in current_settings.globalswitches) then
  345. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var3')),p);
  346. asml.remove(p);
  347. asml.remove(hp2);
  348. p.Free;
  349. hp2.Free;
  350. p := hp1;
  351. end
  352. { removes superfluous And's after movzx's }
  353. else if taicpu(p).opcode = A_MOVZX then
  354. begin
  355. if (taicpu(p).oper[1]^.typ = top_reg) and
  356. GetNextInstruction(p, hp1) and
  357. (tai(hp1).typ = ait_instruction) and
  358. (taicpu(hp1).opcode = A_AND) and
  359. (taicpu(hp1).oper[0]^.typ = top_const) and
  360. (taicpu(hp1).oper[1]^.typ = top_reg) and
  361. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  362. begin
  363. case taicpu(p).opsize of
  364. S_BL, S_BW, S_BQ:
  365. if (taicpu(hp1).oper[0]^.val = $ff) then
  366. begin
  367. if (cs_asm_source in current_settings.globalswitches) then
  368. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var4')),p);
  369. asml.remove(hp1);
  370. hp1.Free;
  371. end;
  372. S_WL, S_WQ:
  373. if (taicpu(hp1).oper[0]^.val = $ffff) then
  374. begin
  375. if (cs_asm_source in current_settings.globalswitches) then
  376. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var5')),p);
  377. asml.remove(hp1);
  378. hp1.Free;
  379. end;
  380. S_LQ:
  381. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  382. begin
  383. if (cs_asm_source in current_settings.globalswitches) then
  384. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
  385. asml.remove(hp1);
  386. hp1.Free;
  387. end;
  388. end;
  389. end;
  390. { changes some movzx constructs to faster synonims (all examples
  391. are given with eax/ax, but are also valid for other registers)}
  392. if (taicpu(p).oper[1]^.typ = top_reg) then
  393. if (taicpu(p).oper[0]^.typ = top_reg) then
  394. case taicpu(p).opsize of
  395. S_BW:
  396. begin
  397. if (getsupreg(taicpu(p).oper[0]^.reg) =
  398. getsupreg(taicpu(p).oper[1]^.reg)) and not
  399. (cs_opt_size in current_settings.optimizerswitches) then
  400. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  401. begin
  402. taicpu(p).opcode := A_AND;
  403. taicpu(p).changeopsize(S_W);
  404. taicpu(p).loadConst(0, $ff);
  405. if (cs_asm_source in current_settings.globalswitches) then
  406. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var7')),p);
  407. end
  408. else if GetNextInstruction(p, hp1) and
  409. (tai(hp1).typ = ait_instruction) and
  410. (taicpu(hp1).opcode = A_AND) and
  411. (taicpu(hp1).oper[0]^.typ = top_const) and
  412. (taicpu(hp1).oper[1]^.typ = top_reg) and
  413. (taicpu(hp1).oper[1]^.reg =
  414. taicpu(p).oper[1]^.reg) then
  415. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  416. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  417. begin
  418. if (cs_asm_source in current_settings.globalswitches) then
  419. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var8')),p);
  420. taicpu(p).opcode := A_MOV;
  421. taicpu(p).changeopsize(S_W);
  422. setsubreg(taicpu(p).oper[0]^.reg, R_SUBW);
  423. taicpu(hp1).loadConst(
  424. 0, taicpu(hp1).oper[0]^.val and $ff);
  425. end;
  426. end;
  427. S_BL:
  428. begin
  429. if (getsupreg(taicpu(p).oper[0]^.reg) =
  430. getsupreg(taicpu(p).oper[1]^.reg)) and not
  431. (cs_opt_size in current_settings.optimizerswitches) then
  432. { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  433. begin
  434. if (cs_asm_source in current_settings.globalswitches) then
  435. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var9')),p);
  436. taicpu(p).opcode := A_AND;
  437. taicpu(p).changeopsize(S_L);
  438. taicpu(p).loadConst(0, $ff);
  439. end
  440. else if GetNextInstruction(p, hp1) and
  441. (tai(hp1).typ = ait_instruction) and
  442. (taicpu(hp1).opcode = A_AND) and
  443. (taicpu(hp1).oper[0]^.typ = top_const) and
  444. (taicpu(hp1).oper[1]^.typ = top_reg) and
  445. (taicpu(hp1).oper[1]^.reg =
  446. taicpu(p).oper[1]^.reg) then
  447. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  448. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  449. begin
  450. if (cs_asm_source in current_settings.globalswitches) then
  451. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var10')),p);
  452. taicpu(p).opcode := A_MOV;
  453. taicpu(p).changeopsize(S_L);
  454. { do not use R_SUBWHOLE
  455. as movl %rdx,%eax
  456. is invalid in assembler PM }
  457. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  458. taicpu(hp1).loadConst(
  459. 0, taicpu(hp1).oper[0]^.val and $ff);
  460. end;
  461. end;
  462. S_WL:
  463. begin
  464. if (getsupreg(taicpu(p).oper[0]^.reg) =
  465. getsupreg(taicpu(p).oper[1]^.reg)) and not
  466. (cs_opt_size in current_settings.optimizerswitches) then
  467. { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
  468. begin
  469. if (cs_asm_source in current_settings.globalswitches) then
  470. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var11')),p);
  471. taicpu(p).opcode := A_AND;
  472. taicpu(p).changeopsize(S_L);
  473. taicpu(p).loadConst(0, $ffff);
  474. end
  475. else if GetNextInstruction(p, hp1) and
  476. (tai(hp1).typ = ait_instruction) and
  477. (taicpu(hp1).opcode = A_AND) and
  478. (taicpu(hp1).oper[0]^.typ = top_const) and
  479. (taicpu(hp1).oper[1]^.typ = top_reg) and
  480. (taicpu(hp1).oper[1]^.reg =
  481. taicpu(p).oper[1]^.reg) then
  482. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  483. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  484. begin
  485. if (cs_asm_source in current_settings.globalswitches) then
  486. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var12')),p);
  487. taicpu(p).opcode := A_MOV;
  488. taicpu(p).changeopsize(S_L);
  489. { do not use R_SUBWHOLE
  490. as movl %rdx,%eax
  491. is invalid in assembler PM }
  492. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  493. taicpu(hp1).loadConst(
  494. 0, taicpu(hp1).oper[0]^.val and $ffff);
  495. end;
  496. end;
  497. end
  498. else if (taicpu(p).oper[0]^.typ = top_ref) then
  499. begin
  500. if GetNextInstruction(p, hp1) and
  501. (tai(hp1).typ = ait_instruction) and
  502. (taicpu(hp1).opcode = A_AND) and
  503. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  504. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  505. (taicpu(hp1).oper[1]^.reg =
  506. taicpu(p).oper[1]^.reg) then
  507. begin
  508. taicpu(p).opcode := A_MOV;
  509. case taicpu(p).opsize of
  510. S_BL:
  511. begin
  512. if (cs_asm_source in current_settings.globalswitches) then
  513. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var13')),p);
  514. taicpu(p).changeopsize(S_L);
  515. taicpu(hp1).loadConst(
  516. 0, taicpu(hp1).oper[0]^.val and $ff);
  517. end;
  518. S_WL:
  519. begin
  520. if (cs_asm_source in current_settings.globalswitches) then
  521. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var14')),p);
  522. taicpu(p).changeopsize(S_L);
  523. taicpu(hp1).loadConst(
  524. 0, taicpu(hp1).oper[0]^.val and $ffff);
  525. end;
  526. S_BW:
  527. begin
  528. if (cs_asm_source in current_settings.globalswitches) then
  529. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var15')),p);
  530. taicpu(p).changeopsize(S_W);
  531. taicpu(hp1).loadConst(
  532. 0, taicpu(hp1).oper[0]^.val and $ff);
  533. end;
  534. S_BQ:
  535. begin
  536. if (cs_asm_source in current_settings.globalswitches) then
  537. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var16')),p);
  538. taicpu(p).changeopsize(S_Q);
  539. taicpu(hp1).loadConst(
  540. 0, taicpu(hp1).oper[0]^.val and $ff);
  541. end;
  542. S_WQ:
  543. begin
  544. if (cs_asm_source in current_settings.globalswitches) then
  545. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var17')),p);
  546. taicpu(p).changeopsize(S_Q);
  547. taicpu(hp1).loadConst(
  548. 0, taicpu(hp1).oper[0]^.val and $ffff);
  549. end;
  550. S_LQ:
  551. begin
  552. if (cs_asm_source in current_settings.globalswitches) then
  553. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var18')),p);
  554. taicpu(p).changeopsize(S_Q);
  555. taicpu(hp1).loadConst(
  556. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  557. end;
  558. end;
  559. end;
  560. end;
  561. end;
  562. end;
  563. A_VDIVSD,
  564. A_VDIVSS,
  565. A_VSUBSD,
  566. A_VSUBSS,
  567. A_VMULSD,
  568. A_VMULSS,
  569. A_VADDSD,
  570. A_VADDSS:
  571. begin
  572. if GetNextInstruction(p,hp1) and
  573. { we mix single and double opperations here because we assume that the compiler
  574. generates vmovapd only after double operations and vmovaps only after single operations }
  575. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  576. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  577. (taicpu(hp1).oper[1]^.typ=top_reg) then
  578. begin
  579. CopyUsedRegs(TmpUsedRegs);
  580. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  581. If not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
  582. begin
  583. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  584. asml.Remove(hp1);
  585. hp1.Free;
  586. end;
  587. end;
  588. end;
  589. end;
  590. end;
  591. end;
  592. end;
  593. begin
  594. casmoptimizer := TCpuAsmOptimizer;
  595. end.