aoptcpu.pas 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701
  1. {
  2. Copyright (c) 1998-2004 by Jonas Maebe
    This unit calls the optimization procedures to optimize the assembler
    code for x86-64
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit aoptcpu;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses cpubase, aasmtai, aopt, aoptcpub;
  22. type
  { CPU-specific assembler optimizer. It derives from TAsmOptimizer and
    overrides only the first peephole pass. }
  TCpuAsmOptimizer = class(TAsmOptimizer)
    { Inspects the instruction at p (and the one or two instructions after
      it) and rewrites or removes instructions in place; p may be moved to a
      following instruction when the original p is removed. }
    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
  end;
  26. implementation
  27. uses
  28. globtype, globals,
  29. cutils,
  30. verbose,
  31. cgbase, cgutils,
  32. aoptobj,
  33. aasmbase, aasmdata, aasmcpu,
  34. itcpugas;
  35. function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  36. begin
  37. isFoldableArithOp := False;
  38. case hp1.opcode of
  39. A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
  40. isFoldableArithOp :=
  41. (taicpu(hp1).oper[1]^.typ = top_reg) and
  42. (taicpu(hp1).oper[1]^.reg = reg) and
  43. ((taicpu(hp1).oper[0]^.typ = top_const) or
  44. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  45. (taicpu(hp1).oper[0]^.reg<>reg)));
  46. A_INC, A_DEC:
  47. isFoldableArithOp :=
  48. (taicpu(hp1).oper[0]^.typ = top_reg) and
  49. (taicpu(hp1).oper[0]^.reg = reg);
  50. end;
  51. end;
  52. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  53. begin
  54. result :=
  55. (instr.typ = ait_instruction) and
  56. (taicpu(instr).opcode = op) and
  57. ((opsize = []) or (taicpu(instr).opsize in opsize));
  58. end;
  59. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  60. begin
  61. result :=
  62. (instr.typ = ait_instruction) and
  63. ((taicpu(instr).opcode = op1) or
  64. (taicpu(instr).opcode = op2)
  65. ) and
  66. ((opsize = []) or (taicpu(instr).opsize in opsize));
  67. end;
  68. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  69. begin
  70. result :=
  71. (instr.typ = ait_instruction) and
  72. ((taicpu(instr).opcode = op1) or
  73. (taicpu(instr).opcode = op2) or
  74. (taicpu(instr).opcode = op3)
  75. ) and
  76. ((opsize = []) or (taicpu(instr).opsize in opsize));
  77. end;
  78. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  79. begin
  80. result := (oper.typ = top_reg) and (oper.reg = reg);
  81. end;
  82. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  83. begin
  84. result := (oper.typ = top_const) and (oper.val = a);
  85. end;
  86. function refsequal(const r1, r2: treference): boolean;
  87. begin
  88. refsequal :=
  89. (r1.offset = r2.offset) and
  90. (r1.segment = r2.segment) and (r1.base = r2.base) and
  91. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  92. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  93. (r1.relsymbol = r2.relsymbol);
  94. end;
  95. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
  96. begin
  97. result := oper1.typ = oper2.typ;
  98. if result then
  99. case oper1.typ of
  100. top_const:
  101. Result:=oper1.val = oper2.val;
  102. top_reg:
  103. Result:=oper1.reg = oper2.reg;
  104. top_ref:
  105. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  106. else
  107. internalerror(2013102801);
  108. end
  109. end;
  110. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  111. begin
  112. Result:=(ref.offset=0) and
  113. (ref.scalefactor in [0,1]) and
  114. (ref.segment=NR_NO) and
  115. (ref.symbol=nil) and
  116. (ref.relsymbol=nil) and
  117. ((base=NR_INVALID) or
  118. (ref.base=base)) and
  119. ((index=NR_INVALID) or
  120. (ref.index=index));
  121. end;
  122. function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
  123. var
  124. next1: tai;
  125. hp1, hp2: tai;
  126. GetNextIntruction_p : boolean;
  127. TmpUsedRegs : TAllUsedRegs;
  128. begin
  129. Result := False;
  130. case p.typ of
  131. ait_instruction:
  132. begin
  133. case taicpu(p).opcode of
  134. A_AND:
  135. begin
  136. if (taicpu(p).oper[0]^.typ = top_const) and
  137. (taicpu(p).oper[1]^.typ = top_reg) and
  138. GetNextInstruction(p, hp1) and
  139. (tai(hp1).typ = ait_instruction) and
  140. (taicpu(hp1).opcode = A_AND) and
  141. (taicpu(hp1).oper[0]^.typ = top_const) and
  142. (taicpu(hp1).oper[1]^.typ = top_reg) and
  143. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(
  144. taicpu(hp1).oper[1]^.reg)) and
  145. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(
  146. taicpu(hp1).oper[1]^.reg)) and
  147. (abs(taicpu(p).oper[0]^.val and
  148. taicpu(hp1).oper[0]^.val)<$80000000) then
  149. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  150. begin
  151. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and
  152. taicpu(hp1).oper[0]^.val);
  153. if (cs_asm_source in current_settings.globalswitches) then
  154. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var1')),p);
  155. asml.remove(p);
  156. p.Free;
  157. p:=hp1;
  158. end
  159. else if (taicpu(p).oper[0]^.typ = top_const) and
  160. (taicpu(p).oper[1]^.typ = top_reg) and
  161. GetNextInstruction(p, hp1) and
  162. MatchInstruction(hp1,A_MOVZX,[]) and
  163. (taicpu(hp1).oper[0]^.typ = top_reg) and
  164. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  165. (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  166. (((taicpu(p).opsize=S_W) and
  167. (taicpu(hp1).opsize=S_BW)) or
  168. ((taicpu(p).opsize=S_L) and
  169. (taicpu(hp1).opsize in [S_WL,S_BL])) or
  170. ((taicpu(p).opsize=S_Q) and
  171. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  172. ) then
  173. begin
  174. if (((taicpu(hp1).opsize) in [S_BW,S_BL,S_BQ]) and
  175. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)) or
  176. (((taicpu(hp1).opsize) in [S_WL,S_WQ]) and
  177. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val)) or
  178. (((taicpu(hp1).opsize)=S_LQ) and
  179. ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
  180. ) then
  181. begin
  182. if (cs_asm_source in current_settings.globalswitches) then
  183. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,AndMovzToAnd')),p);
  184. asml.remove(hp1);
  185. hp1.free;
  186. end;
  187. end
  188. else if (taicpu(p).oper[0]^.typ = top_const) and
  189. (taicpu(p).oper[1]^.typ = top_reg) and
  190. GetNextInstruction(p, hp1) and
  191. MatchInstruction(hp1,A_MOVSX,A_MOVSXD,[]) and
  192. (taicpu(hp1).oper[0]^.typ = top_reg) and
  193. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  194. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  195. (((taicpu(p).opsize=S_W) and
  196. (taicpu(hp1).opsize=S_BW)) or
  197. ((taicpu(p).opsize=S_L) and
  198. (taicpu(hp1).opsize in [S_WL,S_BL])) or
  199. ((taicpu(p).opsize=S_Q) and
  200. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  201. ) then
  202. begin
  203. if (((taicpu(hp1).opsize) in [S_BW,S_BL,S_BQ]) and
  204. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)) or
  205. (((taicpu(hp1).opsize) in [S_WL,S_WQ]) and
  206. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val)) or
  207. (((taicpu(hp1).opsize)=S_LQ) and
  208. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  209. ) then
  210. begin
  211. if (cs_asm_source in current_settings.globalswitches) then
  212. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,AndMovsxToAnd')),p);
  213. asml.remove(hp1);
  214. hp1.free;
  215. end;
  216. end;
  217. (* else
  218. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  219. jump, but only if it's a conditional jump (PFV) }
  220. if (taicpu(p).oper[1]^.typ = top_reg) and
  221. GetNextInstruction(p, hp1) and
  222. (hp1.typ = ait_instruction) and
  223. (taicpu(hp1).is_jmp) and
  224. (taicpu(hp1).opcode<>A_JMP) and
  225. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  226. taicpu(p).opcode := A_TEST;*)
  227. end;
  228. A_MOV:
  229. { removes superfluous And's after mov's }
  230. begin
  231. if not(cs_opt_level3 in current_settings.optimizerswitches) then
  232. exit;
  233. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  234. if (taicpu(p).oper[1]^.typ = top_reg) and
  235. GetNextIntruction_p and
  236. (tai(hp1).typ = ait_instruction) and
  237. (taicpu(hp1).opcode = A_AND) and
  238. (taicpu(hp1).oper[0]^.typ = top_const) and
  239. (taicpu(hp1).oper[1]^.typ = top_reg) and
  240. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  241. case taicpu(p).opsize Of
  242. S_L:
  243. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  244. begin
  245. if (cs_asm_source in current_settings.globalswitches) then
  246. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2a')),p);
  247. asml.remove(hp1);
  248. hp1.free;
  249. end;
  250. end
  251. { Next instruction is also a MOV ? }
  252. else if GetNextIntruction_p and
  253. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  254. begin
  255. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  256. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  257. {mov reg1, mem1 or mov mem1, reg1
  258. mov mem2, reg2 mov reg2, mem2}
  259. begin
  260. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  261. {mov reg1, mem1 or mov mem1, reg1
  262. mov mem2, reg1 mov reg2, mem1}
  263. begin
  264. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  265. { Removes the second statement from
  266. mov reg1, mem1/reg2
  267. mov mem1/reg2, reg1 }
  268. begin
  269. { if (taicpu(p).oper[0]^.typ = top_reg) then
  270. AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs); }
  271. if (cs_asm_source in current_settings.globalswitches) then
  272. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,MovMov2Mov1')),p);
  273. asml.remove(hp1);
  274. hp1.free;
  275. end;
  276. end
  277. else if (taicpu(p).oper[1]^.typ=top_ref) and
  278. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  279. begin
  280. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  281. if (cs_asm_source in current_settings.globalswitches) then
  282. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,MovMov2MovMov1')),p);
  283. end;
  284. end
  285. end
  286. else if (taicpu(p).oper[1]^.typ = top_reg) and
  287. GetNextIntruction_p and
  288. (hp1.typ = ait_instruction) and
  289. GetNextInstruction(hp1, hp2) and
  290. (hp2.typ = ait_instruction) and
  291. (taicpu(hp2).opcode = A_MOV) and
  292. (taicpu(hp2).oper[0]^.typ = top_reg) and
  293. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  294. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  295. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  296. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  297. ) then
  298. { change movsX/movzX reg/ref, reg2 }
  299. { add/sub/or/... reg3/$const, reg2 }
  300. { mov reg2 reg/ref }
  301. { to add/sub/or/... reg3/$const, reg/ref }
  302. begin
  303. CopyUsedRegs(TmpUsedRegs);
  304. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  305. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  306. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  307. begin
  308. { by example:
  309. movswl %si,%eax movswl %si,%eax p
  310. decl %eax addl %edx,%eax hp1
  311. movw %ax,%si movw %ax,%si hp2
  312. ->
  313. movswl %si,%eax movswl %si,%eax p
  314. decw %eax addw %edx,%eax hp1
  315. movw %ax,%si movw %ax,%si hp2
  316. }
  317. if (cs_asm_source in current_settings.globalswitches) then
  318. begin
  319. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2')),p);
  320. asml.insertbefore(tai_comment.create(strpnew('P='+std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize])),p);
  321. asml.insertbefore(tai_comment.create(strpnew('HP1='+std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize])),p);
  322. asml.insertbefore(tai_comment.create(strpnew('HP2='+std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize])),p);
  323. end;
  324. taicpu(hp1).changeopsize(taicpu(p).opsize);
  325. {
  326. ->
  327. movswl %si,%eax movswl %si,%eax p
  328. decw %si addw %dx,%si hp1
  329. movw %ax,%si movw %ax,%si hp2
  330. }
  331. case taicpu(hp1).ops of
  332. 1:
  333. begin
  334. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  335. if taicpu(hp1).oper[0]^.typ=top_reg then
  336. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(p).oper[1]^.reg));
  337. end;
  338. 2:
  339. begin
  340. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  341. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  342. (taicpu(hp1).opcode<>A_SHL) and
  343. (taicpu(hp1).opcode<>A_SHR) and
  344. (taicpu(hp1).opcode<>A_SAR) then
  345. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(p).oper[1]^.reg));
  346. end;
  347. else
  348. internalerror(2008042701);
  349. end;
  350. {
  351. ->
  352. decw %si addw %dx,%si p
  353. }
  354. if (cs_asm_source in current_settings.globalswitches) then
  355. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var2')),p);
  356. asml.remove(p);
  357. asml.remove(hp2);
  358. p.Free;
  359. hp2.Free;
  360. p := hp1;
  361. end;
  362. ReleaseUsedRegs(TmpUsedRegs);
  363. end
  364. end;
  365. A_MOVSX,
  366. A_MOVZX:
  367. begin
  368. if (taicpu(p).oper[1]^.typ = top_reg) and
  369. GetNextInstruction(p, hp1) and
  370. (hp1.typ = ait_instruction) and
  371. IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) and
  372. GetNextInstruction(hp1, hp2) and
  373. (hp2.typ = ait_instruction) and
  374. (taicpu(hp2).opcode = A_MOV) and
  375. (taicpu(hp2).oper[0]^.typ = top_reg) and
  376. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) then
  377. { change movsX/movzX reg/ref, reg2 }
  378. { add/sub/or/... reg3/$const, reg2 }
  379. { mov reg2 reg/ref }
  380. { to add/sub/or/... reg3/$const, reg/ref }
  381. begin
  382. { by example:
  383. movswl %si,%eax movswl %si,%eax p
  384. decl %eax addl %edx,%eax hp1
  385. movw %ax,%si movw %ax,%si hp2
  386. ->
  387. movswl %si,%eax movswl %si,%eax p
  388. decw %eax addw %edx,%eax hp1
  389. movw %ax,%si movw %ax,%si hp2
  390. }
  391. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  392. {
  393. ->
  394. movswl %si,%eax movswl %si,%eax p
  395. decw %si addw %dx,%si hp1
  396. movw %ax,%si movw %ax,%si hp2
  397. }
  398. case taicpu(hp1).ops of
  399. 1:
  400. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  401. 2:
  402. begin
  403. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  404. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  405. setsubreg(taicpu(hp1).oper[0]^.reg,
  406. getsubreg(taicpu(hp2).oper[0]^.reg));
  407. end;
  408. else
  409. internalerror(2008042701);
  410. end;
  411. {
  412. ->
  413. decw %si addw %dx,%si p
  414. }
  415. if (cs_asm_source in current_settings.globalswitches) then
  416. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var3')),p);
  417. asml.remove(p);
  418. asml.remove(hp2);
  419. p.Free;
  420. hp2.Free;
  421. p := hp1;
  422. end
  423. { removes superfluous And's after movzx's }
  424. else if taicpu(p).opcode = A_MOVZX then
  425. begin
  426. if (taicpu(p).oper[1]^.typ = top_reg) and
  427. GetNextInstruction(p, hp1) and
  428. (tai(hp1).typ = ait_instruction) and
  429. (taicpu(hp1).opcode = A_AND) and
  430. (taicpu(hp1).oper[0]^.typ = top_const) and
  431. (taicpu(hp1).oper[1]^.typ = top_reg) and
  432. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  433. begin
  434. case taicpu(p).opsize of
  435. S_BL, S_BW, S_BQ:
  436. if (taicpu(hp1).oper[0]^.val = $ff) then
  437. begin
  438. if (cs_asm_source in current_settings.globalswitches) then
  439. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var4')),p);
  440. asml.remove(hp1);
  441. hp1.Free;
  442. end;
  443. S_WL, S_WQ:
  444. if (taicpu(hp1).oper[0]^.val = $ffff) then
  445. begin
  446. if (cs_asm_source in current_settings.globalswitches) then
  447. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var5')),p);
  448. asml.remove(hp1);
  449. hp1.Free;
  450. end;
  451. S_LQ:
  452. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  453. begin
  454. if (cs_asm_source in current_settings.globalswitches) then
  455. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
  456. asml.remove(hp1);
  457. hp1.Free;
  458. end;
  459. end;
  460. end;
  461. { changes some movzx constructs to faster synonims (all examples
  462. are given with eax/ax, but are also valid for other registers)}
  463. if (taicpu(p).oper[1]^.typ = top_reg) then
  464. if (taicpu(p).oper[0]^.typ = top_reg) then
  465. case taicpu(p).opsize of
  466. S_BW:
  467. begin
  468. if (getsupreg(taicpu(p).oper[0]^.reg) =
  469. getsupreg(taicpu(p).oper[1]^.reg)) and not
  470. (cs_opt_size in current_settings.optimizerswitches) then
  471. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  472. begin
  473. taicpu(p).opcode := A_AND;
  474. taicpu(p).changeopsize(S_W);
  475. taicpu(p).loadConst(0, $ff);
  476. if (cs_asm_source in current_settings.globalswitches) then
  477. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var7')),p);
  478. end
  479. else if GetNextInstruction(p, hp1) and
  480. (tai(hp1).typ = ait_instruction) and
  481. (taicpu(hp1).opcode = A_AND) and
  482. (taicpu(hp1).oper[0]^.typ = top_const) and
  483. (taicpu(hp1).oper[1]^.typ = top_reg) and
  484. (taicpu(hp1).oper[1]^.reg =
  485. taicpu(p).oper[1]^.reg) then
  486. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  487. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  488. begin
  489. if (cs_asm_source in current_settings.globalswitches) then
  490. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var8')),p);
  491. taicpu(p).opcode := A_MOV;
  492. taicpu(p).changeopsize(S_W);
  493. setsubreg(taicpu(p).oper[0]^.reg, R_SUBW);
  494. taicpu(hp1).loadConst(
  495. 0, taicpu(hp1).oper[0]^.val and $ff);
  496. end;
  497. end;
  498. S_BL:
  499. begin
  500. if (getsupreg(taicpu(p).oper[0]^.reg) =
  501. getsupreg(taicpu(p).oper[1]^.reg)) and not
  502. (cs_opt_size in current_settings.optimizerswitches) then
  503. { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  504. begin
  505. if (cs_asm_source in current_settings.globalswitches) then
  506. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var9')),p);
  507. taicpu(p).opcode := A_AND;
  508. taicpu(p).changeopsize(S_L);
  509. taicpu(p).loadConst(0, $ff);
  510. end
  511. else if GetNextInstruction(p, hp1) and
  512. (tai(hp1).typ = ait_instruction) and
  513. (taicpu(hp1).opcode = A_AND) and
  514. (taicpu(hp1).oper[0]^.typ = top_const) and
  515. (taicpu(hp1).oper[1]^.typ = top_reg) and
  516. (taicpu(hp1).oper[1]^.reg =
  517. taicpu(p).oper[1]^.reg) then
  518. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  519. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  520. begin
  521. if (cs_asm_source in current_settings.globalswitches) then
  522. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var10')),p);
  523. taicpu(p).opcode := A_MOV;
  524. taicpu(p).changeopsize(S_L);
  525. { do not use R_SUBWHOLE
  526. as movl %rdx,%eax
  527. is invalid in assembler PM }
  528. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  529. taicpu(hp1).loadConst(
  530. 0, taicpu(hp1).oper[0]^.val and $ff);
  531. end;
  532. end;
  533. S_WL:
  534. begin
  535. if (getsupreg(taicpu(p).oper[0]^.reg) =
  536. getsupreg(taicpu(p).oper[1]^.reg)) and not
  537. (cs_opt_size in current_settings.optimizerswitches) then
  538. { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
  539. begin
  540. if (cs_asm_source in current_settings.globalswitches) then
  541. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var11')),p);
  542. taicpu(p).opcode := A_AND;
  543. taicpu(p).changeopsize(S_L);
  544. taicpu(p).loadConst(0, $ffff);
  545. end
  546. else if GetNextInstruction(p, hp1) and
  547. (tai(hp1).typ = ait_instruction) and
  548. (taicpu(hp1).opcode = A_AND) and
  549. (taicpu(hp1).oper[0]^.typ = top_const) and
  550. (taicpu(hp1).oper[1]^.typ = top_reg) and
  551. (taicpu(hp1).oper[1]^.reg =
  552. taicpu(p).oper[1]^.reg) then
  553. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  554. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  555. begin
  556. if (cs_asm_source in current_settings.globalswitches) then
  557. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var12')),p);
  558. taicpu(p).opcode := A_MOV;
  559. taicpu(p).changeopsize(S_L);
  560. { do not use R_SUBWHOLE
  561. as movl %rdx,%eax
  562. is invalid in assembler PM }
  563. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  564. taicpu(hp1).loadConst(
  565. 0, taicpu(hp1).oper[0]^.val and $ffff);
  566. end;
  567. end;
  568. end
  569. else if (taicpu(p).oper[0]^.typ = top_ref) then
  570. begin
  571. if GetNextInstruction(p, hp1) and
  572. (tai(hp1).typ = ait_instruction) and
  573. (taicpu(hp1).opcode = A_AND) and
  574. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  575. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  576. (taicpu(hp1).oper[1]^.reg =
  577. taicpu(p).oper[1]^.reg) then
  578. begin
  579. taicpu(p).opcode := A_MOV;
  580. case taicpu(p).opsize of
  581. S_BL:
  582. begin
  583. if (cs_asm_source in current_settings.globalswitches) then
  584. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var13')),p);
  585. taicpu(p).changeopsize(S_L);
  586. taicpu(hp1).loadConst(
  587. 0, taicpu(hp1).oper[0]^.val and $ff);
  588. end;
  589. S_WL:
  590. begin
  591. if (cs_asm_source in current_settings.globalswitches) then
  592. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var14')),p);
  593. taicpu(p).changeopsize(S_L);
  594. taicpu(hp1).loadConst(
  595. 0, taicpu(hp1).oper[0]^.val and $ffff);
  596. end;
  597. S_BW:
  598. begin
  599. if (cs_asm_source in current_settings.globalswitches) then
  600. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var15')),p);
  601. taicpu(p).changeopsize(S_W);
  602. taicpu(hp1).loadConst(
  603. 0, taicpu(hp1).oper[0]^.val and $ff);
  604. end;
  605. S_BQ:
  606. begin
  607. if (cs_asm_source in current_settings.globalswitches) then
  608. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var16')),p);
  609. taicpu(p).changeopsize(S_Q);
  610. taicpu(hp1).loadConst(
  611. 0, taicpu(hp1).oper[0]^.val and $ff);
  612. end;
  613. S_WQ:
  614. begin
  615. if (cs_asm_source in current_settings.globalswitches) then
  616. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var17')),p);
  617. taicpu(p).changeopsize(S_Q);
  618. taicpu(hp1).loadConst(
  619. 0, taicpu(hp1).oper[0]^.val and $ffff);
  620. end;
  621. S_LQ:
  622. begin
  623. if (cs_asm_source in current_settings.globalswitches) then
  624. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var18')),p);
  625. taicpu(p).changeopsize(S_Q);
  626. taicpu(hp1).loadConst(
  627. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  628. end;
  629. end;
  630. end;
  631. end;
  632. end;
  633. end;
  634. A_VDIVSD,
  635. A_VDIVSS,
  636. A_VSUBSD,
  637. A_VSUBSS,
  638. A_VMULSD,
  639. A_VMULSS,
  640. A_VADDSD,
  641. A_VADDSS:
  642. begin
  643. if GetNextInstruction(p,hp1) and
  644. { we mix single and double opperations here because we assume that the compiler
  645. generates vmovapd only after double operations and vmovaps only after single operations }
  646. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  647. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  648. (taicpu(hp1).oper[1]^.typ=top_reg) then
  649. begin
  650. CopyUsedRegs(TmpUsedRegs);
  651. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  652. If not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)) then
  653. begin
  654. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  655. asml.Remove(hp1);
  656. hp1.Free;
  657. end;
  658. end;
  659. end;
  660. end;
  661. end;
  662. end;
  663. end;
begin
  { Unit initialization: store this unit's optimizer class in casmoptimizer.
    NOTE(review): presumably the generic optimizer driver instantiates
    whatever class is stored there - confirm against the unit that declares
    casmoptimizer. }
  casmoptimizer := TCpuAsmOptimizer;
end.