aoptcpu.pas 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. {
  2. Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
  3. Development Team
  4. This unit implements the PowerPC optimizer object
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit aoptcpu;
  19. Interface
  20. {$i fpcdefs.inc}
  21. uses cpubase, aoptobj, aoptcpub, aopt, aasmtai,aasmdata, aasmcpu;
  Type
    { PowerPC-specific assembler optimizer. It derives from TAsmOptimizer and
      overrides the two CPU-dependent peephole hooks declared below. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { declares no constructor of its own: the inherited one is used }

      { CPU-specific part of the first peephole pass; returns true when it
        rewrote the instruction stream at p }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
      { CPU-specific part of the post-peephole pass; returns true when it
        removed a compare that follows p }
      function PostPeepHoleOptsCpu(var p: tai): boolean; override;
    private
      { helper of PeepHoleOptPass1Cpu: rewrites a compare/mfcr/rlwinm
        sequence (p, next1, next2) into a shorter one; returns true when the
        rewrite was done }
      function cmpi_mfcr_opt(p, next1, next2: taicpu): boolean;
    End;
  30. Implementation
  31. uses
  32. cutils, cgbase, cgcpu, cgobj;
  const
    { Indexed by opcode. A non-a_none entry marks an instruction that
      PeepHoleOptPass1Cpu may treat as "computes a new value into operand 0
      without looking at the old contents of operand 0": a preceding
      "mr" into that register is then folded into the instruction's source
      operands and removed.

      Because of that use, an opcode may only be listed here when operand 0
      is write-only. The following are therefore deliberately a_none:
        - rlwimi/rlwimi. and their simplified forms inslwi/insrwi (they
          insert bits into the existing contents of operand 0, i.e. they
          also read it),
        - stb (operand 0 is the value being stored, not a target),
        - rfi (has no operands at all).
      The table is positional: never add or remove entries, only change
      values in place. }
    calculation_target_op0: array[tasmop] of tasmop = (a_none,
      a_add, a_add_, a_addo, a_addo_, a_addc, a_addc_, a_addco, a_addco_,
      a_adde, a_adde_, a_addeo, a_addeo_, a_addi, a_addic, a_addic_, a_addis,
      a_addme, a_addme_, a_addmeo, a_addmeo_, a_addze, a_addze_, a_addzeo,
      a_addzeo_, a_and, a_and_, a_andc, a_andc_, a_andi_, a_andis_, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_cntlzw, a_cntlzw_, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_divw, a_divw_, a_divwo, a_divwo_,
      a_divwu, a_divwu_, a_divwuo, a_divwuo_, a_none, a_none, a_none, a_eqv,
      a_eqv_, a_extsb, a_extsb_, a_extsh, a_extsh_, a_fabs, a_fabs_, a_fadd,
      a_fadd_, a_fadds, a_fadds_, a_none, a_none, a_none, a_none, a_none,
      a_none, a_fdiv, a_fdiv_, a_fdivs, a_fdivs_, a_fmadd, a_fmadd_, a_fmadds,
      a_fmadds_, a_none, a_fmsub, a_fmsub_, a_fmsubs, a_fmsubs_, a_fmul, a_fmul_,
      a_fmuls, a_fmuls_, a_fnabs, a_fnabs_, a_fneg, a_fneg_, a_fnmadd,
      a_fnmadd_, a_fnmadds, a_fnmadds_, a_fnmsub, a_fnmsub_, a_fnmsubs,
      a_fnmsubs_, a_fres, a_fres_, a_frsp, a_frsp_, a_frsqrte, a_frsqrte_,
      a_none, a_none, a_fsqrt, a_fsqrt_, a_fsqrts, a_fsqrts_, a_fsub, a_fsub_,
      a_fsubs, a_fsubs_, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_mulhw,
      a_mulhw_, a_mulhwu, a_mulhwu_, a_mulli, a_mullw, a_mullw_, a_mullwo,
      a_mullwo_, a_nand, a_nand_, a_neg, a_neg_, a_nego, a_nego_, a_nor, a_nor_,
      a_or, a_or_, a_orc, a_orc_, a_ori, a_oris, a_none {rfi: no operands}, a_none {rlwimi reads op0}, a_none {rlwimi. reads op0},
      a_rlwinm, a_rlwinm_, a_rlwnm, a_rlwnm_, a_none, a_slw, a_slw_, a_sraw, a_sraw_,
      a_srawi, a_srawi_,a_srw, a_srw_, a_none {stb: op0 is a source}, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_subf, a_subf_, a_subfo,
      a_subfo_, a_subfc, a_subfc_, a_subfco, a_subfco_, a_subfe, a_subfe_,
      a_subfeo, a_subfeo_, a_subfic, a_subfme, a_subfme_, a_subfmeo, a_subfmeo_,
      a_subfze, a_subfze_, a_subfzeo, a_subfzeo_, a_none, a_none, a_none,
      a_none, a_none, a_none, a_xor, a_xor_, a_xori, a_xoris,
      { simplified mnemonics }
      a_subi, a_subis, a_subic, a_subic_, a_sub, a_sub_, a_subo, a_subo_,
      a_subc, a_subc_, a_subco, a_subco_, a_none, a_none, a_none, a_none,
      a_extlwi, a_extlwi_, a_extrwi, a_extrwi_, a_none {inslwi reads op0}, a_none {inslwi. reads op0}, a_none {insrwi reads op0},
      a_none {insrwi. reads op0}, a_rotlwi, a_rotlwi_, a_rotlw, a_rotlw_, a_slwi, a_slwi_,
      a_srwi, a_srwi_, a_clrlwi, a_clrlwi_, a_clrrwi, a_clrrwi_, a_clrslwi,
      a_clrslwi_, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none {move to special purpose reg}, a_none {move from special purpose reg},
      a_none, a_none, a_none, a_none, a_none, a_none, a_not, a_not_, a_none, a_none, a_none,
      a_none, a_none, a_none);
  { Shortens a "compare with zero / mfcr / rlwinm" sequence that extracts a
    single condition bit into a register.

    p      the compare instruction (A_CMPWI or A_CMPLWI against constant 0)
    next1  the mfcr that follows it
    next2  the rlwinm that follows the mfcr and isolates one bit

    The caller (PeepHoleOptPass1Cpu) has already checked the shape of the
    three instructions; this routine only looks at the rotate count of next2
    to find out which bit is extracted.

    Returns true when the sequence was rewritten (instructions are modified
    in place and the superfluous ones are removed from asml and freed),
    false when nothing was changed. }
  function TCpuAsmOptimizer.cmpi_mfcr_opt(p, next1, next2: taicpu): boolean;
    begin
      result := true;
      case next2.oper[2]^.val of
        1:
          begin
            // less than zero or greater/equal than zero (a following xori
            // remains in the latter case). Doesn't make sense for unsigned
            // comparisons.
            if (p.opcode = A_CMPWI) then
              begin
                // the compare becomes "srwi target,source,31"
                p.opcode := A_SRWI;
                p.ops := 3;
                // operand 1 must be loaded first: it reads the old operand 0
                p.loadreg(1,p.oper[0]^.reg);
                p.loadreg(0,next1.oper[0]^.reg);
                p.loadconst(2,31);
                asml.remove(next1);
                next1.free;
                asml.remove(next2);
                next2.free;
              end
            else
              result := false;
          end;
{
        needs two registers to work with
        2:
          begin
            // greater or less/equal to zero
          end;
}
        3:
          begin
            // equal/not equal to zero (a following xori remains in the
            // latter case; there's a more optimal sequence without it, but
            // it needs an extra register)

            // the compare becomes "cntlzw target,source" ...
            p.opcode := A_CNTLZW;
            p.loadreg(1,p.oper[0]^.reg);
            p.loadreg(0,next1.oper[0]^.reg);
            // ... and the mfcr becomes "srwi target,target,5"
            next1.ops := 3;
            next1.opcode := A_SRWI;
            next1.loadreg(1,next1.oper[0]^.reg);
            next1.loadconst(2,5);
            asml.remove(next2);
            next2.free;
          end;
        else
          result := false;
      end;
    end;
  { Converts the mask-begin (l1) and mask-end (l2) operands of an rlwinm
    instruction into the corresponding 32 bit AND mask.
    If l1 is larger than l2 the mask wraps around, which is obtained by
    inverting the mask calculated for the swapped-looking range. }
  function rlwinm2mask(l1,l2: longint): longint;
    var
      mask: longint;
    begin
      if (l1 = 0) then
        // handled separately since 1 shl 32 = 1 instead of 0 on x86
        mask := not((1 shl (31 - l2)) - 1)
      else
        mask := ((1 shl (32 - l1)) - 1) xor ((1 shl (31 - l2)) - 1);
      if (l2 < l1) then
        mask := not(mask);
      result := mask;
    end;
  { CPU-specific part of the first peephole pass.

    p is the list entry currently being examined; when the instruction at p
    is removed, p is changed to the entry that takes its place.
    Returns true when the instruction stream was changed.

    Handled patterns (all on directly following instructions):
      - cmpwi/cmplwi x,0 ; mfcr ; rlwinm  -> see cmpi_mfcr_opt
      - mr a,b ; <calculation with target a> -> calculation reads b, mr removed
      - slwi/srwi followed by rlwinm on the same register -> the shift is
        rewritten as rlwinm so that the rlwinm merging can handle it later
      - srwi ; slwi on the same register -> single rlwinm
      - rlwinm ; rlwinm/slwi/srwi on the same register -> single rlwinm,
        "li 0" or nothing at all }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      next1, next2: tai;
      l1, l2, shlcount: longint;
    begin
      result := false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_CMPWI,
              A_CMPLWI:
                begin
                  if (taicpu(p).oper[1]^.typ = top_const) and
                     (taicpu(p).oper[1]^.val = 0) and
                     getnextinstruction(p,next1) and
                     (next1.typ = ait_instruction) and
                     (taicpu(next1).opcode = A_MFCR) and
                     getnextinstruction(next1,next2) and
                     // next2 must really be an instruction before it may be
                     // accessed as taicpu
                     (next2.typ = ait_instruction) and
                     (taicpu(next2).opcode = A_RLWINM) and
                     (taicpu(next2).oper[0]^.reg = taicpu(next2).oper[1]^.reg) and
                     (taicpu(next2).oper[0]^.reg = taicpu(next1).oper[0]^.reg) and
                     (taicpu(next2).oper[3]^.val = 31) and
                     (taicpu(next2).oper[4]^.val = 31) and
                     cmpi_mfcr_opt(taicpu(p),taicpu(next1),taicpu(next2)) then
                    result := true;
                end;
              { seems the register allocator doesn't generate superfluous fmr's }
              { A_FMR, }
              A_MR:
                begin
                  if getnextinstruction(p,next1) and
                     (next1.typ = ait_instruction) and
                     (calculation_target_op0[taicpu(next1).opcode] <> a_none) and
                     // only look at operand 0 if it exists and is a register
                     (taicpu(next1).ops >= 1) and
                     (taicpu(next1).oper[0]^.typ = top_reg) and
                     (taicpu(next1).oper[0]^.reg = taicpu(p).oper[0]^.reg) then
                    begin
                      // next1 overwrites the target of the mr: let it read
                      // the source of the mr directly and drop the mr
                      for l1 := 1 to taicpu(next1).ops - 1 do
                        if (taicpu(next1).oper[l1]^.typ = top_reg) and
                           (taicpu(next1).oper[l1]^.reg = taicpu(p).oper[0]^.reg) then
                          taicpu(next1).loadreg(l1,taicpu(p).oper[1]^.reg);
                      asml.remove(p);
                      p.free;
                      p := next1;
                      result := true;
                    end;
                end;
              A_SLWI:
                begin
                  if getnextinstruction(p,next1) and
                     (next1.typ = ait_instruction) and
                     (taicpu(next1).opcode = A_RLWINM) and
                     (taicpu(next1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
                     (taicpu(next1).oper[1]^.reg = taicpu(p).oper[0]^.reg) then
                    begin
                      { convert slwi to rlwinm and see if the rlwinm }
                      { optimization can do something with it         }
                      taicpu(p).opcode := A_RLWINM;
                      taicpu(p).ops := 5;
                      taicpu(p).loadconst(2,taicpu(p).oper[2]^.val);
                      taicpu(p).loadconst(3,0);
                      taicpu(p).loadconst(4,31-taicpu(p).oper[2]^.val);
                      result := true;
                    end;
                end;
              A_SRWI:
                begin
                  if getnextinstruction(p,next1) and
                     (next1.typ = ait_instruction) and
                     ((taicpu(next1).opcode = A_SLWI) or
                      (taicpu(next1).opcode = A_RLWINM)) and
                     (taicpu(next1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
                     (taicpu(next1).oper[1]^.reg = taicpu(p).oper[0]^.reg) then
                    case taicpu(next1).opcode of
                      A_SLWI:
                        begin
                          // srwi followed by slwi: one rotate plus mask
                          taicpu(p).opcode := A_RLWINM;
                          taicpu(p).ops := 5;
                          // net left shift = slwi count - srwi count
                          taicpu(p).loadconst(2,taicpu(next1).oper[2]^.val-taicpu(p).oper[2]^.val);
                          if (taicpu(p).oper[2]^.val < 0) then
                            begin
                              // net right shift: the upper bits are cleared
                              // as well, and the rotate count is made
                              // non-negative
                              taicpu(p).loadconst(3,-taicpu(p).oper[2]^.val);
                              taicpu(p).loadconst(4,31-taicpu(next1).oper[2]^.val);
                              inc(taicpu(p).oper[2]^.val,32);
                            end
                          else
                            begin
                              taicpu(p).loadconst(3,0);
                              taicpu(p).loadconst(4,31-taicpu(next1).oper[2]^.val);
                            end;
                          asml.remove(next1);
                          next1.free;
                          result := true;
                        end;
                      A_RLWINM:
                        begin
                          { convert srwi to rlwinm and see if the rlwinm }
                          { optimization can do something with it         }
                          taicpu(p).opcode := A_RLWINM;
                          taicpu(p).ops := 5;
                          // operand 2 still holds the srwi count here, so
                          // it has to be overwritten last
                          taicpu(p).loadconst(3,taicpu(p).oper[2]^.val);
                          taicpu(p).loadconst(4,31);
                          taicpu(p).loadconst(2,(32-taicpu(p).oper[2]^.val) and 31);
                          result := true;
                        end;
                    end;
                end;
              A_RLWINM:
                begin
                  if getnextinstruction(p,next1) and
                     (next1.typ = ait_instruction) and
                     ((taicpu(next1).opcode = A_RLWINM) or
                      (taicpu(next1).opcode = A_SRWI) or
                      (taicpu(next1).opcode = A_SLWI)) and
                     (taicpu(next1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
                     // both source and target of next1 must equal target of p
                     (taicpu(next1).oper[1]^.reg = taicpu(p).oper[0]^.reg) then
                    begin
                      // express next1 as "rotate left by shlcount, and with l2"
                      case taicpu(next1).opcode of
                        A_RLWINM:
                          begin
                            shlcount := taicpu(next1).oper[2]^.val;
                            l2 := rlwinm2mask(taicpu(next1).oper[3]^.val,taicpu(next1).oper[4]^.val);
                          end;
                        A_SLWI:
                          begin
                            shlcount := taicpu(next1).oper[2]^.val;
                            l2 := (-1) shl shlcount;
                          end;
                        A_SRWI:
                          begin
                            shlcount := 32-taicpu(next1).oper[2]^.val;
                            // shift an unsigned 32 bit value so that the mask
                            // does not depend on the width in which the
                            // shift is evaluated
                            l2 := longint(cardinal($ffffffff) shr taicpu(next1).oper[2]^.val);
                          end;
                      end;
                      // mask of p after it has been rotated along by next1,
                      // combined with the mask of next1
                      l1 := rlwinm2mask((taicpu(p).oper[3]^.val-shlcount) and 31,(taicpu(p).oper[4]^.val-shlcount) and 31);
                      l1 := l1 and l2;
                      case l1 of
                        -1:
                          begin
                            // nothing is masked out: only the rotate remains
                            taicpu(p).oper[2]^.val := (taicpu(p).oper[2]^.val + shlcount) and 31;
                            asml.remove(next1);
                            next1.free;
                            // a rotate by 0 with a full mask is a register
                            // copy; it may only be dropped if source and
                            // target are the same register
                            if (taicpu(p).oper[2]^.val = 0) and
                               (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                              begin
                                next1 := tai(p.next);
                                asml.remove(p);
                                p.free;
                                p := next1;
                              end;
                            // next1 is gone in either case
                            result := true;
                          end;
                        0:
                          begin
                            // masks have no bits in common
                            taicpu(p).opcode := A_LI;
                            taicpu(p).loadconst(1,0);
                            taicpu(p).clearop(2);
                            taicpu(p).clearop(3);
                            taicpu(p).clearop(4);
                            taicpu(p).ops := 2;
                            taicpu(p).opercnt := 2;
                            asml.remove(next1);
                            next1.free;
                            result := true;
                          end
                        // only merge if the combined mask can be expressed
                        // as a single begin/end pair again
                        else if tcgppc(cg).get_rlwi_const(l1,l1,l2) then
                          begin
                            taicpu(p).oper[2]^.val := (taicpu(p).oper[2]^.val + shlcount) and 31;
                            taicpu(p).oper[3]^.val := l1;
                            taicpu(p).oper[4]^.val := l2;
                            asml.remove(next1);
                            next1.free;
                            result := true;
                          end;
                      end;
                    end;
                end;
            end;
          end;
      end;
    end;
  const
    { Indexed by opcode: the variant of that opcode which records its result
      in the condition register (the "_" form), or a_none if there is none.
      Opcodes which already are a recording form map to themselves.

      changetomodifyflags treats every non-a_none entry as "the instruction
      now sets the flags", after which PostPeepHoleOptsCpu deletes the
      following compare with zero. An opcode without a recording form must
      therefore be a_none here; the mffs..mtcrf group used to be listed as
      itself and has been corrected.
      The table is positional: never add or remove entries, only change
      values in place. }
    modifyflags: array[tasmop] of tasmop =
      (a_none, a_add_, a_add_, a_addo_, a_addo_, a_addc_, a_addc_, a_addco_, a_addco_,
      a_adde_, a_adde_, a_addeo_, a_addeo_, {a_addi could be addic_ if sure doesn't disturb carry} a_none, a_addic_, a_addic_, a_none,
      a_addme_, a_addme_, a_addmeo_, a_addmeo_, a_addze_, a_addze_, a_addzeo_,
      a_addzeo_, a_and_, a_and_, a_andc_, a_andc_, a_andi_, a_andis_, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_cntlzw_, a_cntlzw_, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_divw_, a_divw_, a_divwo_, a_divwo_,
      a_divwu_, a_divwu_, a_divwuo_, a_divwuo_, a_none, a_none, a_none, a_eqv_,
      a_eqv_, a_extsb_, a_extsb_, a_extsh_, a_extsh_, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none {mffs}, a_none {mffs.}, a_none {mfmsr}, a_none {mfspr}, a_none {mfsr},
      a_none {mfsrin}, a_none {mftb}, a_none {mtcrf}, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_mulhw_,
      a_mulhw_, a_mulhwu_, a_mulhwu_, a_none, a_mullw_, a_mullw_, a_mullwo_,
      a_mullwo_, a_nand_, a_nand_, a_neg_, a_neg_, a_nego_, a_nego_, a_nor_, a_nor_,
      a_or_, a_or_, a_orc_, a_orc_, a_none, a_none, a_none, a_rlwimi_, a_rlwimi_,
      a_rlwinm_, a_rlwinm_, a_rlwnm_, a_rlwnm_, a_none, a_slw_, a_slw_, a_sraw_, a_sraw_,
      a_srawi_, a_srawi_,a_srw_, a_srw_, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_subf_, a_subf_, a_subfo_,
      a_subfo_, a_subfc_, a_subfc_, a_subfco_, a_subfco_, a_subfe_, a_subfe_,
      a_subfeo_, a_subfeo_, a_none, a_subfme_, a_subfme_, a_subfmeo_, a_subfmeo_,
      a_subfze_, a_subfze_, a_subfzeo_, a_subfzeo_, a_none, a_none, a_none,
      a_none, a_none, a_none, a_xor_, a_xor_, a_none, a_none,
      { simplified mnemonics }
      a_none, a_none, a_subic_, a_subic_, a_sub_, a_sub_, a_subo_, a_subo_,
      a_subc_, a_subc_, a_subco_, a_subco_, a_none, a_none, a_none, a_none,
      a_extlwi_, a_extlwi_, a_extrwi_, a_extrwi_, a_inslwi_, a_inslwi_, a_insrwi_,
      a_insrwi_, a_rotlwi_, a_rotlwi_, a_rotlw_, a_rotlw_, a_slwi_, a_slwi_,
      a_srwi_, a_srwi_, a_clrlwi_, a_clrlwi_, a_clrrwi_, a_clrrwi_, a_clrslwi_,
      a_clrslwi_, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none {move to special purpose reg}, a_none {move from special purpose reg},
      a_none, a_none, a_none, a_none, a_mr_, a_mr_, a_not_, a_not_, a_none, a_none, a_none,
      a_none, a_none, a_none);
  { Switches instruction p to the variant of its opcode listed in the
    modifyflags table. Returns true if the table has an entry for the
    opcode (p.opcode is then replaced by it), false if p was left alone. }
  function changetomodifyflags(p: taicpu): boolean;
    var
      flagop: tasmop;
    begin
      flagop := modifyflags[p.opcode];
      result := (flagop <> a_none);
      if result then
        p.opcode := flagop;
    end;
  { CPU-specific part of the post-peephole pass.

    p is the list entry currently being examined. Two things are done for
    instructions:
      1. "rlwinm." with a rotate count of 0 whose mask lies completely in
         the upper or in the lower halfword is replaced by andis./andi.
      2. an instruction with a register target that is followed by a
         "cmpwi target,0" on cr0 is switched to its flag-setting variant
         (see changetomodifyflags) and the compare is removed.
    Returns true only when a compare was removed (case 2). }
  function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
    var
      next1: tai;
    begin
      result := false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_RLWINM_:
                begin
                  // rlwinm_ is cracked on the G5, andi_/andis_ aren't
                  if (taicpu(p).oper[2]^.val = 0) and
                     // a wrap-around mask (begin > end) spans both
                     // halfwords and cannot be expressed as a 16 bit
                     // immediate
                     (taicpu(p).oper[3]^.val <= taicpu(p).oper[4]^.val) then
                    if (taicpu(p).oper[3]^.val < 16) and
                       (taicpu(p).oper[4]^.val < 16) then
                      begin
                        // mask completely in the upper halfword
                        taicpu(p).opcode := A_ANDIS_;
                        taicpu(p).oper[2]^.val := word(
                          ((1 shl (16-taicpu(p).oper[3]^.val)) - 1) xor
                          ((1 shl (15-taicpu(p).oper[4]^.val)) - 1));
                        taicpu(p).clearop(3);
                        taicpu(p).clearop(4);
                        taicpu(p).ops := 3;
                        // NOTE(review): opercnt ends up smaller than ops
                        // here; kept as in the original, please confirm
                        // against the declaration of opercnt
                        taicpu(p).opercnt := 2;
                      end
                    else if (taicpu(p).oper[3]^.val >= 16) and
                            (taicpu(p).oper[4]^.val >= 16) then
                      begin
                        // mask completely in the lower halfword
                        taicpu(p).opcode := A_ANDI_;
                        taicpu(p).oper[2]^.val := word(rlwinm2mask(taicpu(p).oper[3]^.val,taicpu(p).oper[4]^.val));
                        taicpu(p).clearop(3);
                        taicpu(p).clearop(4);
                        taicpu(p).ops := 3;
                        // NOTE(review): see the remark on opercnt above
                        taicpu(p).opercnt := 2;
                      end;
                end;
            end;

            // change "integer operation with destination reg" followed by a
            // comparison to zero of that reg, with a variant of that integer
            // operation which sets the flags (if it exists)
            if not(result) and
               (taicpu(p).ops >= 2) and
               (taicpu(p).oper[0]^.typ = top_reg) and
               (taicpu(p).oper[1]^.typ = top_reg) and
               getnextinstruction(p,next1) and
               (next1.typ = ait_instruction) and
               (taicpu(next1).opcode = A_CMPWI) and
               // make sure the result goes to cr0: either the condition
               // register field is left out (two operands) ...
               (((taicpu(next1).ops = 2) and
                 (taicpu(next1).oper[1]^.val = 0) and
                 (taicpu(next1).oper[0]^.reg = taicpu(p).oper[0]^.reg)) or
               // ... or it is given explicitly and is cr0
                ((taicpu(next1).ops = 3) and
                 (taicpu(next1).oper[2]^.val = 0) and
                 (taicpu(next1).oper[0]^.typ = top_reg) and
                 (getsupreg(taicpu(next1).oper[0]^.reg) = RS_CR0) and
                 (taicpu(next1).oper[1]^.reg = taicpu(p).oper[0]^.reg))) and
               // must stay last: it modifies p as a side effect
               changetomodifyflags(taicpu(p)) then
              begin
                asml.remove(next1);
                next1.free;
                result := true;
              end;
          end;
      end;
    end;
  { register this class as the assembler optimizer to use }
  initialization
    casmoptimizer := TCpuAsmOptimizer;
  end.