{
    Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
    Development Team

    This unit implements the PowerPC optimizer object

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
  18. Unit aoptcpu;
  19. Interface
  20. {$i fpcdefs.inc}
  21. uses cpubase, aoptobj, aoptcpub, aopt, aasmtai;
  type
    { CPU-specific assembler optimizer class for this target.

      It derives from TAsmOptimizer and overrides the two CPU hooks
      declared below.  No constructor is declared here, so the inherited
      one is used. }
    TCpuAsmOptimizer = class(TAsmOptimizer)
      { CPU-specific part of the first peephole pass; returns true in the
        cases where it rewrote the instruction at p. }
      function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
      { CPU-specific part of the post-peephole pass; returns true when it
        removed a compare following p. }
      function PostPeepHoleOptsCpu(var p: tai): boolean; override;
    end;
  28. Implementation
  29. uses
  30. cutils, aasmcpu, cgbase;
  { Returns the 32 bit mask described by the MB/ME fields of an rlwinm.

    Bit numbers follow the PowerPC convention (bit 0 is the most
    significant bit).  For mb <= me the mask covers bits mb..me; for
    mb > me it wraps around and covers bits mb..31 plus bits 0..me.
    Both arguments are expected to be valid field values (0..31). }
  function rlwinm_mask(mb, me: longint): cardinal;
    var
      from_mb, upto_me: cardinal;
    begin
      { bits mb..31 }
      from_mb := cardinal($ffffffff) shr mb;
      { bits 0..me; shifted in 64 bit and truncated explicitly so the
        result does not depend on the width used for the intermediate }
      upto_me := cardinal((qword($ffffffff) shl (31-me)) and $ffffffff);
      if (mb <= me) then
        result := from_mb and upto_me
      else
        result := from_mb or upto_me;
    end;


  { First-pass peephole optimizations on the instruction at p.

    Handled patterns (the following instruction must read and write the
    register written by p):
      - srwi followed by slwi         -> one rlwinm, the slwi is removed
      - srwi followed by rlwinm sh=0  -> the srwi is rewritten as rlwinm
      - rlwinm followed by rlwinm sh=0 -> the two masks are combined, or
        the pair is replaced by "li reg,0" when the masks share no bit

    Returns true when p was rewritten, false otherwise. }
  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
    var
      next1: tai;
      mb1, me1, mb2, me2: longint;
    begin
      result := false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_SRWI:
                begin
                  if getnextinstruction(p,next1) and
                     (next1.typ = ait_instruction) and
                     ((taicpu(next1).opcode = A_SLWI) or
                      (taicpu(next1).opcode = A_RLWINM)) and
                     (taicpu(next1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
                     (taicpu(next1).oper[1]^.reg = taicpu(p).oper[0]^.reg) then
                    case taicpu(next1).opcode of
                      A_SLWI:
                        begin
                          { (x shr n) shl m = rotate left by (m-n) mod 32,
                            keeping bits max(0,n-m)..31-m }
                          taicpu(p).opcode := A_RLWINM;
                          taicpu(p).ops := 5;
                          taicpu(p).loadconst(2,taicpu(next1).oper[2]^.val-taicpu(p).oper[2]^.val);
                          if (taicpu(p).oper[2]^.val < 0) then
                            begin
                              { net right shift: the top n-m bits are cleared }
                              taicpu(p).loadconst(3,-taicpu(p).oper[2]^.val);
                              taicpu(p).loadconst(4,31-taicpu(next1).oper[2]^.val);
                              { turn the negative amount into a left rotate }
                              inc(taicpu(p).oper[2]^.val,32);
                            end
                          else
                            begin
                              taicpu(p).loadconst(3,0);
                              taicpu(p).loadconst(4,31-taicpu(next1).oper[2]^.val);
                            end;
                          asml.remove(next1);
                          next1.free;
                          result := true;
                        end;
                      A_RLWINM:
                        begin
                          if (taicpu(next1).oper[2]^.val = 0) then
                            begin
                              { convert srwi to rlwinm and see if the rlwinm }
                              { optimization can do something with it        }
                              taicpu(p).opcode := A_RLWINM;
                              taicpu(p).ops := 5;
                              { operand 2 still holds the srwi shift count
                                here, so it has to be overwritten last }
                              taicpu(p).loadconst(3,taicpu(p).oper[2]^.val);
                              taicpu(p).loadconst(4,31);
                              taicpu(p).loadconst(2,(32-taicpu(p).oper[2]^.val) and 31);
                              result := true;
                            end;
                        end;
                    end;
                end;
              A_RLWINM:
                begin
                  if getnextinstruction(p,next1) and
                     (next1.typ = ait_instruction) and
                     (taicpu(next1).opcode = A_RLWINM) and
                     (taicpu(next1).oper[0]^.reg = taicpu(p).oper[0]^.reg) and
                     // both source and target of next1 must equal target of p
                     (taicpu(next1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
                     (taicpu(next1).oper[2]^.val = 0) then
                    begin
                      mb1 := taicpu(p).oper[3]^.val;
                      me1 := taicpu(p).oper[4]^.val;
                      mb2 := taicpu(next1).oper[3]^.val;
                      me2 := taicpu(next1).oper[4]^.val;
                      { Compare the real bit masks.  Comparing the MB/ME
                        values as plain intervals is wrong as soon as one
                        mask wraps around: 28..3 and 0..2 share bits 0..2
                        although 28 > 2. }
                      if ((rlwinm_mask(mb1,me1) and rlwinm_mask(mb2,me2)) = 0) then
                        begin
                          // masks have no bits in common
                          taicpu(p).opcode := A_LI;
                          taicpu(p).loadconst(1,0);
                          taicpu(p).clearop(2);
                          taicpu(p).clearop(3);
                          taicpu(p).clearop(4);
                          taicpu(p).ops := 2;
                          taicpu(p).opercnt := 2;
                          asml.remove(next1);
                          next1.free;
                          { p was rewritten, report it like the other
                            rewrites in this routine do }
                          result := true;
                        end
                      else
                        // some of the cases with a wrap-around mask can be
                        // optimized, but others can't (like 19,17 and 25,23),
                        // so only merge when neither mask wraps
                        if (mb1 <= me1) and
                           (mb2 <= me2) then
                          begin
                            taicpu(p).oper[3]^.val := max(mb1,mb2);
                            taicpu(p).oper[4]^.val := min(me1,me2);
                            asml.remove(next1);
                            next1.free;
                            result := true;
                          end;
                    end;
                end;
            end;
          end;
      end;
    end;
  const
    { Lookup table indexed by opcode: each entry is the opcode that
      changetomodifyflags substitutes for the index opcode, or a_none when
      no substitution is made.  The entries are positional, so their order
      must stay in step with the declaration of tasmop. }
    modifyflags: array[tasmop] of tasmop =
      (a_none, a_add_, a_add_, a_addo_, a_addo_, a_addc_, a_addc_, a_addco_, a_addco_,
      a_adde_, a_adde_, a_addeo_, a_addeo_, {a_addi could be addic_ if sure doesn't disturb carry} a_none, a_addic_, a_addic_, a_none,
      a_addme_, a_addme_, a_addmeo_, a_addmeo_, a_addze_, a_addze_, a_addzeo_,
      a_addzeo_, a_and_, a_and_, a_andc_, a_andc_, a_andi_, a_andis_, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_cntlzw_, a_cntlzw_, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_divw_, a_divw_, a_divwo_, a_divwo_,
      a_divwu_, a_divwu_, a_divwuo_, a_divwuo_, a_none, a_none, a_none, a_eqv_,
      a_eqv_, a_extsb_, a_extsb_, a_extsh_, a_extsh_, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_mffs, a_mffs_, a_mfmsr, a_mfspr, a_mfsr,
      a_mfsrin, a_mftb, a_mtcrf, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_mulhw_,
      a_mulhw_, a_mulhwu_, a_mulhwu_, a_none, a_mullw_, a_mullw_, a_mullwo_,
      a_mullwo_, a_nand_, a_nand_, a_neg_, a_neg_, a_nego_, a_nego_, a_nor_, a_nor_,
      a_or_, a_or_, a_orc_, a_orc_, a_none, a_none, a_none, a_rlwimi_, a_rlwimi_,
      a_rlwinm_, a_rlwinm_, a_rlwnm_, a_rlwnm_, a_none, a_slw_, a_slw_, a_sraw_, a_sraw_,
      a_srawi_, a_srawi_,a_srw_, a_srw_, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none, a_none, a_none, a_none, a_subf_, a_subf_, a_subfo_,
      a_subfo_, a_subfc_, a_subfc_, a_subfco_, a_subfco_, a_subfe_, a_subfe_,
      a_subfeo_, a_subfeo_, a_none, a_subfme_, a_subfme_, a_subfmeo_, a_subfmeo_,
      a_subfze_, a_subfze_, a_subfzeo_, a_subfzeo_, a_none, a_none, a_none,
      a_none, a_none, a_none, a_xor_, a_xor_, a_none, a_none,
      { simplified mnemonics }
      a_none, a_none, a_subic_, a_subic_, a_sub_, a_sub_, a_subo_, a_subo_,
      a_subc_, a_subc_, a_subco_, a_subco_, a_none, a_none, a_none, a_none,
      a_extlwi_, a_extlwi_, a_extrwi_, a_extrwi_, a_inslwi_, a_inslwi_, a_insrwi_,
      a_insrwi_, a_rotlwi_, a_rotlwi_, a_rotlw_, a_rotlw_, a_slwi_, a_slwi_,
      a_srwi_, a_srwi_, a_clrlwi_, a_clrlwi_, a_clrrwi_, a_clrrwi_, a_clrslwi_,
      a_clrslwi_, a_none, a_none, a_none, a_none, a_none, a_none, a_none,
      a_none, a_none {move to special purpose reg}, a_none {move from special purpose reg},
      a_none, a_none, a_none, a_none, a_mr_, a_mr_, a_not_, a_not_, a_none, a_none, a_none,
      a_none, a_none);
  { Replaces the opcode of p by its entry in the modifyflags table.

    Returns true and updates p.opcode when the table entry for the current
    opcode is not a_none; otherwise p is left untouched and false is
    returned. }
  function changetomodifyflags(p: taicpu): boolean;
    var
      flagsop: tasmop;
    begin
      flagsop := modifyflags[p.opcode];
      result := (flagsop <> a_none);
      if result then
        p.opcode := flagsop;
    end;
  { Post-peephole optimizations on the instruction at p.

    1. "rlwinm. rD,rS,0,MB,ME" whose mask lies completely inside one
       halfword is rewritten as andis. (upper halfword) or andi. (lower
       halfword) with the equivalent 16 bit immediate.
    2. An instruction whose first two operands are registers, followed by
       a "cmpwi" of its destination register against 0 (targeting cr0), is
       replaced by the opcode listed for it in the modifyflags table (via
       changetomodifyflags) and the compare is removed.

    Returns true only when a compare was removed (case 2). }
  function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
    var
      next1: tai;
    begin
      result := false;
      case p.typ of
        ait_instruction:
          begin
            case taicpu(p).opcode of
              A_RLWINM_:
                begin
                  // rlwinm_ is cracked on the G5, andi_/andis_ aren't
                  if (taicpu(p).oper[2]^.val = 0) and
                     // A wrap-around mask (MB > ME) covers bits mb..31 and
                     // 0..me, i.e. bits in both halfwords, so it can never
                     // be expressed as a single andi_/andis_.  Without
                     // this check the formulas below yield immediate 0.
                     (taicpu(p).oper[3]^.val <= taicpu(p).oper[4]^.val) then
                    if (taicpu(p).oper[3]^.val < 16) and
                       (taicpu(p).oper[4]^.val < 16) then
                      begin
                        // mask inside the upper halfword: PowerPC bit i
                        // (0 = msb) is bit 15-i of the immediate
                        taicpu(p).opcode := A_ANDIS_;
                        taicpu(p).oper[2]^.val :=
                          ((1 shl (16-taicpu(p).oper[3]^.val)) - 1) and
                          not((1 shl (15-taicpu(p).oper[4]^.val)) - 1);
                        taicpu(p).clearop(3);
                        taicpu(p).clearop(4);
                        taicpu(p).ops := 3;
                        // NOTE(review): ops is 3 but opercnt is set to 2
                        // here, whereas the li rewrite in
                        // PeepHoleOptPass1Cpu sets both to the same value.
                        // Confirm that 2 is intended.
                        taicpu(p).opercnt := 2;
                      end
                    else if (taicpu(p).oper[3]^.val >= 16) and
                            (taicpu(p).oper[4]^.val >= 16) then
                      begin
                        // mask inside the lower halfword: PowerPC bit i
                        // is bit 31-i of the immediate
                        taicpu(p).opcode := A_ANDI_;
                        taicpu(p).oper[2]^.val :=
                          ((1 shl (32-taicpu(p).oper[3]^.val)) - 1) and
                          not((1 shl (31-taicpu(p).oper[4]^.val)) - 1);
                        taicpu(p).clearop(3);
                        taicpu(p).clearop(4);
                        taicpu(p).ops := 3;
                        // NOTE(review): same ops/opercnt mismatch as in the
                        // andis_ branch above - confirm.
                        taicpu(p).opercnt := 2;
                      end;
                end;
            end;

            // change "integer operation with destination reg" followed by a
            // comparison to zero of that reg, with a variant of that integer
            // operation which sets the flags (if it exists)
            if not(result) and
               (taicpu(p).ops >= 2) and
               (taicpu(p).oper[0]^.typ = top_reg) and
               (taicpu(p).oper[1]^.typ = top_reg) and
               getnextinstruction(p,next1) and
               (next1.typ = ait_instruction) and
               (taicpu(next1).opcode = A_CMPWI) and
               // make sure the result goes to cr0
               (((taicpu(next1).ops = 2) and
                 (taicpu(next1).oper[1]^.val = 0) and
                 (taicpu(next1).oper[0]^.reg = taicpu(p).oper[0]^.reg)) or
                ((taicpu(next1).ops = 3) and
                 (taicpu(next1).oper[2]^.val = 0) and
                 (taicpu(next1).oper[0]^.typ = top_reg) and
                 (getsupreg(taicpu(next1).oper[0]^.reg) = RS_CR0) and
                 (taicpu(next1).oper[1]^.reg = taicpu(p).oper[0]^.reg))) and
               // must stay the last test: it modifies p when it succeeds
               changetomodifyflags(taicpu(p)) then
              begin
                asml.remove(next1);
                next1.free;
                result := true;
              end;
          end;
      end;
    end;
  { unit initialization: store this unit's optimizer class in casmoptimizer }
  begin
    casmoptimizer := TCpuAsmOptimizer;
  end.