popt386.pas 110 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit popt386;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses Aasmbase,aasmtai,aasmdata,aasmcpu,verbose;
  21. procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  22. procedure PeepHoleOptPass1(asml: TAsmList; BlockStart, BlockEnd: tai);
  23. procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  24. procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  25. implementation
  26. uses
  27. globtype,systems,
  28. globals,cgbase,procinfo,
  29. symsym,
  30. {$ifdef finaldestdebug}
  31. cobjects,
  32. {$endif finaldestdebug}
  33. cpuinfo,cpubase,cgutils,daopt386;
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  { Returns true when hp1 is one of the listed arithmetic/logic instructions
    and its destination operand is the register reg:
      - two-operand forms (add, sub, or, xor, and, shl, shr, sar): operand 0
        must be a constant or a register other than reg, and operand 1 must be
        reg itself;
      - inc/dec: the single operand must be reg.
    Every other opcode yields false. }
  var
    sourceOk: boolean;
  begin
    case hp1.opcode of
      A_INC,A_DEC:
        isFoldableArithOp :=
          (hp1.oper[0]^.typ = top_reg) and
          (hp1.oper[0]^.reg = reg);
      A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
        begin
          { the source operand is inspected first; operand 1 is only looked at
            when the source qualifies }
          case hp1.oper[0]^.typ of
            top_const:
              sourceOk := true;
            top_reg:
              sourceOk := (hp1.oper[0]^.reg <> reg);
            else
              sourceOk := false;
          end;
          isFoldableArithOp :=
            sourceOk and
            (hp1.oper[1]^.typ = top_reg) and
            (hp1.oper[1]^.reg = reg);
        end;
      else
        isFoldableArithOp := False;
    end;
  end;
  function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean;
  { Updates UsedRegs with the item following p and then reports whether the
    super register of reg is in that set and is not loaded with a new value by
    the next instruction (as judged by regLoadedWithNewValue). If there is no
    next instruction, membership in UsedRegs alone decides. }
  var
    superReg: tsuperregister;
  begin
    superReg := getsupreg(reg);
    UpdateUsedRegs(UsedRegs, tai(p.Next));
    if not (superReg in UsedRegs) then
      RegUsedAfterInstruction := false
    { p is a by-value parameter, so advancing it here is local to this function }
    else if not getNextInstruction(p,p) then
      RegUsedAfterInstruction := true
    else
      RegUsedAfterInstruction := not regLoadedWithNewValue(superReg,false,p);
  end;
  function doFpuLoadStoreOpt(asmL: TAsmList; var p: tai): boolean;
  { Looks for "fstp mem; fld mem" or "fistp mem; fild mem" on the same memory
    reference and with equal operand size, starting at p.

    The pair is removed only when all of the following hold:
      - the operand size is S_FX (extended);
      - the instruction after the pair is leave or ret;
      - the reference is based on the frame pointer, has no index register,
        and its offset is not below the function result's offset (when the
        current procedure has a function result symbol).
    In that case p is set to the leave/ret instruction,
    removeLastDeallocForFuncRes is called and the function returns true,
    meaning the caller should "continue" after this optimization.

    The alternative rewrite (fstp;fld -> fst, fistp;fild -> fist for sizes
    other than S_FX/S_IQ) is intentionally not performed because the store
    operation rounds; in that case nothing is changed and false is returned. }
  var
    reload, after: tai;
  begin
    doFpuLoadStoreOpt := false;

    { p must store to memory and be followed by another instruction }
    if (taicpu(p).oper[0]^.typ <> top_ref) or
       not getNextInstruction(p, reload) or
       (reload.typ <> ait_instruction) then
      exit;

    { only fstp/fld and fistp/fild pairs qualify }
    if not (((taicpu(reload).opcode = A_FLD) and
             (taicpu(p).opcode = A_FSTP)) or
            ((taicpu(p).opcode = A_FISTP) and
             (taicpu(reload).opcode = A_FILD))) then
      exit;

    { both must access the same memory location with the same size }
    if (taicpu(reload).oper[0]^.typ <> top_ref) or
       (taicpu(reload).opsize <> taicpu(p).opsize) or
       not refsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
      exit;

    { dropping the store/reload is only valid for extended because of rounding }
    if (taicpu(p).opsize <> S_FX) or
       not getNextInstruction(reload, after) or
       (after.typ <> ait_instruction) then
      exit;

    if (taicpu(after).opcode <> A_LEAVE) and
       (taicpu(after).opcode <> A_RET) then
      exit;

    if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
      exit;

    if assigned(current_procinfo.procdef.funcretsym) and
       (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
      exit;

    if taicpu(p).oper[0]^.ref^.index <> NR_NO then
      exit;

    { remove both instructions and continue at the leave/ret }
    asml.remove(p);
    asml.remove(reload);
    p.free;
    reload.free;
    p := after;
    removeLastDeallocForFuncRes(asmL, p);
    doFpuLoadStoreOpt := true;
  end;
  procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  { Pre-pass of the peephole optimizer. Walks the list items from BlockStart up
    to (not including) BlockEnd and rewrites three instruction patterns:

      - 32 bit "imul const,reg" / "imul const,reg1,reg2":
          * const = 1: removed (two operands) or turned into a mov (three
            operands);
          * const in 3, 5, 9: replaced by a single lea;
          * const in 6, 10, 12: replaced by a two instruction lea/add sequence,
            but only when optimizing for cpu_386 or lower.
        The lea rewrites are skipped when optimizing for size, when the target
        cpu is Pentium2 or newer, or when the imul is followed by a jo/jno
        (lea does not set the overflow flag).
      - "shr/sar const1,x" directly followed by "shl const2,x" on the same
        operand: replaced by a shift plus an "and" mask, or by only an "and"
        when both constants are equal.
      - "xor reg,reg": temporarily changed into "mov $0,reg" to make things
        easier for the CSE; per the original note this is changed back in
        pass 2.

    asml       : list that contains the instructions and receives the changes
    BlockStart : first item to examine
    BlockEnd   : item at which the walk stops }
  var
    p,hp1: tai;
    tmpRef: treference;

    procedure ReplaceImulByLea(scale: byte);
    { Replaces the imul at p by "lea (reg1,reg1,scale),dest", where dest is
      reg1 for the two operand form and reg2 for the three operand form.
      On return p points to the new lea. }
    begin
      tmpRef.base := taicpu(p).oper[1]^.reg;
      tmpRef.index := taicpu(p).oper[1]^.reg;
      tmpRef.ScaleFactor := scale;
      if (taicpu(p).ops = 2) then
        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg)
      else
        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
      InsertLLItem(asml, p.previous, p.next, hp1);
      p.free;
      p := hp1;
    end;

    procedure LoadLowBitsClearedMask(ins: taicpu; bits: aint);
    { Loads operand 0 of ins with a mask that has the lowest "bits" bits
      cleared and all other bits of the operand size set. Only S_B, S_W and
      S_L are handled; for any other size the operand is left untouched. }
    var
      lowBits: aint;
    begin
      lowBits := (1 shl bits) - 1;
      case ins.opsize of
        S_L: ins.loadConst(0, lowBits xor aint($ffffffff));
        S_B: ins.loadConst(0, lowBits xor $ff);
        S_W: ins.loadConst(0, lowBits xor $ffff);
      end;
    end;

  begin
    p := BlockStart;
    while (p <> BlockEnd) do
      begin
        case p.typ of
          ait_instruction:
            begin
              case taicpu(p).opcode of
                A_IMUL:
                  {changes certain "imul const, %reg"'s to lea sequences}
                  begin
                    if (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).opsize = S_L) then
                      if (taicpu(p).oper[0]^.val = 1) then
                        if (taicpu(p).ops = 2) then
                          {remove "imul $1, reg"}
                          begin
                            hp1 := tai(p.next);
                            asml.remove(p);
                            p.free;
                            p := hp1;
                            continue;
                          end
                        else
                          {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                          begin
                            hp1 := taicpu.op_reg_reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                            InsertLLItem(asml, p.previous, p.next, hp1);
                            p.free;
                            p := hp1;
                          end
                      else if
                        ((taicpu(p).ops <= 2) or
                         (taicpu(p).oper[2]^.typ = top_reg)) and
                        (current_settings.optimizecputype < cpu_Pentium2) and
                        (taicpu(p).oper[0]^.val <= 12) and
                        not(cs_opt_size in current_settings.optimizerswitches) and
                        (not(GetNextInstruction(p, hp1)) or
                         {GetNextInstruction(p, hp1) and}
                         not((tai(hp1).typ = ait_instruction) and
                             ((taicpu(hp1).opcode=A_Jcc) and
                              (taicpu(hp1).condition in [C_O,C_NO])))) then
                        begin
                          reference_reset(tmpRef);
                          case taicpu(p).oper[0]^.val of
                            3:
                              {imul 3, reg1, reg2 to
                                 lea (reg1,reg1,2), reg2
                               imul 3, reg1 to
                                 lea (reg1,reg1,2), reg1}
                              ReplaceImulByLea(2);
                            5:
                              {imul 5, reg1, reg2 to
                                 lea (reg1,reg1,4), reg2
                               imul 5, reg1 to
                                 lea (reg1,reg1,4), reg1}
                              ReplaceImulByLea(4);
                            6:
                              begin
                                {imul 6, reg1, reg2 to
                                   lea (,reg1,2), reg2
                                   lea (reg2,reg1,4), reg2
                                 imul 6, reg1 to
                                   lea (reg1,reg1,2), reg1
                                   add reg1, reg1}
                                if (current_settings.optimizecputype <= cpu_386) then
                                  begin
                                    { build the SECOND instruction first and
                                      insert it after the imul }
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    if (taicpu(p).ops = 3) then
                                      begin
                                        tmpRef.base := taicpu(p).oper[2]^.reg;
                                        tmpRef.ScaleFactor := 4;
                                        { the destination must be reg2: writing
                                          to reg1 would clobber the source and
                                          leave reg2 = 2*reg1 }
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(asml,p, p.next, hp1);
                                    { now the FIRST instruction, which takes
                                      the place of the imul }
                                    reference_reset(tmpRef);
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    tmpRef.ScaleFactor := 2;
                                    if (taicpu(p).ops = 3) then
                                      begin
                                        tmpRef.base := NR_NO;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef,
                                          taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        tmpRef.base := taicpu(p).oper[1]^.reg;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(asml,p.previous, p.next, hp1);
                                    p.free;
                                    { continue behind both new instructions }
                                    p := tai(hp1.next);
                                  end
                              end;
                            9:
                              {imul 9, reg1, reg2 to
                                 lea (reg1,reg1,8), reg2
                               imul 9, reg1 to
                                 lea (reg1,reg1,8), reg1}
                              ReplaceImulByLea(8);
                            10:
                              begin
                                {imul 10, reg1, reg2 to
                                   lea (reg1,reg1,4), reg2
                                   add reg2, reg2
                                 imul 10, reg1 to
                                   lea (reg1,reg1,4), reg1
                                   add reg1, reg1}
                                if (current_settings.optimizecputype <= cpu_386) then
                                  begin
                                    { the add goes behind the imul ... }
                                    if (taicpu(p).ops = 3) then
                                      hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                        taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                    else
                                      hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                        taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                    InsertLLItem(asml,p, p.next, hp1);
                                    { ... and the lea replaces the imul }
                                    tmpRef.base := taicpu(p).oper[1]^.reg;
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    tmpRef.ScaleFactor := 4;
                                    if (taicpu(p).ops = 3) then
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg)
                                    else
                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                    InsertLLItem(asml,p.previous, p.next, hp1);
                                    p.free;
                                    p := tai(hp1.next);
                                  end
                              end;
                            12:
                              begin
                                {imul 12, reg1, reg2 to
                                   lea (,reg1,4), reg2
                                   lea (reg2,reg1,8), reg2
                                 imul 12, reg1 to
                                   lea (reg1,reg1,2), reg1
                                   lea (,reg1,4), reg1}
                                if (current_settings.optimizecputype <= cpu_386) then
                                  begin
                                    { second instruction, inserted behind the imul }
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    if (taicpu(p).ops = 3) then
                                      begin
                                        tmpRef.base := taicpu(p).oper[2]^.reg;
                                        tmpRef.ScaleFactor := 8;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        tmpRef.base := NR_NO;
                                        tmpRef.ScaleFactor := 4;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(asml,p, p.next, hp1);
                                    { first instruction, replaces the imul }
                                    reference_reset(tmpRef);
                                    tmpRef.index := taicpu(p).oper[1]^.reg;
                                    if (taicpu(p).ops = 3) then
                                      begin
                                        tmpRef.base := NR_NO;
                                        tmpRef.ScaleFactor := 4;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[2]^.reg);
                                      end
                                    else
                                      begin
                                        tmpRef.base := taicpu(p).oper[1]^.reg;
                                        tmpRef.ScaleFactor := 2;
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, tmpRef, taicpu(p).oper[1]^.reg);
                                      end;
                                    InsertLLItem(asml,p.previous, p.next, hp1);
                                    p.free;
                                    p := tai(hp1.next);
                                  end
                              end
                          end;
                        end;
                  end;
                A_SAR, A_SHR:
                  {changes the code sequence
                     shr/sar const1, x
                     shl const2, x
                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                  begin
                    if GetNextInstruction(p, hp1) and
                       (tai(hp1).typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_SHL) and
                       (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(hp1).oper[0]^.typ = top_const) and
                       (taicpu(hp1).opsize = taicpu(p).opsize) and
                       (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                       OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                      if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                         not(cs_opt_size in current_settings.optimizerswitches) then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 > const2:
                          shift right by the difference, then mask out the
                          lowest const2 bits }
                        begin
                          taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                          taicpu(hp1).opcode := A_AND;
                          LoadLowBitsClearedMask(taicpu(hp1), taicpu(hp1).oper[0]^.val);
                        end
                      else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                              not(cs_opt_size in current_settings.optimizerswitches) then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 < const2:
                          mask out the lowest const1 bits, then shift left by
                          the difference }
                        begin
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                          taicpu(p).opcode := A_AND;
                          LoadLowBitsClearedMask(taicpu(p), taicpu(p).oper[0]^.val);
                        end
                      else
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 = const2: a single "and" is enough }
                        if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                          begin
                            taicpu(p).opcode := A_AND;
                            LoadLowBitsClearedMask(taicpu(p), taicpu(p).oper[0]^.val);
                            asml.remove(hp1);
                            hp1.free;
                          end;
                  end;
                A_XOR:
                  if (taicpu(p).oper[0]^.typ = top_reg) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                    { temporarily change this to 'mov reg,0' to make it easier }
                    { for the CSE. Will be changed back in pass 2              }
                    begin
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).loadConst(0,0);
                    end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  403. procedure PeepHoleOptPass1(Asml: TAsmList; BlockStart, BlockEnd: tai);
  404. {First pass of peepholeoptimizations}
  405. var
  406. l : longint;
  407. p,hp1,hp2 : tai;
  408. hp3,hp4: tai;
  409. v:aint;
  410. TmpRef: TReference;
  411. UsedRegs, TmpUsedRegs: TRegSet;
  412. TmpBool1, TmpBool2: Boolean;
  function SkipLabels(hp: tai; var hp2: tai): boolean;
  { Starting behind hp, skips every item whose type is in SkipInstr or is a
    label or an alignment.
    Returns true and stores the first item that is not skipped in hp2.
    If the list ends first, returns false and stores the last item reached
    in hp2. }
  var
    following: tai;
  begin
    following := tai(hp.next);
    while assigned(following) and
          (following.typ in SkipInstr + [ait_label,ait_align]) do
      begin
        hp := following;
        following := tai(hp.next);
      end;
    SkipLabels := assigned(following);
    if assigned(following) then
      hp2 := following
    else
      hp2 := hp;
  end;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  { Traces successive jumps to their final destination and retargets hp to it,
    e.g.

        je l1                je l3
        <code>               <code>
      l1:         becomes  l1:
        je l2                je l3
        <code>               <code>
      l2:                  l2:
        jmp l3               jmp l3

    asml  : list that receives a new label when one has to be inserted
    hp    : the jump instruction to retarget; operand 0 is its target label
    level : how deep the chain of jumps has already been followed; tracing is
            abandoned beyond 20 levels to avoid endless loops with constructs
            such as "l5: ; jmp l5"

    Returns false when the depth limit is exceeded, when the jump at the
    destination targets the same label number as hp, or when a recursive call
    fails; returns true otherwise (also when the target label is not found or
    nothing could be changed). The reference counts of the labels involved are
    updated whenever hp is retargeted. }
  var
    p1, p2: tai;
    l: tasmlabel;

    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
    { Skips the SkipInstr/alignment items behind hp; if the item after those
      is a label, stores its symbol in l and returns true. }
    begin
      FindAnyLabel := false;
      while assigned(hp.next) and
            (tai(hp.next).typ in (SkipInstr+[ait_align])) do
        hp := tai(hp.next);
      if assigned(hp.next) and
         (tai(hp.next).typ = ait_label) then
        begin
          FindAnyLabel := true;
          l := tai_label(hp.next).labsym;
        end
    end;

  begin
    GetFinalDestination := false;
    if level > 20 then
      exit;
    p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
    if assigned(p1) then
      begin
        SkipLabels(p1,p1);
        if (tai(p1).typ = ait_instruction) and
           (taicpu(p1).is_jmp) then
          if { the next instruction after the label where the jump hp arrives}
             { is unconditional or of the same type as hp, so continue       }
             (taicpu(p1).condition in [C_None,hp.condition]) or
             { the next instruction after the label where the jump hp arrives}
             { is the opposite of hp (so this one is never taken), but after }
             { that one there is a branch that will be taken, so perform a   }
             { little hack: set p1 equal to this instruction (that's what the}
             { last SkipLabels is for, only works with short bool evaluation)}
             ((taicpu(p1).condition = inverse_cond(hp.condition)) and
              SkipLabels(p1,p2) and
              (p2.typ = ait_instruction) and
              (taicpu(p2).is_jmp) and
              (taicpu(p2).condition in [C_None,hp.condition]) and
              SkipLabels(p1,p1)) then
            begin
              { quick check for loops of the form "l5: ; jmp l5" }
              if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                  tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                exit;
              if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                exit;
              { move hp's reference from its old target to p1's target }
              tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
              hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
              tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
            end
          else
            if (taicpu(p1).condition = inverse_cond(hp.condition)) then
              { the jump at the destination is never taken when coming from
                hp, so hp can jump straight behind it }
              if not FindAnyLabel(p1,l) then
                begin
  {$ifdef finaldestdebug}
                  insertllitem(asml,p1,p1.next,tai_comment.Create(
                    strpnew('previous label inserted')));
  {$endif finaldestdebug}
                  current_asmdata.getjumplabel(l);
                  insertllitem(asml,p1,p1.next,tai_label.Create(l));
                  tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol := l;
                  l.increfs;
                  { this won't work, since the new label isn't in the labeltable }
                  { so it will fail the rangecheck. Labeltable should become a   }
                  { hashtable to support this:                                   }
                  { GetFinalDestination(asml, hp); }
                end
              else
                begin
  {$ifdef finaldestdebug}
                  insertllitem(asml,p1,p1.next,tai_comment.Create(
                    strpnew('next label reused')));
  {$endif finaldestdebug}
                  l.increfs;
                  { hp no longer refers to its old target: release that
                    reference like the other retargeting branches do }
                  tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol := l;
                  if not GetFinalDestination(asml, hp,succ(level)) then
                    exit;
                end;
      end;
    GetFinalDestination := true;
  end;
  function DoSubAddOpt(var p: tai): Boolean;
  { Merges the instruction preceding p into p when that instruction is a
    dec, "sub const" or "add const" of the same size acting on the register in
    operand 1 of p. The constant in operand 0 of p is adjusted (+1 for dec,
    +const for sub, -const for add) and the preceding instruction is removed.
    NOTE(review): p presumably is a "sub const,reg" - confirm against the
    caller, which is not part of this excerpt.

    Only when an absorbed add leaves p with the constant 0 is p itself removed
    as well; p is then moved to the previous instruction (or, if there is
    none, to the item that followed p) and the result is true. In every other
    case the result is false.

    Uses hp1 and asml of the enclosing procedure. }
  var
    prevIsAdd: boolean;
    prevConst: aint;
  begin
    DoSubAddOpt := False;
    if not GetLastInstruction(p, hp1) or
       (hp1.typ <> ait_instruction) or
       (taicpu(hp1).opsize <> taicpu(p).opsize) then
      exit;

    case taicpu(hp1).opcode of
      A_DEC:
        if (taicpu(hp1).oper[0]^.typ = top_reg) and
           (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
            asml.remove(hp1);
            hp1.free;
          end;
      A_SUB,A_ADD:
        if (taicpu(hp1).oper[0]^.typ = top_const) and
           (taicpu(hp1).oper[1]^.typ = top_reg) and
           (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            prevIsAdd := (taicpu(hp1).opcode = A_ADD);
            prevConst := taicpu(hp1).oper[0]^.val;
            { a preceding sub accumulates, a preceding add cancels out }
            if prevIsAdd then
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-prevConst)
            else
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+prevConst);
            asml.remove(hp1);
            hp1.free;
            if prevIsAdd and
               (taicpu(p).oper[0]^.val = 0) then
              begin
                { nothing left to subtract: drop p too }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                if not GetLastInstruction(hp1, p) then
                  p := hp1;
                DoSubAddOpt := True;
              end
          end;
    end;
  end;
  567. begin
  568. p := BlockStart;
  569. UsedRegs := [];
  570. while (p <> BlockEnd) Do
  571. begin
  572. UpDateUsedRegs(UsedRegs, tai(p.next));
  573. case p.Typ Of
  574. ait_instruction:
  575. begin
  576. { Handle Jmp Optimizations }
  577. if taicpu(p).is_jmp then
  578. begin
  579. {the following if-block removes all code between a jmp and the next label,
  580. because it can never be executed}
  581. if (taicpu(p).opcode = A_JMP) then
  582. begin
  583. while GetNextInstruction(p, hp1) and
  584. (hp1.typ <> ait_label) do
  585. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  586. begin
  587. asml.remove(hp1);
  588. hp1.free;
  589. end
  590. else break;
  591. end;
  592. { remove jumps to a label coming right after them }
  593. if GetNextInstruction(p, hp1) then
  594. begin
  595. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  596. {$warning FIXME removing the first instruction fails}
  597. (p<>blockstart) then
  598. begin
  599. hp2:=tai(hp1.next);
  600. asml.remove(p);
  601. p.free;
  602. p:=hp2;
  603. continue;
  604. end
  605. else
  606. begin
  607. if hp1.typ = ait_label then
  608. SkipLabels(hp1,hp1);
  609. if (tai(hp1).typ=ait_instruction) and
  610. (taicpu(hp1).opcode=A_JMP) and
  611. GetNextInstruction(hp1, hp2) and
  612. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  613. begin
  614. if taicpu(p).opcode=A_Jcc then
  615. begin
  616. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  617. tai_label(hp2).labsym.decrefs;
  618. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  619. { when free'ing hp1, the ref. isn't decresed, so we don't
  620. increase it (FK)
  621. taicpu(p).oper[0]^.ref^.symbol.increfs;
  622. }
  623. asml.remove(hp1);
  624. hp1.free;
  625. GetFinalDestination(asml, taicpu(p),0);
  626. end
  627. else
  628. begin
  629. GetFinalDestination(asml, taicpu(p),0);
  630. p:=tai(p.next);
  631. continue;
  632. end;
  633. end
  634. else
  635. GetFinalDestination(asml, taicpu(p),0);
  636. end;
  637. end;
  638. end
  639. else
  640. { All other optimizes }
  641. begin
  642. for l := 0 to taicpu(p).ops-1 Do
  643. if (taicpu(p).oper[l]^.typ = top_ref) then
  644. With taicpu(p).oper[l]^.ref^ Do
  645. begin
  646. if (base = NR_NO) and
  647. (index <> NR_NO) and
  648. (scalefactor in [0,1]) then
  649. begin
  650. base := index;
  651. index := NR_NO
  652. end
  653. end;
  654. case taicpu(p).opcode Of
  655. A_AND:
  656. begin
  657. if (taicpu(p).oper[0]^.typ = top_const) and
  658. (taicpu(p).oper[1]^.typ = top_reg) and
  659. GetNextInstruction(p, hp1) and
  660. (tai(hp1).typ = ait_instruction) and
  661. (taicpu(hp1).opcode = A_AND) and
  662. (taicpu(hp1).oper[0]^.typ = top_const) and
  663. (taicpu(hp1).oper[1]^.typ = top_reg) and
  664. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) then
  665. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  666. begin
  667. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  668. asml.remove(hp1);
  669. hp1.free;
  670. end
  671. else
  672. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  673. jump, but only if it's a conditional jump (PFV) }
  674. if (taicpu(p).oper[1]^.typ = top_reg) and
  675. GetNextInstruction(p, hp1) and
  676. (hp1.typ = ait_instruction) and
  677. (taicpu(hp1).is_jmp) and
  678. (taicpu(hp1).opcode<>A_JMP) and
  679. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  680. taicpu(p).opcode := A_TEST;
  681. end;
  A_CMP:
    begin
      { Peephole patterns that start with a CMP instruction.

        Pattern 1:
          cmp register,$8000                neg register
          je target                 -->     jo target
          .... only if register is deallocated before jump.

        The constant is the sign-bit-only value of the operand size; NEG
        raises the overflow flag exactly for that value. }
      case taicpu(p).opsize of
        S_B: v:=$80;
        S_W: v:=$8000;
        S_L: v:=aint($80000000);
        else
          { No sign-bit constant is defined for other sizes. Give v a
            defined value; the opsize test below keeps it from matching. }
          v:=0;
      end;
      if (taicpu(p).opsize in [S_B,S_W,S_L]) and
         (taicpu(p).oper[0]^.typ=Top_const) and
         (taicpu(p).oper[0]^.val=v) and
         (taicpu(p).oper[1]^.typ=top_reg) and
         GetNextInstruction(p, hp1) and
         (hp1.typ=ait_instruction) and
         (taicpu(hp1).opcode=A_Jcc) and
         (taicpu(hp1).condition in [C_E,C_NE]) and
         not(getsupreg(taicpu(p).oper[1]^.reg) in usedregs) then
        begin
          { turn the two-operand cmp into a one-operand neg on the register }
          taicpu(p).opcode:=A_NEG;
          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
          taicpu(p).clearop(1);
          taicpu(p).ops:=1;
          { equal/not-equal becomes overflow/no-overflow }
          if taicpu(hp1).condition=C_E then
            taicpu(hp1).condition:=C_O
          else
            taicpu(hp1).condition:=C_NO;
          continue;
        end;
      { Pattern 2:
          @@2:                              @@2:
            ....                              ....
            cmp operand1,0
            jle/jbe @@1
            dec operand1             -->      sub operand1,1
            jmp @@2                           jge/jae @@2
          @@1:                              @@1:
            ...                               ....}
      if (taicpu(p).oper[0]^.typ = top_const) and
         (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
         (taicpu(p).oper[0]^.val = 0) and
         GetNextInstruction(p, hp1) and
         (hp1.typ = ait_instruction) and
         (taicpu(hp1).is_jmp) and
         (taicpu(hp1).opcode=A_Jcc) and
         (taicpu(hp1).condition in [C_LE,C_BE]) and
         GetNextInstruction(hp1,hp2) and
         (hp2.typ = ait_instruction) and
         (taicpu(hp2).opcode = A_DEC) and
         OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
         GetNextInstruction(hp2, hp3) and
         (hp3.typ = ait_instruction) and
         (taicpu(hp3).is_jmp) and
         (taicpu(hp3).opcode = A_JMP) and
         GetNextInstruction(hp3, hp4) and
         { the conditional jump must target the label right after the jmp }
         FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
        begin
          { dec operand1 -> sub $1,operand1 }
          taicpu(hp2).Opcode := A_SUB;
          taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
          taicpu(hp2).loadConst(0,1);
          taicpu(hp2).ops:=2;
          { jmp @@2 -> conditional jump with the inverse-style condition;
            hp1 is read here before it is freed below }
          taicpu(hp3).Opcode := A_Jcc;
          case taicpu(hp1).condition of
            C_LE: taicpu(hp3).condition := C_GE;
            C_BE: taicpu(hp3).condition := C_AE;
          end;
          { the cmp and the first conditional jump are no longer needed }
          asml.remove(p);
          asml.remove(hp1);
          p.free;
          hp1.free;
          p := hp2;
          continue;
        end
    end;
  A_FLD:
    begin
      { Peephole patterns that fold an FLD into the following
        "fxxxp st, st1" arithmetic instruction. }
      if (taicpu(p).oper[0]^.typ = top_reg) and
         GetNextInstruction(p, hp1) and
         (hp1.typ = Ait_Instruction) and
         { oper[1]^ is only valid for two-operand instructions; the
           memory-operand pattern below performs the same check }
         (taicpu(hp1).ops = 2) and
         (taicpu(hp1).oper[0]^.typ = top_reg) and
         (taicpu(hp1).oper[1]^.typ = top_reg) and
         (taicpu(hp1).oper[0]^.reg = NR_ST) and
         (taicpu(hp1).oper[1]^.reg = NR_ST1) then
        { change                        to
            fld      reg               fxxx reg,st
            fxxxp    st, st1 (hp1)
          Remark: non commutative operations must be reversed!
        }
        begin
          case taicpu(hp1).opcode Of
            A_FMULP,A_FADDP,
            A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
              begin
                { pick the non-popping opcode, swapping the direction of
                  the non-commutative ones }
                case taicpu(hp1).opcode Of
                  A_FADDP: taicpu(hp1).opcode := A_FADD;
                  A_FMULP: taicpu(hp1).opcode := A_FMUL;
                  A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
                  A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
                  A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
                  A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
                end;
                taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
                taicpu(hp1).oper[1]^.reg := NR_ST;
                { the fld itself is now redundant }
                asml.remove(p);
                p.free;
                p := hp1;
                continue;
              end;
          end;
        end
      else
        if (taicpu(p).oper[0]^.typ = top_ref) and
           GetNextInstruction(p, hp2) and
           (hp2.typ = Ait_Instruction) and
           (taicpu(hp2).ops = 2) and
           (taicpu(hp2).oper[0]^.typ = top_reg) and
           (taicpu(hp2).oper[1]^.typ = top_reg) and
           (taicpu(p).opsize in [S_FS, S_FL]) and
           (taicpu(hp2).oper[0]^.reg = NR_ST) and
           (taicpu(hp2).oper[1]^.reg = NR_ST1) then
          { "fld mem; fxxxp st, st1": look at the instruction before the fld
            to see whether it loaded/stored the very same memory location }
          if GetLastInstruction(p, hp1) and
             (hp1.typ = Ait_Instruction) and
             ((taicpu(hp1).opcode = A_FLD) or
              (taicpu(hp1).opcode = A_FST)) and
             (taicpu(hp1).opsize = taicpu(p).opsize) and
             (taicpu(hp1).oper[0]^.typ = top_ref) and
             RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
            if ((taicpu(hp2).opcode = A_FMULP) or
                (taicpu(hp2).opcode = A_FADDP)) then
              { change                      to
                  fld/fst   mem1  (hp1)       fld/fst   mem1
                  fld       mem1  (p)         fadd/
                  faddp/                       fmul     st, st
                  fmulp  st, st1 (hp2) }
              begin
                asml.remove(p);
                p.free;
                { resume from the preceding fld/fst }
                p := hp1;
                if (taicpu(hp2).opcode = A_FADDP) then
                  taicpu(hp2).opcode := A_FADD
                else
                  taicpu(hp2).opcode := A_FMUL;
                taicpu(hp2).oper[1]^.reg := NR_ST;
              end
            else
              { change              to
                  fld/fst mem1 (hp1)   fld/fst mem1
                  fld     mem1 (p)     fld      st}
              begin
                taicpu(p).changeopsize(S_FL);
                taicpu(p).loadreg(0,NR_ST);
              end
          else
            begin
              case taicpu(hp2).opcode Of
                A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
                  { change                        to
                      fld/fst  mem1    (hp1)      fld/fst    mem1
                      fld      mem2    (p)        fxxx       mem2
                      fxxxp    st, st1 (hp2)                      }
                  begin
                    { the fld becomes the arithmetic instruction on mem2;
                      non-commutative operations are reversed }
                    case taicpu(hp2).opcode Of
                      A_FADDP: taicpu(p).opcode := A_FADD;
                      A_FMULP: taicpu(p).opcode := A_FMUL;
                      A_FSUBP: taicpu(p).opcode := A_FSUBR;
                      A_FSUBRP: taicpu(p).opcode := A_FSUB;
                      A_FDIVP: taicpu(p).opcode := A_FDIVR;
                      A_FDIVRP: taicpu(p).opcode := A_FDIV;
                    end;
                    asml.remove(hp2);
                    hp2.free;
                  end
              end
            end
    end;
  A_FSTP,A_FISTP:
    begin
      { Delegate to the FPU load/store helper; when it reports success,
        restart the loop iteration instead of falling through. }
      if doFpuLoadStoreOpt(asmL,p) then
        continue;
    end;
  A_LEA:
    begin
      { Simplifies LEA instructions whose reference is just "offset(%base)"
        with no index and no symbol. }

      {removes seg register prefixes from LEA operations, as they
       don't do anything}
      taicpu(p).oper[0]^.ref^.Segment := NR_NO;
      if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
         (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
         (taicpu(p).oper[0]^.ref^.index = NR_NO) and
         (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
        begin
          if (taicpu(p).oper[0]^.ref^.offset = 0) then
            begin
              if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) then
                begin
                  {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
                  hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
                    taicpu(p).oper[1]^.reg);
                  InsertLLItem(asml,p.previous,p.next, hp1);
                  p.free;
                  p := hp1;
                end
              else
                begin
                  {"lea (%reg), %reg" does nothing: drop it and resume with
                   its successor}
                  hp1 := tai(p.Next);
                  asml.remove(p);
                  p.free;
                  p := hp1;
                end;
              continue;
            end
          else if (taicpu(p).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
            begin
              {"lea offset(%reg), %reg" with a non-zero offset becomes
               inc/dec/add on %reg; the offset is copied first because
               loading a new operand 0 replaces the reference}
              l := taicpu(p).oper[0]^.ref^.offset;
              case l of
                1:
                  begin
                    taicpu(p).opcode := A_INC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops := 1;
                  end;
                -1:
                  begin
                    taicpu(p).opcode := A_DEC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops := 1;
                  end;
                else
                  begin
                    taicpu(p).opcode := A_ADD;
                    taicpu(p).loadConst(0,l);
                  end;
              end;
            end;
        end;
    end;
  A_MOV:
    begin
      { Peephole patterns that start with a MOV instruction. The arm runs
        three independent groups of checks in sequence:
          1. mov followed by a use of its destination / leave / ret / cmp,
          2. mov followed by another mov of the same size,
          3. "mov $0,reg; bts/btr; or" reordering. }
      TmpUsedRegs := UsedRegs;
      if (taicpu(p).oper[1]^.typ = top_reg) and
         (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
         GetNextInstruction(p, hp1) and
         (tai(hp1).typ = ait_instruction) and
         (taicpu(hp1).opcode = A_MOV) and
         (taicpu(hp1).oper[0]^.typ = top_reg) and
         (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
        begin
          {we have "mov x, %treg; mov %treg, y}
          if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
            {we've got "mov x, %treg; mov %treg, y; with %treg is not used after }
            case taicpu(p).oper[0]^.typ Of
              top_reg:
                begin
                  { change "mov %reg, %treg; mov %treg, y"
                    to "mov %reg, y" }
                  taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
                  asml.remove(hp1);
                  hp1.free;
                  continue;
                end;
              top_ref:
                if (taicpu(hp1).oper[1]^.typ = top_reg) then
                  begin
                    { change "mov mem, %treg; mov %treg, %reg"
                      to "mov mem, %reg" }
                    taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                    asml.remove(hp1);
                    hp1.free;
                    continue;
                  end;
            end
        end
      else
        {Change "mov %reg1, %reg2; xxx %reg2, ???" to
         "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
         penalty}
        if (taicpu(p).oper[0]^.typ = top_reg) and
           (taicpu(p).oper[1]^.typ = top_reg) and
           GetNextInstruction(p,hp1) and
           (tai(hp1).typ = ait_instruction) and
           (taicpu(hp1).ops >= 1) and
           (taicpu(hp1).oper[0]^.typ = top_reg) and
           (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
          {we have "mov %reg1, %reg2; XXX %reg2, ???"}
          begin
            if ((taicpu(hp1).opcode = A_OR) or
                (taicpu(hp1).opcode = A_TEST)) and
               (taicpu(hp1).oper[1]^.typ = top_reg) and
               (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
              {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
              begin
                TmpUsedRegs := UsedRegs;
                { reg1 will be used after the first instruction, }
                { so update the allocation info                  }
                allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                if GetNextInstruction(hp1, hp2) and
                   (hp2.typ = ait_instruction) and
                   taicpu(hp2).is_jmp and
                   not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
                  { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
                    "test %reg1, %reg1; jxx" }
                  begin
                    taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                    taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                    asml.remove(p);
                    p.free;
                    p := hp1;
                    continue
                  end
                else
                  {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
                   "mov %reg1, %reg2; test/or %reg1, %reg1"}
                  begin
                    taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                    taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                  end;
              end
            { else
                if (taicpu(p.next)^.opcode
                   in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
            {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
             "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
          end
        else
          {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
           x >= RetOffset) as it doesn't do anything (it writes either to a
           parameter or to the temporary storage room for the function
           result)}
          if GetNextInstruction(p, hp1) and
             (tai(hp1).typ = ait_instruction) then
            if ((taicpu(hp1).opcode = A_LEAVE) or
                (taicpu(hp1).opcode = A_RET)) and
               (taicpu(p).oper[1]^.typ = top_ref) and
               (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
               not(assigned(current_procinfo.procdef.funcretsym) and
                   (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
               (taicpu(p).oper[1]^.ref^.index = NR_NO) and
               (taicpu(p).oper[0]^.typ = top_reg) then
              begin
                asml.remove(p);
                p.free;
                p := hp1;
                RemoveLastDeallocForFuncRes(asmL,p);
                { NOTE(review): p now refers to the leave/ret, yet the
                  checks further down still treat p as a mov - confirm
                  that this fall-through is harmless }
              end
            else
              if (taicpu(p).oper[0]^.typ = top_reg) and
                 (taicpu(p).oper[1]^.typ = top_ref) and
                 (taicpu(p).opsize = taicpu(hp1).opsize) and
                 (taicpu(hp1).opcode = A_CMP) and
                 (taicpu(hp1).oper[1]^.typ = top_ref) and
                 RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                {change "mov reg1, mem1; cmp x, mem1" to "mov reg, mem1; cmp x, reg1"}
                begin
                  taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
                  allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                end;
      { Next instruction is also a MOV ? }
      if GetNextInstruction(p, hp1) and
         (tai(hp1).typ = ait_instruction) and
         (taicpu(hp1).opcode = A_MOV) and
         (taicpu(hp1).opsize = taicpu(p).opsize) then
        begin
          if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
             (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
            {mov reg1, mem1     or     mov mem1, reg1
             mov mem2, reg2            mov reg2, mem2}
            begin
              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
                {mov reg1, mem1     or     mov mem1, reg1
                 mov mem2, reg1            mov reg2, mem1}
                begin
                  if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                    { Removes the second statement from
                        mov reg1, mem1/reg2
                        mov mem1/reg2, reg1 }
                    begin
                      if (taicpu(p).oper[0]^.typ = top_reg) then
                        AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                      asml.remove(hp1);
                      hp1.free;
                    end
                  else
                    begin
                      TmpUsedRegs := UsedRegs;
                      UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                      if (taicpu(p).oper[1]^.typ = top_ref) and
                         { mov reg1, mem1
                           mov mem2, reg1 }
                         (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
                         GetNextInstruction(hp1, hp2) and
                         (hp2.typ = ait_instruction) and
                         (taicpu(hp2).opcode = A_CMP) and
                         (taicpu(hp2).opsize = taicpu(p).opsize) and
                         (taicpu(hp2).oper[0]^.typ = TOp_Ref) and
                         (taicpu(hp2).oper[1]^.typ = TOp_Reg) and
                         RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(p).oper[1]^.ref^) and
                         (taicpu(hp2).oper[1]^.reg= taicpu(p).oper[0]^.reg) and
                         not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
                        { change                   to
                            mov reg1, mem1           mov reg1, mem1
                            mov mem2, reg1           cmp reg1, mem2
                            cmp mem1, reg1                          }
                        begin
                          asml.remove(hp2);
                          hp2.free;
                          taicpu(hp1).opcode := A_CMP;
                          taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
                          taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                        end;
                    end;
                end
              else
                begin
                  tmpUsedRegs := UsedRegs;
                  if GetNextInstruction(hp1, hp2) and
                     (taicpu(p).oper[0]^.typ = top_ref) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     (taicpu(hp1).oper[0]^.typ = top_reg) and
                     (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
                     (taicpu(hp1).oper[1]^.typ = top_ref) and
                     (tai(hp2).typ = ait_instruction) and
                     (taicpu(hp2).opcode = A_MOV) and
                     (taicpu(hp2).opsize = taicpu(p).opsize) and
                     (taicpu(hp2).oper[1]^.typ = top_reg) and
                     (taicpu(hp2).oper[0]^.typ = top_ref) and
                     RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                    if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and
                       not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
                      { mov mem1, %reg1
                        mov %reg1, mem2
                        mov mem2, reg2
                        to:
                        mov mem1, reg2
                        mov reg2, mem2}
                      begin
                        AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
                        taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
                        taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
                        asml.remove(hp2);
                        hp2.free;
                      end
                    else
                      if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
                         not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and
                         not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then
                        { mov mem1, reg1         mov mem1, reg1
                          mov reg1, mem2         mov reg1, mem2
                          mov mem2, reg2         mov mem2, reg1
                          to:                    to:
                          mov mem1, reg1         mov mem1, reg1
                          mov mem1, reg2         mov reg1, mem2
                          mov reg1, mem2
                          or (if mem1 depends on reg1
                          and/or if mem2 depends on reg2)
                          to:
                          mov mem1, reg1
                          mov reg1, mem2
                          mov reg1, reg2
                        }
                        begin
                          taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                          taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
                          taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
                          taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
                          allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                          { base/index of mem1 are now read one instruction
                            later, so extend their allocation as well }
                          if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
                             (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                            allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
                          if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
                             (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                            allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
                        end
                      else
                        if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
                          begin
                            { reload from the register instead of from mem2 }
                            taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
                            allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                          end
                        else
                          begin
                            { mem2 is reloaded into the register it was just
                              stored from: the third mov is redundant }
                            asml.remove(hp2);
                            hp2.free;
                          end
                end
            end
          else
            (* {movl [mem1],reg1
                movl [mem1],reg2
                to:
                movl [mem1],reg1
                movl reg1,reg2 }
            if (taicpu(p).oper[0]^.typ = top_ref) and
               (taicpu(p).oper[1]^.typ = top_reg) and
               (taicpu(hp1).oper[0]^.typ = top_ref) and
               (taicpu(hp1).oper[1]^.typ = top_reg) and
               (taicpu(p).opsize = taicpu(hp1).opsize) and
               RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
               (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
               (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
            else*)
            { movl const1,[mem1]
              movl [mem1],reg1
              to:
              movl const1,reg1
              movl reg1,[mem1] }
            if (taicpu(p).oper[0]^.typ = top_const) and
               (taicpu(p).oper[1]^.typ = top_ref) and
               (taicpu(hp1).oper[0]^.typ = top_ref) and
               (taicpu(hp1).oper[1]^.typ = top_reg) and
               (taicpu(p).opsize = taicpu(hp1).opsize) and
               RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
               not(reginref(getsupreg(taicpu(hp1).oper[1]^.reg),taicpu(hp1).oper[0]^.ref^)) then
              begin
                allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
                { hp1 becomes "mov reg1,[mem1]"; once its operand 0 holds
                  reg1, that register is copied into p's destination }
                taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
                taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
                taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
              end
        end;
      if GetNextInstruction(p, hp1) and
         (tai(hp1).typ = ait_instruction) and
         ((taicpu(hp1).opcode = A_BTS) or (taicpu(hp1).opcode = A_BTR)) and
         (taicpu(hp1).opsize = taicpu(p).opsize) and
         GetNextInstruction(hp1, hp2) and
         (tai(hp2).typ = ait_instruction) and
         (taicpu(hp2).opcode = A_OR) and
         (taicpu(hp2).opsize = taicpu(p).opsize) and
         (taicpu(p).oper[0]^.typ = top_const) and (taicpu(p).oper[0]^.val=0) and
         (taicpu(p).oper[1]^.typ = top_reg) and
         (taicpu(hp1).oper[1]^.typ = top_reg) and
         (taicpu(p).oper[1]^.reg=taicpu(hp1).oper[1]^.reg) and
         (taicpu(hp2).oper[1]^.typ = top_reg) and
         (taicpu(p).oper[1]^.reg=taicpu(hp2).oper[1]^.reg) then
        {mov reg1,0
         bts reg1,operand1             -->      mov reg1,operand2
         or  reg1,operand2                      bts reg1,operand1}
        begin
          taicpu(hp2).opcode:=A_MOV;
          { move the bts/btr behind the former "or" }
          asml.remove(hp1);
          insertllitem(asml,hp2,hp2.next,hp1);
          asml.remove(p);
          p.free;
          { p has been freed: resume from the new mov so that the enclosing
            loop never touches the released instruction }
          p := hp2;
          continue;
        end;
    end;
  A_MOVSX,
  A_MOVZX:
    begin
      { Peephole patterns for sign/zero-extending moves: first try to fold
        "extend; arithmetic; store back" into one arithmetic instruction,
        otherwise (movzx only) simplify movzx/and combinations. }
      if (taicpu(p).oper[1]^.typ = top_reg) and
         GetNextInstruction(p,hp1) and
         (hp1.typ = ait_instruction) and
         IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
         (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
         GetNextInstruction(hp1,hp2) and
         (hp2.typ = ait_instruction) and
         (taicpu(hp2).opcode = A_MOV) and
         (taicpu(hp2).oper[0]^.typ = top_reg) and
         OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) then
        { change   movsX/movzX    reg/ref, reg2             }
        {          add/sub/or/... reg3/$const, reg2         }
        {          mov            reg2 reg/ref              }
        { to       add/sub/or/... reg3/$const, reg/ref      }
        begin
          { the arithmetic instruction takes over size and destination of
            the final mov }
          taicpu(hp1).changeopsize(taicpu(hp2).opsize);
          taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
          if (taicpu(hp1).oper[0]^.typ = top_reg) then
            setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
          asml.remove(p);
          asml.remove(hp2);
          p.free;
          hp2.free;
          p := hp1
        end
      else if taicpu(p).opcode=A_MOVZX then
        begin
          { removes superfluous And's after movzx's }
          if (taicpu(p).oper[1]^.typ = top_reg) and
             GetNextInstruction(p, hp1) and
             (tai(hp1).typ = ait_instruction) and
             (taicpu(hp1).opcode = A_AND) and
             (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            case taicpu(p).opsize Of
              S_BL, S_BW:
                if (taicpu(hp1).oper[0]^.val = $ff) then
                  begin
                    asml.remove(hp1);
                    hp1.free;
                  end;
              S_WL:
                if (taicpu(hp1).oper[0]^.val = $ffff) then
                  begin
                    asml.remove(hp1);
                    hp1.free;
                  end;
            end;
          {changes some movzx constructs to faster synonyms (all examples
           are given with eax/ax, but are also valid for other registers)}
          if (taicpu(p).oper[1]^.typ = top_reg) then
            case taicpu(p).oper[0]^.typ of
              top_reg:
                case taicpu(p).opsize of
                  S_BW:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                         not(cs_opt_size in current_settings.optimizerswitches) then
                        {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                        begin
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_W);
                          taicpu(p).loadConst(0,$ff);
                        end
                      else if GetNextInstruction(p, hp1) and
                              (tai(hp1).typ = ait_instruction) and
                              (taicpu(hp1).opcode = A_AND) and
                              (taicpu(hp1).oper[0]^.typ = top_const) and
                              (taicpu(hp1).oper[1]^.typ = top_reg) and
                              (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        {Change "movzbw %reg1, %reg2; andw $const, %reg2"
                         to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                        begin
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_W);
                          setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end;
                    end;
                  S_BL:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                         not(cs_opt_size in current_settings.optimizerswitches) then
                        {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
                        begin
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_L);
                          taicpu(p).loadConst(0,$ff)
                        end
                      else if GetNextInstruction(p, hp1) and
                              (tai(hp1).typ = ait_instruction) and
                              (taicpu(hp1).opcode = A_AND) and
                              (taicpu(hp1).oper[0]^.typ = top_const) and
                              (taicpu(hp1).oper[1]^.typ = top_reg) and
                              (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        {Change "movzbl %reg1, %reg2; andl $const, %reg2"
                         to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                        begin
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_L);
                          setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                        end
                    end;
                  S_WL:
                    begin
                      if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                         not(cs_opt_size in current_settings.optimizerswitches) then
                        {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
                        begin
                          taicpu(p).opcode := A_AND;
                          taicpu(p).changeopsize(S_L);
                          taicpu(p).loadConst(0,$ffff);
                        end
                      else if GetNextInstruction(p, hp1) and
                              (tai(hp1).typ = ait_instruction) and
                              (taicpu(hp1).opcode = A_AND) and
                              (taicpu(hp1).oper[0]^.typ = top_const) and
                              (taicpu(hp1).oper[1]^.typ = top_reg) and
                              (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        {Change "movzwl %reg1, %reg2; andl $const, %reg2"
                         to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                        begin
                          taicpu(p).opcode := A_MOV;
                          taicpu(p).changeopsize(S_L);
                          setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                        end;
                    end;
                end;
              top_ref:
                begin
                  {"movzx mem, %reg; and $const, %reg" becomes a plain mov
                   of the widened size followed by an and whose constant is
                   limited to the bits of the original source size}
                  if GetNextInstruction(p, hp1) and
                     (tai(hp1).typ = ait_instruction) and
                     (taicpu(hp1).opcode = A_AND) and
                     (taicpu(hp1).oper[0]^.typ = Top_Const) and
                     (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                     (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    begin
                      taicpu(p).opcode := A_MOV;
                      case taicpu(p).opsize Of
                        S_BL:
                          begin
                            taicpu(p).changeopsize(S_L);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                          end;
                        S_WL:
                          begin
                            taicpu(p).changeopsize(S_L);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                          end;
                        S_BW:
                          begin
                            taicpu(p).changeopsize(S_W);
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                          end;
                      end;
                    end;
                end;
            end;
        end;
    end;
  1386. (* should not be generated anymore by the current code generator
  1387. A_POP:
  1388. begin
  1389. if target_info.system=system_i386_go32v2 then
  1390. begin
  1391. { Transform a series of pop/pop/pop/push/push/push to }
  1392. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1393. { because I'm not sure whether they can cope with }
  1394. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1395. { such a problem when using esp as frame pointer (JM) }
  1396. if (taicpu(p).oper[0]^.typ = top_reg) then
  1397. begin
  1398. hp1 := p;
  1399. hp2 := p;
  1400. l := 0;
  1401. while getNextInstruction(hp1,hp1) and
  1402. (hp1.typ = ait_instruction) and
  1403. (taicpu(hp1).opcode = A_POP) and
  1404. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1405. begin
  1406. hp2 := hp1;
  1407. inc(l,4);
  1408. end;
  1409. getLastInstruction(p,hp3);
  1410. l1 := 0;
  1411. while (hp2 <> hp3) and
  1412. assigned(hp1) and
  1413. (hp1.typ = ait_instruction) and
  1414. (taicpu(hp1).opcode = A_PUSH) and
  1415. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1416. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1417. begin
  1418. { change it to a two op operation }
  1419. taicpu(hp2).oper[1]^.typ:=top_none;
  1420. taicpu(hp2).ops:=2;
  1421. taicpu(hp2).opcode := A_MOV;
  1422. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1423. reference_reset(tmpref);
  1424. tmpRef.base.enum:=R_INTREGISTER;
  1425. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1426. convert_register_to_enum(tmpref.base);
  1427. tmpRef.offset := l;
  1428. taicpu(hp2).loadRef(0,tmpRef);
  1429. hp4 := hp1;
  1430. getNextInstruction(hp1,hp1);
  1431. asml.remove(hp4);
  1432. hp4.free;
  1433. getLastInstruction(hp2,hp2);
  1434. dec(l,4);
  1435. inc(l1);
  1436. end;
  1437. if l <> -4 then
  1438. begin
  1439. inc(l,4);
  1440. for l1 := l1 downto 1 do
  1441. begin
  1442. getNextInstruction(hp2,hp2);
  1443. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1444. end
  1445. end
  1446. end
  1447. end
  1448. else
  1449. begin
  1450. if (taicpu(p).oper[0]^.typ = top_reg) and
  1451. GetNextInstruction(p, hp1) and
  1452. (tai(hp1).typ=ait_instruction) and
  1453. (taicpu(hp1).opcode=A_PUSH) and
  1454. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1455. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1456. begin
  1457. { change it to a two op operation }
  1458. taicpu(p).oper[1]^.typ:=top_none;
  1459. taicpu(p).ops:=2;
  1460. taicpu(p).opcode := A_MOV;
  1461. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1462. reference_reset(tmpref);
  1463. TmpRef.base.enum := R_ESP;
  1464. taicpu(p).loadRef(0,TmpRef);
  1465. asml.remove(hp1);
  1466. hp1.free;
  1467. end;
  1468. end;
  1469. end;
  1470. *)
  A_PUSH:
    begin
      { Merge two consecutive 16-bit constant pushes into one 32-bit push:
        "pushw $c1; pushw $c2" becomes "pushl $(c1 shl 16 + word(c2))". }
      if (taicpu(p).opsize = S_W) and
         (taicpu(p).oper[0]^.typ = Top_Const) then
        if GetNextInstruction(p, hp1) and
           (tai(hp1).typ = ait_instruction) then
          if (taicpu(hp1).opcode = A_PUSH) and
             (taicpu(hp1).oper[0]^.typ = Top_Const) and
             (taicpu(hp1).opsize = S_W) then
            begin
              taicpu(p).changeopsize(S_L);
              { the first pushed word ends up in the upper half }
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
              asml.remove(hp1);
              hp1.free;
            end;
    end;
  A_SHL, A_SAL:
    begin
      { Peephole patterns for left shifts by a small constant: fold
        following add/sub/inc/dec instructions into one LEA, or replace the
        shift by a cheaper add/lea on older CPU targets. }
      if (taicpu(p).oper[0]^.typ = Top_Const) and
         (taicpu(p).oper[1]^.typ = Top_Reg) and
         (taicpu(p).opsize = S_L) and
         (taicpu(p).oper[0]^.val <= 3) then
        {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
        begin
          TmpBool1 := True; {should we check the next instruction?}
          TmpBool2 := False; {have we found an add/sub which could be
                              integrated in the lea?}
          { start with "(,%reg,2^const)" and grow it while instructions
            are absorbed }
          reference_reset(tmpref);
          TmpRef.index := taicpu(p).oper[1]^.reg;
          TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
          while TmpBool1 and
                GetNextInstruction(p, hp1) and
                (tai(hp1).typ = ait_instruction) and
                ((((taicpu(hp1).opcode = A_ADD) or
                   (taicpu(hp1).opcode = A_SUB)) and
                  (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                  (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
                 (((taicpu(hp1).opcode = A_INC) or
                   (taicpu(hp1).opcode = A_DEC)) and
                  (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                  (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
                { the absorbed instruction's flags must not be needed }
                (not GetNextInstruction(hp1,hp2) or
                 not instrReadsFlags(hp2)) Do
            begin
              TmpBool1 := False;
              if (taicpu(hp1).oper[0]^.typ = Top_Const) then
                begin
                  { add/sub of a constant goes into the displacement }
                  TmpBool1 := True;
                  TmpBool2 := True;
                  case taicpu(hp1).opcode of
                    A_ADD:
                      inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                    A_SUB:
                      dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                  end;
                  asml.remove(hp1);
                  hp1.free;
                end
              else
                if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                   (((taicpu(hp1).opcode = A_ADD) and
                     (TmpRef.base = NR_NO)) or
                    (taicpu(hp1).opcode = A_INC) or
                    (taicpu(hp1).opcode = A_DEC)) then
                  begin
                    { add of a register fills the (still free) base;
                      inc/dec adjust the displacement by one }
                    TmpBool1 := True;
                    TmpBool2 := True;
                    case taicpu(hp1).opcode of
                      A_ADD:
                        TmpRef.base := taicpu(hp1).oper[0]^.reg;
                      A_INC:
                        inc(TmpRef.offset);
                      A_DEC:
                        dec(TmpRef.offset);
                    end;
                    asml.remove(hp1);
                    hp1.free;
                  end;
            end;
          { the shift count is already known to be <= 3 here }
          if TmpBool2 or
             ((current_settings.optimizecputype < cpu_Pentium2) and
              not(cs_opt_size in current_settings.optimizerswitches)) then
            begin
              if not(TmpBool2) and
                 (taicpu(p).oper[0]^.val = 1) then
                { nothing was absorbed: "shl $1, %reg" -> "add %reg, %reg" }
                hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                         taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
              else
                hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                         taicpu(p).oper[1]^.reg);
              { replace the shift by the new instruction }
              InsertLLItem(asml,p.previous, p.next, hp1);
              p.free;
              p := hp1;
            end;
        end
      else
        { Only non-32-bit shifts or shift counts above 3 reach this point,
          so the former "shl $2/$3, %reg32 -> lea" alternative of this
          branch could never be taken and has been dropped; the branch
          above already performs that rewrite. }
        if (current_settings.optimizecputype < cpu_Pentium2) and
           (taicpu(p).oper[0]^.typ = top_const) and
           (taicpu(p).oper[1]^.typ = top_reg) and
           (taicpu(p).oper[0]^.val = 1) then
          {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
           but faster on a 486, and pairable in both U and V pipes on the Pentium
           (unlike shl, which is only pairable in the U pipe)}
          begin
            hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                     taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
            InsertLLItem(asml,p.previous, p.next, hp1);
            p.free;
            p := hp1;
          end
    end;
  1598. A_SETcc :
  1599. { changes
  1600. setcc (funcres) setcc reg
  1601. movb (funcres), reg to leave/ret
  1602. leave/ret }
  1603. begin
  1604. if (taicpu(p).oper[0]^.typ = top_ref) and
  1605. GetNextInstruction(p, hp1) and
  1606. GetNextInstruction(hp1, hp2) and
  1607. (hp2.typ = ait_instruction) and
  1608. ((taicpu(hp2).opcode = A_LEAVE) or
  1609. (taicpu(hp2).opcode = A_RET)) and
  1610. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1611. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1612. not(assigned(current_procinfo.procdef.funcretsym) and
  1613. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1614. (hp1.typ = ait_instruction) and
  1615. (taicpu(hp1).opcode = A_MOV) and
  1616. (taicpu(hp1).opsize = S_B) and
  1617. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1618. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1619. begin
  1620. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1621. asml.remove(hp1);
  1622. hp1.free;
  1623. end
  1624. end;
  1625. A_SUB:
  1626. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1627. { * change "sub/add const1, reg" or "dec reg" followed by
  1628. "sub const2, reg" to one "sub ..., reg" }
  1629. begin
  1630. if (taicpu(p).oper[0]^.typ = top_const) and
  1631. (taicpu(p).oper[1]^.typ = top_reg) then
  1632. if (taicpu(p).oper[0]^.val = 2) and
  1633. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1634. { Don't do the sub/push optimization if the sub }
  1635. { comes from setting up the stack frame (JM) }
  1636. (not getLastInstruction(p,hp1) or
  1637. (hp1.typ <> ait_instruction) or
  1638. (taicpu(hp1).opcode <> A_MOV) or
  1639. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1640. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1641. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1642. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1643. begin
  1644. hp1 := tai(p.next);
  1645. while Assigned(hp1) and
  1646. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1647. not regReadByInstruction(RS_ESP,hp1) and
  1648. not regModifiedByInstruction(RS_ESP,hp1) do
  1649. hp1 := tai(hp1.next);
  1650. if Assigned(hp1) and
  1651. (tai(hp1).typ = ait_instruction) and
  1652. (taicpu(hp1).opcode = A_PUSH) and
  1653. (taicpu(hp1).opsize = S_W) then
  1654. begin
  1655. taicpu(hp1).changeopsize(S_L);
  1656. if taicpu(hp1).oper[0]^.typ=top_reg then
  1657. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1658. hp1 := tai(p.next);
  1659. asml.remove(p);
  1660. p.free;
  1661. p := hp1;
  1662. continue
  1663. end;
  1664. if DoSubAddOpt(p) then
  1665. continue;
  1666. end
  1667. else if DoSubAddOpt(p) then
  1668. continue
  1669. end;
  1670. end;
  1671. end; { if is_jmp }
  1672. end;
  1673. end;
  1674. updateUsedRegs(UsedRegs,p);
  1675. p:=tai(p.next);
  1676. end;
  1677. end;
  1678. procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  1679. {$ifdef USECMOV}
  1680. function CanBeCMOV(p : tai) : boolean;
  1681. begin
  1682. CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
  1683. (taicpu(p).opcode=A_MOV) and
  1684. (taicpu(p).opsize in [S_L,S_W]) and
  1685. ((taicpu(p).oper[0]^.typ = top_reg)
  1686. { we can't use cmov ref,reg because
  1687. ref could be nil and cmov still throws an exception
  1688. if ref=nil but the mov isn't done (FK)
  1689. or ((taicpu(p).oper[0]^.typ = top_ref) and
  1690. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  1691. }
  1692. ) and
  1693. (taicpu(p).oper[1]^.typ in [top_reg]);
  1694. end;
  1695. {$endif USECMOV}
  1696. var
  1697. p,hp1,hp2: tai;
  1698. {$ifdef USECMOV}
  1699. l : longint;
  1700. condition : tasmcond;
  1701. hp3: tai;
  1702. {$endif USECMOV}
  1703. UsedRegs, TmpUsedRegs: TRegSet;
  1704. begin
  1705. p := BlockStart;
  1706. UsedRegs := [];
  1707. while (p <> BlockEnd) Do
  1708. begin
  1709. UpdateUsedRegs(UsedRegs, tai(p.next));
  1710. case p.Typ Of
  1711. Ait_Instruction:
  1712. begin
  1713. case taicpu(p).opcode Of
  1714. {$ifdef USECMOV}
  1715. A_Jcc:
  1716. if (current_settings.cputype>=cpu_Pentium2) then
  1717. begin
  1718. { check for
  1719. jCC xxx
  1720. <several movs>
  1721. xxx:
  1722. }
  1723. l:=0;
  1724. GetNextInstruction(p, hp1);
  1725. while assigned(hp1) and
  1726. CanBeCMOV(hp1) and
  1727. { stop on labels }
  1728. not(hp1.typ=ait_label) do
  1729. begin
  1730. inc(l);
  1731. GetNextInstruction(hp1,hp1);
  1732. end;
  1733. if assigned(hp1) then
  1734. begin
  1735. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1736. begin
  1737. if (l<=4) and (l>0) then
  1738. begin
  1739. condition:=inverse_cond(taicpu(p).condition);
  1740. hp2:=p;
  1741. GetNextInstruction(p,hp1);
  1742. p:=hp1;
  1743. repeat
  1744. taicpu(hp1).opcode:=A_CMOVcc;
  1745. taicpu(hp1).condition:=condition;
  1746. GetNextInstruction(hp1,hp1);
  1747. until not(assigned(hp1)) or
  1748. not(CanBeCMOV(hp1));
  1749. { wait with removing else GetNextInstruction could
  1750. ignore the label if it was the only usage in the
  1751. jump moved away }
  1752. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1753. asml.remove(hp2);
  1754. hp2.free;
  1755. continue;
  1756. end;
  1757. end
  1758. else
  1759. begin
  1760. { check further for
  1761. jCC xxx
  1762. <several movs 1>
  1763. jmp yyy
  1764. xxx:
  1765. <several movs 2>
  1766. yyy:
  1767. }
  1768. { hp2 points to jmp yyy }
  1769. hp2:=hp1;
  1770. { skip hp1 to xxx }
  1771. GetNextInstruction(hp1, hp1);
  1772. if assigned(hp2) and
  1773. assigned(hp1) and
  1774. (l<=3) and
  1775. (hp2.typ=ait_instruction) and
  1776. (taicpu(hp2).is_jmp) and
  1777. (taicpu(hp2).condition=C_None) and
  1778. { real label and jump, no further references to the
  1779. label are allowed }
  1780. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
  1781. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1782. begin
  1783. l:=0;
  1784. { skip hp1 to <several moves 2> }
  1785. GetNextInstruction(hp1, hp1);
  1786. while assigned(hp1) and
  1787. CanBeCMOV(hp1) do
  1788. begin
  1789. inc(l);
  1790. GetNextInstruction(hp1, hp1);
  1791. end;
  1792. { hp1 points to yyy: }
  1793. if assigned(hp1) and
  1794. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  1795. begin
  1796. condition:=inverse_cond(taicpu(p).condition);
  1797. GetNextInstruction(p,hp1);
  1798. hp3:=p;
  1799. p:=hp1;
  1800. repeat
  1801. taicpu(hp1).opcode:=A_CMOVcc;
  1802. taicpu(hp1).condition:=condition;
  1803. GetNextInstruction(hp1,hp1);
  1804. until not(assigned(hp1)) or
  1805. not(CanBeCMOV(hp1));
  1806. { hp2 is still at jmp yyy }
  1807. GetNextInstruction(hp2,hp1);
  1808. { hp2 is now at xxx: }
  1809. condition:=inverse_cond(condition);
  1810. GetNextInstruction(hp1,hp1);
  1811. { hp1 is now at <several movs 2> }
  1812. repeat
  1813. taicpu(hp1).opcode:=A_CMOVcc;
  1814. taicpu(hp1).condition:=condition;
  1815. GetNextInstruction(hp1,hp1);
  1816. until not(assigned(hp1)) or
  1817. not(CanBeCMOV(hp1));
  1818. {
  1819. asml.remove(hp1.next)
  1820. hp1.next.free;
  1821. asml.remove(hp1);
  1822. hp1.free;
  1823. }
  1824. { remove jCC }
  1825. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1826. asml.remove(hp3);
  1827. hp3.free;
  1828. { remove jmp }
  1829. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1830. asml.remove(hp2);
  1831. hp2.free;
  1832. continue;
  1833. end;
  1834. end;
  1835. end;
  1836. end;
  1837. end;
  1838. {$endif USECMOV}
  1839. A_FSTP,A_FISTP:
  1840. if doFpuLoadStoreOpt(asmL,p) then
  1841. continue;
  1842. A_IMUL:
  1843. begin
  1844. if (taicpu(p).ops >= 2) and
  1845. ((taicpu(p).oper[0]^.typ = top_const) or
  1846. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  1847. (taicpu(p).oper[1]^.typ = top_reg) and
  1848. ((taicpu(p).ops = 2) or
  1849. ((taicpu(p).oper[2]^.typ = top_reg) and
  1850. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  1851. getLastInstruction(p,hp1) and
  1852. (hp1.typ = ait_instruction) and
  1853. (taicpu(hp1).opcode = A_MOV) and
  1854. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1855. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1856. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1857. { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
  1858. begin
  1859. taicpu(p).ops := 3;
  1860. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  1861. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  1862. asml.remove(hp1);
  1863. hp1.free;
  1864. end;
  1865. end;
  1866. A_MOV:
  1867. begin
  1868. if (taicpu(p).oper[0]^.typ = top_reg) and
  1869. (taicpu(p).oper[1]^.typ = top_reg) and
  1870. GetNextInstruction(p, hp1) and
  1871. (hp1.typ = ait_Instruction) and
  1872. ((taicpu(hp1).opcode = A_MOV) or
  1873. (taicpu(hp1).opcode = A_MOVZX) or
  1874. (taicpu(hp1).opcode = A_MOVSX)) and
  1875. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1876. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1877. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
  1878. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
  1879. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  1880. {mov reg1, reg2
  1881. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  1882. begin
  1883. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  1884. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  1885. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  1886. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  1887. asml.remove(p);
  1888. p.free;
  1889. p := hp1;
  1890. continue;
  1891. end
  1892. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1893. GetNextInstruction(p,hp1) and
  1894. (hp1.typ = ait_instruction) and
  1895. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1896. GetNextInstruction(hp1,hp2) and
  1897. (hp2.typ = ait_instruction) and
  1898. (taicpu(hp2).opcode = A_MOV) and
  1899. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1900. (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1901. (taicpu(hp2).oper[1]^.typ = top_ref) then
  1902. begin
  1903. TmpUsedRegs := UsedRegs;
  1904. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1905. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  1906. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
  1907. hp2, TmpUsedRegs))) then
  1908. { change mov (ref), reg }
  1909. { add/sub/or/... reg2/$const, reg }
  1910. { mov reg, (ref) }
  1911. { # release reg }
  1912. { to add/sub/or/... reg2/$const, (ref) }
  1913. begin
  1914. case taicpu(hp1).opcode of
  1915. A_INC,A_DEC:
  1916. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^)
  1917. else
  1918. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1919. end;
  1920. asml.remove(p);
  1921. asml.remove(hp2);
  1922. p.free;
  1923. hp2.free;
  1924. p := hp1
  1925. end;
  1926. end
  1927. end;
  1928. end;
  1929. end;
  1930. end;
  1931. p := tai(p.next)
  1932. end;
  1933. end;
  1934. procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  1935. var
  1936. p,hp1,hp2: tai;
  1937. begin
  1938. p := BlockStart;
  1939. while (p <> BlockEnd) Do
  1940. begin
  1941. case p.Typ Of
  1942. Ait_Instruction:
  1943. begin
  1944. case taicpu(p).opcode Of
  1945. A_CALL:
  1946. if (current_settings.optimizecputype < cpu_Pentium2) and
  1947. GetNextInstruction(p, hp1) and
  1948. (hp1.typ = ait_instruction) and
  1949. (taicpu(hp1).opcode = A_JMP) and
  1950. ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
  1951. begin
  1952. hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
  1953. InsertLLItem(asml, p.previous, p, hp2);
  1954. taicpu(p).opcode := A_JMP;
  1955. taicpu(p).is_jmp := true;
  1956. asml.remove(hp1);
  1957. hp1.free;
  1958. end;
  1959. A_CMP:
  1960. begin
  1961. if (taicpu(p).oper[0]^.typ = top_const) and
  1962. (taicpu(p).oper[0]^.val = 0) and
  1963. (taicpu(p).oper[1]^.typ = top_reg) then
  1964. {change "cmp $0, %reg" to "test %reg, %reg"}
  1965. begin
  1966. taicpu(p).opcode := A_TEST;
  1967. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1968. continue;
  1969. end;
  1970. end;
  1971. (*
  1972. Optimization is not safe; xor clears the carry flag.
  1973. See test/tgadint64 in the test suite.
  1974. A_MOV:
  1975. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1976. (taicpu(p).oper[0]^.val = 0) and
  1977. (taicpu(p).oper[1]^.typ = Top_Reg) then
  1978. { change "mov $0, %reg" into "xor %reg, %reg" }
  1979. begin
  1980. taicpu(p).opcode := A_XOR;
  1981. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  1982. end;
  1983. *)
  1984. A_MOVZX:
  1985. { if register vars are on, it's possible there is code like }
  1986. { "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx" }
  1987. { so we can't safely replace the movzx then with xor/mov, }
  1988. { since that would change the flags (JM) }
  1989. if not(cs_opt_regvar in current_settings.optimizerswitches) then
  1990. begin
  1991. if (taicpu(p).oper[1]^.typ = top_reg) then
  1992. if (taicpu(p).oper[0]^.typ = top_reg)
  1993. then
  1994. case taicpu(p).opsize of
  1995. S_BL:
  1996. begin
  1997. if IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
  1998. not(cs_opt_size in current_settings.optimizerswitches) and
  1999. (current_settings.optimizecputype = cpu_Pentium) then
  2000. {Change "movzbl %reg1, %reg2" to
  2001. "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
  2002. PentiumMMX}
  2003. begin
  2004. hp1 := taicpu.op_reg_reg(A_XOR, S_L,
  2005. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  2006. InsertLLItem(asml,p.previous, p, hp1);
  2007. taicpu(p).opcode := A_MOV;
  2008. taicpu(p).changeopsize(S_B);
  2009. setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
  2010. end;
  2011. end;
  2012. end
  2013. else if (taicpu(p).oper[0]^.typ = top_ref) and
  2014. (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  2015. (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
  2016. not(cs_opt_size in current_settings.optimizerswitches) and
  2017. IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
  2018. (current_settings.optimizecputype = cpu_Pentium) and
  2019. (taicpu(p).opsize = S_BL) then
  2020. {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
  2021. Pentium and PentiumMMX}
  2022. begin
  2023. hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
  2024. taicpu(p).oper[1]^.reg);
  2025. taicpu(p).opcode := A_MOV;
  2026. taicpu(p).changeopsize(S_B);
  2027. setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
  2028. InsertLLItem(asml,p.previous, p, hp1);
  2029. end;
  2030. end;
  2031. A_TEST, A_OR:
  2032. {removes the line marked with (x) from the sequence
  2033. and/or/xor/add/sub/... $x, %y
  2034. test/or %y, %y (x)
  2035. j(n)z _Label
  2036. as the first instruction already adjusts the ZF}
  2037. begin
  2038. if OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  2039. if GetLastInstruction(p, hp1) and
  2040. (tai(hp1).typ = ait_instruction) and
  2041. GetNextInstruction(p,hp2) and
  2042. (hp2.typ = ait_instruction) and
  2043. ((taicpu(hp2).opcode = A_SETcc) or
  2044. (taicpu(hp2).opcode = A_Jcc) or
  2045. (taicpu(hp2).opcode = A_CMOVcc)) then
  2046. case taicpu(hp1).opcode Of
  2047. A_ADD, A_SUB, A_OR, A_XOR, A_AND{, A_SHL, A_SHR}:
  2048. begin
  2049. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
  2050. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2051. { and in case of carry for A(E)/B(E)/C/NC }
  2052. ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
  2053. ((taicpu(hp1).opcode <> A_ADD) and
  2054. (taicpu(hp1).opcode <> A_SUB))) then
  2055. begin
  2056. hp1 := tai(p.next);
  2057. asml.remove(p);
  2058. p.free;
  2059. p := tai(hp1);
  2060. continue
  2061. end;
  2062. end;
  2063. A_DEC, A_INC, A_NEG:
  2064. begin
  2065. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
  2066. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2067. { and in case of carry for A(E)/B(E)/C/NC }
  2068. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  2069. begin
  2070. case taicpu(hp1).opcode Of
  2071. A_DEC, A_INC:
  2072. {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
  2073. begin
  2074. case taicpu(hp1).opcode Of
  2075. A_DEC: taicpu(hp1).opcode := A_SUB;
  2076. A_INC: taicpu(hp1).opcode := A_ADD;
  2077. end;
  2078. taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
  2079. taicpu(hp1).loadConst(0,1);
  2080. taicpu(hp1).ops:=2;
  2081. end
  2082. end;
  2083. hp1 := tai(p.next);
  2084. asml.remove(p);
  2085. p.free;
  2086. p := tai(hp1);
  2087. continue
  2088. end;
  2089. end
  2090. end
  2091. end;
  2092. end;
  2093. end;
  2094. end;
  2095. p := tai(p.next)
  2096. end;
  2097. end;
  2098. end.