popt386.pas 101 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067
  1. {
  2. $Id$
  3. Copyright (c) 1998-2000 by Florian Klaempfl and Jonas Maebe
  4. This unit contains the peephole optimizer.
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit POpt386;
  19. {$i defines.inc}
  20. Interface
  21. Uses Aasm;
  22. Procedure PrePeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
  23. Procedure PeepHoleOptPass1(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
  24. Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
  25. Procedure PostPeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
  26. Implementation
  27. Uses
  28. globtype,systems,
  29. globals,hcodegen,
  30. {$ifdef finaldestdebug}
  31. cobjects,
  32. {$endif finaldestdebug}
  33. cpubase,cpuasm,DAOpt386,tgeni386;
  Function RegUsedAfterInstruction(Reg: TRegister; p: Pai; Var UsedRegs: TRegSet): Boolean;
  { Reports whether Reg is still needed after instruction p.

    Reg is first passed through reg32 (presumably mapping it to its 32 bit
    register -- confirm in the unit that defines reg32), and UsedRegs is
    updated for the item following p via UpdateUsedRegs; the caller's set is
    modified.

    The result is true when Reg is in the updated UsedRegs and either there
    is no next instruction after p, or regLoadedWithNewValue reports false
    for Reg on that next instruction. }
  Begin
    Reg := reg32(Reg);
    UpdateUsedRegs(UsedRegs, Pai(p^.Next));
    If Not (Reg in UsedRegs) Then
      RegUsedAfterInstruction := False
    { p is a value parameter, so advancing it here stays local }
    Else If Not getNextInstruction(p, p) Then
      RegUsedAfterInstruction := True
    Else
      RegUsedAfterInstruction := Not regLoadedWithNewValue(Reg, false, p);
  End;
  function doFpuLoadStoreOpt(asmL: paasmoutput; var p: pai): boolean;
  { Optimizes an FPU store to memory that is immediately followed by a load
    of the same location ("fstp mem; fld mem" or "fistp mem; fild mem", same
    operand size and equal references).

    - If the pair is directly followed by leave/ret and the reference is
      frame-pointer based, without index, at an offset >=
      procinfo^.Return_Offset, both instructions are removed, p is set to
      the leave/ret instruction and the result is true.
    - Otherwise, unless the operand size is S_FX or S_IQ, the store is
      changed into its non-popping form (fst/fist) and the load is removed;
      the result stays false.

    Returns true if a "continue" should be done after this optimization. }
  var
    loadInstr, afterLoad: pai;
    isStoreLoadPair: boolean;
  begin
    doFpuLoadStoreOpt := false;

    { p must store to memory and be followed by a real instruction }
    if (paicpu(p)^.oper[0].typ <> top_ref) then
      exit;
    if not getNextInstruction(p, loadInstr) then
      exit;
    if (loadInstr^.typ <> ait_instruction) then
      exit;

    isStoreLoadPair :=
      ((paicpu(p)^.opcode = A_FSTP) and (paicpu(loadInstr)^.opcode = A_FLD)) or
      ((paicpu(p)^.opcode = A_FISTP) and (paicpu(loadInstr)^.opcode = A_FILD));
    if not isStoreLoadPair then
      exit;

    { the load must read back exactly what was just stored }
    if (paicpu(loadInstr)^.oper[0].typ <> top_ref) or
       (paicpu(loadInstr)^.opsize <> paicpu(p)^.opsize) or
       not refsEqual(paicpu(p)^.oper[0].ref^, paicpu(loadInstr)^.oper[0].ref^) then
      exit;

    if getNextInstruction(loadInstr, afterLoad) and
       (afterLoad^.typ = ait_instruction) and
       ((paicpu(afterLoad)^.opcode = A_LEAVE) or
        (paicpu(afterLoad)^.opcode = A_RET)) and
       (paicpu(p)^.oper[0].ref^.Base = procinfo^.FramePointer) and
       (paicpu(p)^.oper[0].ref^.Offset >= procinfo^.Return_Offset) and
       (paicpu(p)^.oper[0].ref^.Index = R_NO) then
      begin
        { the routine ends right after the pair: drop both instructions }
        asmL^.remove(p);
        asmL^.remove(loadInstr);
        dispose(p, done);
        dispose(loadInstr, done);
        p := afterLoad;
        removeLastDeallocForFuncRes(asmL, p);
        doFpuLoadStoreOpt := true;
      end
    else
      { fst can't store an extended value! }
      if (paicpu(p)^.opsize <> S_FX) and
         (paicpu(p)^.opsize <> S_IQ) then
        begin
          if (paicpu(p)^.opcode = A_FSTP) then
            paicpu(p)^.opcode := A_FST
          else
            paicpu(p)^.opcode := A_FIST;
          asmL^.remove(loadInstr);
          dispose(loadInstr, done)
        end;
  end;
  Procedure PrePeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
  { Pre-pass of the peephole optimizer. Walks the items of AsmL from
    BlockStart up to (not including) BlockEnd and rewrites, in place:

      - "imul const, reg" / "imul const, reg1, reg2" (32 bit only):
          const = 1               -> removed, resp. changed to a mov
          const = 3, 5, 9         -> a single lea
          const = 6, 10, 12       -> lea/lea or lea/add pairs, only when
                                     aktoptprocessor <= Class386
        The lea rewrites are skipped when optimizing for size, when
        aktoptprocessor >= ClassP6 and when the imul is followed by a
        jo/jno.
      - "shr/sar const1, x" followed by "shl const2, x" -> shift/and
        combinations depending on const1 and const2.
      - "xor reg, reg" -> "mov 0, reg" (temporary, see the A_XOR case). }
  var
    p,hp1: pai;
    l: longint;
    tmpRef: treference;
  Begin
    p := BlockStart;
    While (p <> BlockEnd) Do
      Begin
        Case p^.Typ Of
          Ait_Instruction:
            Begin
              Case Paicpu(p)^.opcode Of
                A_IMUL:
                  {changes certain "imul const, %reg"'s to lea sequences}
                  Begin
                    If (Paicpu(p)^.oper[0].typ = Top_Const) And
                       (Paicpu(p)^.oper[1].typ = Top_Reg) And
                       (Paicpu(p)^.opsize = S_L) Then
                      If (Paicpu(p)^.oper[0].val = 1) Then
                        If (Paicpu(p)^.oper[2].typ = Top_None) Then
                          {remove "imul $1, reg"}
                          Begin
                            hp1 := Pai(p^.Next);
                            AsmL^.Remove(p);
                            Dispose(p, Done);
                            p := hp1;
                            Continue;
                          End
                        Else
                          {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                          Begin
                            hp1 := New(Paicpu, Op_Reg_Reg(A_MOV, S_L,
                              Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[2].reg));
                            InsertLLItem(AsmL, p^.previous, p^.next, hp1);
                            Dispose(p, Done);
                            p := hp1;
                          End
                      Else If
                        ((Paicpu(p)^.oper[2].typ = Top_Reg) or
                         (Paicpu(p)^.oper[2].typ = Top_None)) And
                        (aktoptprocessor < ClassP6) And
                        (Paicpu(p)^.oper[0].val <= 12) And
                        Not(CS_LittleSize in aktglobalswitches) And
                        { lea/add do not set the overflow flag like imul does, }
                        { so leave the imul alone if a jo/jno depends on it    }
                        (Not(GetNextInstruction(p, hp1)) Or
                         Not((Pai(hp1)^.typ = ait_instruction) And
                             ((paicpu(hp1)^.opcode = A_Jcc) and
                              (paicpu(hp1)^.condition in [C_O,C_NO]))))
                      Then
                        Begin
                          Reset_reference(tmpRef);
                          Case Paicpu(p)^.oper[0].val Of
                            3, 5, 9:
                              Begin
                                {imul n, reg1, reg2 to
                                   lea (reg1,reg1,n-1), reg2
                                 imul n, reg1 to
                                   lea (reg1,reg1,n-1), reg1
                                 with n = 3, 5 or 9, i.e. scale 2, 4 or 8}
                                tmpRef.base := Paicpu(p)^.oper[1].reg;
                                tmpRef.Index := Paicpu(p)^.oper[1].reg;
                                tmpRef.ScaleFactor := Paicpu(p)^.oper[0].val - 1;
                                If (Paicpu(p)^.oper[2].typ = Top_None) Then
                                  hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L,
                                    newReference(tmpRef), Paicpu(p)^.oper[1].reg))
                                Else
                                  hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L,
                                    newReference(tmpRef), Paicpu(p)^.oper[2].reg));
                                { replace the imul by the lea }
                                InsertLLItem(AsmL, p^.previous, p^.next, hp1);
                                Dispose(p, Done);
                                p := hp1;
                              End;
                            6:
                              Begin
                                {imul 6, reg1, reg2 to
                                   lea (,reg1,2), reg2
                                   lea (reg2,reg1,4), reg2
                                 imul 6, reg1 to
                                   lea (reg1,reg1,2), reg1
                                   add reg1, reg1}
                                { NOTE(review): the three operand form assumes }
                                { reg1 <> reg2 -- confirm the code generator   }
                                { never emits "imul 6, reg, reg" with 3 opers  }
                                If (aktoptprocessor <= Class386) Then
                                  Begin
                                    { second instruction of the pair, inserted after p }
                                    tmpRef.Index := Paicpu(p)^.oper[1].reg;
                                    If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
                                      Begin
                                        tmpRef.base := Paicpu(p)^.oper[2].reg;
                                        tmpRef.ScaleFactor := 4;
                                        { the destination has to be reg2: the result }
                                        { of the multiplication belongs in reg2 and  }
                                        { reg1 must be left untouched                }
                                        hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L,
                                          newReference(tmpRef), Paicpu(p)^.oper[2].reg));
                                      End
                                    Else
                                      Begin
                                        hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
                                          Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[1].reg));
                                      End;
                                    InsertLLItem(AsmL, p, p^.next, hp1);
                                    { first instruction of the pair, replaces p }
                                    Reset_reference(tmpRef);
                                    tmpRef.Index := Paicpu(p)^.oper[1].reg;
                                    tmpRef.ScaleFactor := 2;
                                    If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
                                      Begin
                                        tmpRef.base := R_NO;
                                        hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L,
                                          newReference(tmpRef), Paicpu(p)^.oper[2].reg));
                                      End
                                    Else
                                      Begin
                                        tmpRef.base := Paicpu(p)^.oper[1].reg;
                                        hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L,
                                          newReference(tmpRef), Paicpu(p)^.oper[1].reg));
                                      End;
                                    InsertLLItem(AsmL, p^.previous, p^.next, hp1);
                                    Dispose(p, Done);
                                    { continue after the second new instruction }
                                    p := Pai(hp1^.next);
                                  End
                              End;
                            10:
                              Begin
                                {imul 10, reg1, reg2 to
                                   lea (reg1,reg1,4), reg2
                                   add reg2, reg2
                                 imul 10, reg1 to
                                   lea (reg1,reg1,4), reg1
                                   add reg1, reg1}
                                If (aktoptprocessor <= Class386) Then
                                  Begin
                                    { second instruction of the pair, inserted after p }
                                    If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
                                      hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
                                        Paicpu(p)^.oper[2].reg, Paicpu(p)^.oper[2].reg))
                                    Else
                                      hp1 := New(Paicpu, op_reg_reg(A_ADD, S_L,
                                        Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[1].reg));
                                    InsertLLItem(AsmL, p, p^.next, hp1);
                                    { first instruction of the pair, replaces p }
                                    tmpRef.base := Paicpu(p)^.oper[1].reg;
                                    tmpRef.Index := Paicpu(p)^.oper[1].reg;
                                    tmpRef.ScaleFactor := 4;
                                    If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
                                      hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L,
                                        newReference(tmpRef), Paicpu(p)^.oper[2].reg))
                                    Else
                                      hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L,
                                        newReference(tmpRef), Paicpu(p)^.oper[1].reg));
                                    InsertLLItem(AsmL, p^.previous, p^.next, hp1);
                                    Dispose(p, Done);
                                    p := Pai(hp1^.next);
                                  End
                              End;
                            12:
                              Begin
                                {imul 12, reg1, reg2 to
                                   lea (,reg1,4), reg2
                                   lea (reg2,reg1,8), reg2
                                 imul 12, reg1 to
                                   lea (reg1,reg1,2), reg1
                                   lea (,reg1,4), reg1}
                                { NOTE(review): as for 6, the three operand    }
                                { form assumes reg1 <> reg2 -- confirm         }
                                If (aktoptprocessor <= Class386) Then
                                  Begin
                                    { second instruction of the pair, inserted after p }
                                    tmpRef.Index := Paicpu(p)^.oper[1].reg;
                                    If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
                                      Begin
                                        tmpRef.base := Paicpu(p)^.oper[2].reg;
                                        tmpRef.ScaleFactor := 8;
                                        hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L,
                                          newReference(tmpRef), Paicpu(p)^.oper[2].reg));
                                      End
                                    Else
                                      Begin
                                        tmpRef.base := R_NO;
                                        tmpRef.ScaleFactor := 4;
                                        hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L,
                                          newReference(tmpRef), Paicpu(p)^.oper[1].reg));
                                      End;
                                    InsertLLItem(AsmL, p, p^.next, hp1);
                                    { first instruction of the pair, replaces p }
                                    Reset_reference(tmpRef);
                                    tmpRef.Index := Paicpu(p)^.oper[1].reg;
                                    If (Paicpu(p)^.oper[2].typ = Top_Reg) Then
                                      Begin
                                        tmpRef.base := R_NO;
                                        tmpRef.ScaleFactor := 4;
                                        hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L,
                                          newReference(tmpRef), Paicpu(p)^.oper[2].reg));
                                      End
                                    Else
                                      Begin
                                        tmpRef.base := Paicpu(p)^.oper[1].reg;
                                        tmpRef.ScaleFactor := 2;
                                        hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L,
                                          newReference(tmpRef), Paicpu(p)^.oper[1].reg));
                                      End;
                                    InsertLLItem(AsmL, p^.previous, p^.next, hp1);
                                    Dispose(p, Done);
                                    p := Pai(hp1^.next);
                                  End
                              End
                          End;
                        End;
                  End;
                A_SAR, A_SHR:
                  {changes the code sequence
                     shr/sar const1, x
                     shl const2, x
                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                  Begin
                    If GetNextInstruction(p, hp1) And
                       (pai(hp1)^.typ = ait_instruction) and
                       (Paicpu(hp1)^.opcode = A_SHL) and
                       (Paicpu(p)^.oper[0].typ = top_const) and
                       (Paicpu(hp1)^.oper[0].typ = top_const) and
                       (Paicpu(hp1)^.opsize = Paicpu(p)^.opsize) And
                       (Paicpu(hp1)^.oper[1].typ = Paicpu(p)^.oper[1].typ) And
                       OpsEqual(Paicpu(hp1)^.oper[1], Paicpu(p)^.oper[1])
                    Then
                      If (Paicpu(p)^.oper[0].val > Paicpu(hp1)^.oper[0].val) And
                         Not(CS_LittleSize In aktglobalswitches)
                      Then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 > const2:
                          shift right by the difference, then clear the low
                          const2 bits with an and }
                        Begin
                          Paicpu(p)^.LoadConst(0,
                            Paicpu(p)^.oper[0].val - Paicpu(hp1)^.oper[0].val);
                          Paicpu(hp1)^.opcode := A_AND;
                          l := (1 shl (Paicpu(hp1)^.oper[0].val)) - 1;
                          Case Paicpu(p)^.opsize Of
                            S_L: Paicpu(hp1)^.LoadConst(0, l Xor longint(-1));
                            S_B: Paicpu(hp1)^.LoadConst(0, l Xor $ff);
                            S_W: Paicpu(hp1)^.LoadConst(0, l Xor $ffff);
                          End;
                        End
                      Else
                        If (Paicpu(p)^.oper[0].val < Paicpu(hp1)^.oper[0].val) And
                           Not(CS_LittleSize In aktglobalswitches)
                        Then
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 < const2:
                            clear the low const1 bits with an and, then shift
                            left by the difference }
                          Begin
                            Paicpu(hp1)^.LoadConst(0,
                              Paicpu(hp1)^.oper[0].val - Paicpu(p)^.oper[0].val);
                            Paicpu(p)^.opcode := A_AND;
                            l := (1 shl (Paicpu(p)^.oper[0].val)) - 1;
                            Case Paicpu(p)^.opsize Of
                              { longint(-1) has the same bit pattern as $ffffffff }
                              { and matches the mask used in the first branch     }
                              S_L: Paicpu(p)^.LoadConst(0, l Xor longint(-1));
                              S_B: Paicpu(p)^.LoadConst(0, l Xor $ff);
                              S_W: Paicpu(p)^.LoadConst(0, l Xor $ffff);
                            End;
                          End
                        Else
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 = const2:
                            a single and that clears the low bits is enough }
                          if (Paicpu(p)^.oper[0].val = Paicpu(hp1)^.oper[0].val) then
                            Begin
                              Paicpu(p)^.opcode := A_AND;
                              l := (1 shl (Paicpu(p)^.oper[0].val)) - 1;
                              Case Paicpu(p)^.opsize Of
                                S_B: Paicpu(p)^.LoadConst(0, l Xor $ff);
                                S_W: Paicpu(p)^.LoadConst(0, l Xor $ffff);
                                S_L: Paicpu(p)^.LoadConst(0, l Xor longint(-1));
                              End;
                              AsmL^.remove(hp1);
                              dispose(hp1, done);
                            End;
                  End;
                A_XOR:
                  If (Paicpu(p)^.oper[0].typ = top_reg) And
                     (Paicpu(p)^.oper[1].typ = top_reg) And
                     (Paicpu(p)^.oper[0].reg = Paicpu(p)^.oper[1].reg) then
                    { temporarily change this to 'mov reg,0' to make it easier }
                    { for the CSE. Will be changed back in pass 2 }
                    begin
                      paicpu(p)^.opcode := A_MOV;
                      paicpu(p)^.loadconst(0,0);
                    end;
              End;
            End;
        End;
        p := Pai(p^.next)
      End;
  End;
  388. Procedure PeepHoleOptPass1(Asml: PAasmOutput; BlockStart, BlockEnd: Pai);
  389. {First pass of peepholeoptimizations}
  390. Var
  391. l,l1 : longint;
  392. p,hp1,hp2 : pai;
  393. hp3,hp4: pai;
  394. TmpRef: TReference;
  395. UsedRegs, TmpUsedRegs: TRegSet;
  396. TmpBool1, TmpBool2: Boolean;
  Function SkipLabels(hp: Pai; var hp2: pai): boolean;
  { Starting after hp, skips every following item whose type is in
    SkipInstr, ait_label or ait_align.

    Returns true and stores the first item that is not skipped in hp2.
    If the list ends first, returns false and stores the last item that
    was reached in hp2. }
  Var
    following: pai;
  Begin
    following := pai(hp^.next);
    While assigned(following) and
          (following^.typ In SkipInstr + [ait_label,ait_align]) Do
      Begin
        hp := following;
        following := pai(hp^.next);
      End;
    SkipLabels := assigned(following);
    If assigned(following) Then
      hp2 := following
    Else
      hp2 := hp;
  End;
  Procedure GetFinalDestination(AsmL: PAAsmOutput; hp: paicpu);
  {traces successive jumps to their final destination and sets it, e.g.
     je l1                je l3
     <code>               <code>
     l1:       becomes    l1:
     je l2                je l3
     <code>               <code>
     l2:                  l2:
     jmp l3               jmp l3

   hp is the jump to retarget. Whenever hp is pointed at another label, the
   reference count of the old label is decreased and that of the new label
   is increased, so the counts keep matching the jumps that exist.

   NOTE(review): the procedure recurses without a depth limit; whether a
   cycle of jumps can make it recurse forever is not visible from here.}
  Var p1, p2: pai;
      l: pasmlabel;

    Function FindAnyLabel(hp: pai; var l: pasmlabel): Boolean;
    { Skips the items in SkipInstr and the alignments that follow hp. If
      the next item after those is a label, it is returned in l and the
      result is true; otherwise the result is false and l is untouched. }
    Begin
      FindAnyLabel := false;
      While assigned(hp^.next) and
            (pai(hp^.next)^.typ In (SkipInstr+[ait_align])) Do
        hp := pai(hp^.next);
      If assigned(hp^.next) and
         (pai(hp^.next)^.typ = ait_label) Then
        Begin
          FindAnyLabel := true;
          l := pai_label(hp^.next)^.l;
        End
    End;

  Begin
    If (pasmlabel(hp^.oper[0].sym)^.labelnr >= LoLab) and
       (pasmlabel(hp^.oper[0].sym)^.labelnr <= HiLab) and {range check, a jump can go past an assembler block!}
       Assigned(LTable^[pasmlabel(hp^.oper[0].sym)^.labelnr-LoLab].PaiObj) Then
      Begin
        p1 := LTable^[pasmlabel(hp^.oper[0].sym)^.labelnr-LoLab].PaiObj; {the jump's destination}
        SkipLabels(p1,p1);
        If (pai(p1)^.typ = ait_instruction) and
           (paicpu(p1)^.is_jmp) Then
          If { the next instruction after the label where the jump hp arrives}
             { is unconditional or of the same type as hp, so continue       }
             (paicpu(p1)^.condition in [C_None,hp^.condition]) or
             { the next instruction after the label where the jump hp arrives}
             { is the opposite of hp (so this one is never taken), but after }
             { that one there is a branch that will be taken, so perform a   }
             { little hack: set p1 equal to this instruction (that's what the}
             { last SkipLabels is for, only works with short bool evaluation)}
             ((paicpu(p1)^.condition = inverse_cond[hp^.condition]) and
              SkipLabels(p1,p2) and
              (p2^.typ = ait_instruction) and
              (paicpu(p2)^.is_jmp) and
              (paicpu(p2)^.condition in [C_None,hp^.condition]) and
              SkipLabels(p1,p1)) Then
            Begin
              { resolve the jump at the destination first, then take over }
              { its target                                                }
              GetFinalDestination(asml, paicpu(p1));
              Dec(pasmlabel(hp^.oper[0].sym)^.refs);
              hp^.oper[0].sym:=paicpu(p1)^.oper[0].sym;
              inc(pasmlabel(hp^.oper[0].sym)^.refs);
            End
          Else
            { the jump at the destination is never taken when arriving via }
            { hp, so hp can jump straight to the code following it         }
            If (paicpu(p1)^.condition = inverse_cond[hp^.condition]) then
              if not FindAnyLabel(p1,l) then
                begin
  {$ifdef finaldestdebug}
                  insertllitem(asml,p1,p1^.next,new(pai_asm_comment,init(
                    strpnew('previous label inserted'))));
  {$endif finaldestdebug}
                  getlabel(l);
                  insertllitem(asml,p1,p1^.next,new(pai_label,init(l)));
                  dec(pasmlabel(paicpu(hp)^.oper[0].sym)^.refs);
                  hp^.oper[0].sym := l;
                  inc(l^.refs);
  { this won't work, since the new label isn't in the labeltable }
  { so it will fail the rangecheck. Labeltable should become a   }
  { hashtable to support this:                                   }
  {               GetFinalDestination(asml, hp);                 }
                end
              else
                begin
  {$ifdef finaldestdebug}
                  insertllitem(asml,p1,p1^.next,new(pai_asm_comment,init(
                    strpnew('next label reused'))));
  {$endif finaldestdebug}
                  { hp stops referencing its old label here as well, so }
                  { release that reference like the other branches do   }
                  dec(pasmlabel(hp^.oper[0].sym)^.refs);
                  hp^.oper[0].sym := l;
                  inc(l^.refs);
                  GetFinalDestination(asml, hp);
                end;
      End;
  End;
  Function DoSubAddOpt(var p: Pai): Boolean;
  { Folds the instruction preceding p into p, when that instruction has the
    same operand size and works on the register in p's second operand:

      dec reg         + p  ->  p's constant + 1
      sub const, reg  + p  ->  p's constant + const
      add const, reg  + p  ->  p's constant - const

    The folded instruction is removed. If, in the add case, p's constant
    becomes 0, p itself is removed as well, p is set to the instruction
    before its successor (or to the successor if there is none before it)
    and the result is true. In all other cases the result is false.

    Uses hp1 and AsmL of the enclosing procedure. Presumably p is a
    "sub const, reg" instruction -- the caller is not visible here. }
  Begin
    DoSubAddOpt := False;
    If Not GetLastInstruction(p, hp1) Then
      Exit;
    If (hp1^.typ <> ait_instruction) Or
       (Paicpu(hp1)^.opsize <> Paicpu(p)^.opsize) Then
      Exit;

    If (Paicpu(hp1)^.opcode = A_DEC) Then
      Begin
        If (Paicpu(hp1)^.oper[0].typ = top_reg) And
           (Paicpu(hp1)^.oper[0].reg = Paicpu(p)^.oper[1].reg) Then
          Begin
            Paicpu(p)^.LoadConst(0, Paicpu(p)^.oper[0].val + 1);
            AsmL^.Remove(hp1);
            Dispose(hp1, Done)
          End
      End
    Else If (Paicpu(hp1)^.opcode = A_SUB) Then
      Begin
        If (Paicpu(hp1)^.oper[0].typ = top_const) And
           (Paicpu(hp1)^.oper[1].typ = top_reg) And
           (Paicpu(hp1)^.oper[1].reg = Paicpu(p)^.oper[1].reg) Then
          Begin
            Paicpu(p)^.LoadConst(0,
              Paicpu(p)^.oper[0].val + Paicpu(hp1)^.oper[0].val);
            AsmL^.Remove(hp1);
            Dispose(hp1, Done)
          End
      End
    Else If (Paicpu(hp1)^.opcode = A_ADD) Then
      Begin
        If (Paicpu(hp1)^.oper[0].typ = top_const) And
           (Paicpu(hp1)^.oper[1].typ = top_reg) And
           (Paicpu(hp1)^.oper[1].reg = Paicpu(p)^.oper[1].reg) Then
          Begin
            Paicpu(p)^.LoadConst(0,
              Paicpu(p)^.oper[0].val - Paicpu(hp1)^.oper[0].val);
            AsmL^.Remove(hp1);
            Dispose(hp1, Done);
            { the add and the sub cancelled each other out completely }
            If (Paicpu(p)^.oper[0].val = 0) Then
              Begin
                hp1 := Pai(p^.next);
                AsmL^.Remove(p);
                Dispose(p, Done);
                If Not GetLastInstruction(hp1, p) Then
                  p := hp1;
                DoSubAddOpt := True;
              End
          End
      End;
  End;
  541. Begin
  542. P := BlockStart;
  543. UsedRegs := [];
  544. While (P <> BlockEnd) Do
  545. Begin
  546. UpDateUsedRegs(UsedRegs, Pai(p^.next));
  547. Case P^.Typ Of
  548. ait_instruction:
  549. Begin
  550. { Handle Jmp Optimizations }
  551. if Paicpu(p)^.is_jmp then
  552. begin
  553. {the following if-block removes all code between a jmp and the next label,
  554. because it can never be executed}
  555. If (paicpu(p)^.opcode = A_JMP) Then
  556. Begin
  557. While GetNextInstruction(p, hp1) and
  558. (hp1^.typ <> ait_label) do
  559. If not(hp1^.typ in ([ait_label,ait_align]+skipinstr)) Then
  560. Begin
  561. AsmL^.Remove(hp1);
  562. Dispose(hp1, done);
  563. End
  564. else break;
  565. End;
  566. { remove jumps to a label coming right after them }
  567. If GetNextInstruction(p, hp1) then
  568. Begin
  569. if FindLabel(pasmlabel(paicpu(p)^.oper[0].sym), hp1) then
  570. Begin
  571. hp2:=pai(hp1^.next);
  572. asml^.remove(p);
  573. dispose(p,done);
  574. p:=hp2;
  575. continue;
  576. end
  577. Else
  578. Begin
  579. if hp1^.typ = ait_label then
  580. SkipLabels(hp1,hp1);
  581. If (pai(hp1)^.typ=ait_instruction) and
  582. (paicpu(hp1)^.opcode=A_JMP) and
  583. GetNextInstruction(hp1, hp2) And
  584. FindLabel(PAsmLabel(paicpu(p)^.oper[0].sym), hp2)
  585. Then
  586. Begin
  587. if paicpu(p)^.opcode=A_Jcc then
  588. paicpu(p)^.condition:=inverse_cond[paicpu(p)^.condition]
  589. else
  590. begin
  591. If (LabDif <> 0) Then
  592. GetFinalDestination(asml, paicpu(p));
  593. p:=pai(p^.next);
  594. continue;
  595. end;
  596. Dec(pai_label(hp2)^.l^.refs);
  597. paicpu(p)^.oper[0].sym:=paicpu(hp1)^.oper[0].sym;
  598. Inc(paicpu(p)^.oper[0].sym^.refs);
  599. asml^.remove(hp1);
  600. dispose(hp1,done);
  601. If (LabDif <> 0) Then
  602. GetFinalDestination(asml, paicpu(p));
  603. end
  604. else
  605. If (LabDif <> 0) Then
  606. GetFinalDestination(asml, paicpu(p));
  607. end;
  608. end;
  609. end
  610. else
  611. { All other optimizes }
  612. begin
  613. For l := 0 to 2 Do
  614. If (Paicpu(p)^.oper[l].typ = top_ref) Then
  615. With Paicpu(p)^.oper[l].ref^ Do
  616. Begin
  617. If (base = R_NO) And
  618. (index <> R_NO) And
  619. (scalefactor in [0,1])
  620. Then
  621. Begin
  622. base := index;
  623. index := R_NO
  624. End
  625. End;
  626. Case Paicpu(p)^.opcode Of
  627. A_AND:
  628. Begin
  629. If (Paicpu(p)^.oper[0].typ = top_const) And
  630. (Paicpu(p)^.oper[1].typ = top_reg) And
  631. GetNextInstruction(p, hp1) And
  632. (Pai(hp1)^.typ = ait_instruction) And
  633. (Paicpu(hp1)^.opcode = A_AND) And
  634. (Paicpu(hp1)^.oper[0].typ = top_const) And
  635. (Paicpu(hp1)^.oper[1].typ = top_reg) And
  636. (Paicpu(hp1)^.oper[1].reg = Paicpu(hp1)^.oper[1].reg)
  637. Then
  638. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  639. Begin
  640. Paicpu(p)^.LoadConst(0,Paicpu(p)^.oper[0].val And Paicpu(hp1)^.oper[0].val);
  641. AsmL^.Remove(hp1);
  642. Dispose(hp1, Done)
  643. End
  644. Else
  645. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  646. jump, but only if it's a conditional jump (PFV) }
  647. If (Paicpu(p)^.oper[1].typ = top_reg) And
  648. GetNextInstruction(p, hp1) And
  649. (hp1^.typ = ait_instruction) And
  650. (Paicpu(hp1)^.is_jmp) and
  651. (Paicpu(hp1)^.opcode<>A_JMP) and
  652. Not(Paicpu(p)^.oper[1].reg in UsedRegs) Then
  653. Paicpu(p)^.opcode := A_TEST;
  654. End;
  655. A_CMP:
  656. Begin
  657. If (Paicpu(p)^.oper[0].typ = top_const) And
  658. (Paicpu(p)^.oper[1].typ in [top_reg,top_ref]) And
  659. (Paicpu(p)^.oper[0].val = 0) Then
  660. If GetNextInstruction(p, hp1) And
  661. (hp1^.typ = ait_instruction) And
  662. (Paicpu(hp1)^.is_jmp) and
  663. (paicpu(hp1)^.opcode=A_Jcc) and
  664. (paicpu(hp1)^.condition in [C_LE,C_BE]) and
  665. GetNextInstruction(hp1,hp2) and
  666. (hp2^.typ = ait_instruction) and
  667. (Paicpu(hp2)^.opcode = A_DEC) And
  668. OpsEqual(Paicpu(hp2)^.oper[0],Paicpu(p)^.oper[1]) And
  669. GetNextInstruction(hp2, hp3) And
  670. (hp3^.typ = ait_instruction) and
  671. (Paicpu(hp3)^.is_jmp) and
  672. (Paicpu(hp3)^.opcode = A_JMP) And
  673. GetNextInstruction(hp3, hp4) And
  674. FindLabel(PAsmLabel(paicpu(hp1)^.oper[0].sym),hp4)
  675. Then
  676. Begin
  677. Paicpu(hp2)^.Opcode := A_SUB;
  678. Paicpu(hp2)^.Loadoper(1,Paicpu(hp2)^.oper[0]);
  679. Paicpu(hp2)^.LoadConst(0,1);
  680. Paicpu(hp2)^.ops:=2;
  681. Paicpu(hp3)^.Opcode := A_Jcc;
  682. Case paicpu(hp1)^.condition of
  683. C_LE: Paicpu(hp3)^.condition := C_GE;
  684. C_BE: Paicpu(hp3)^.condition := C_AE;
  685. End;
  686. AsmL^.Remove(p);
  687. AsmL^.Remove(hp1);
  688. Dispose(p, Done);
  689. Dispose(hp1, Done);
  690. p := hp2;
  691. continue;
  692. End
  693. Else
  694. {change "cmp $0, %reg" to "test %reg, %reg"}
  695. If (Paicpu(p)^.oper[1].typ = top_reg) Then
  696. Begin
  697. Paicpu(p)^.opcode := A_TEST;
  698. Paicpu(p)^.loadreg(0,Paicpu(p)^.oper[1].reg);
  699. End;
  700. End;
  701. A_FLD:
  702. Begin
  703. If (Paicpu(p)^.oper[0].typ = top_reg) And
  704. GetNextInstruction(p, hp1) And
  705. (hp1^.typ = Ait_Instruction) And
  706. (Paicpu(hp1)^.oper[0].typ = top_reg) And
  707. (Paicpu(hp1)^.oper[1].typ = top_reg) And
  708. (Paicpu(hp1)^.oper[0].reg = R_ST) And
  709. (Paicpu(hp1)^.oper[1].reg = R_ST1) Then
  710. { change to
  711. fld reg fxxx reg,st
  712. fxxxp st, st1 (hp1)
  713. Remark: non commutative operations must be reversed!
  714. }
  715. begin
  716. Case Paicpu(hp1)^.opcode Of
  717. A_FMULP,A_FADDP,
  718. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  719. begin
  720. Case Paicpu(hp1)^.opcode Of
  721. A_FADDP: Paicpu(hp1)^.opcode := A_FADD;
  722. A_FMULP: Paicpu(hp1)^.opcode := A_FMUL;
  723. A_FSUBP: Paicpu(hp1)^.opcode := A_FSUBR;
  724. A_FSUBRP: Paicpu(hp1)^.opcode := A_FSUB;
  725. A_FDIVP: Paicpu(hp1)^.opcode := A_FDIVR;
  726. A_FDIVRP: Paicpu(hp1)^.opcode := A_FDIV;
  727. End;
  728. Paicpu(hp1)^.oper[0].reg := Paicpu(p)^.oper[0].reg;
  729. Paicpu(hp1)^.oper[1].reg := R_ST;
  730. AsmL^.Remove(p);
  731. Dispose(p, Done);
  732. p := hp1;
  733. Continue;
  734. end;
  735. end;
  736. end
  737. else
  738. If (Paicpu(p)^.oper[0].typ = top_ref) And
  739. GetNextInstruction(p, hp2) And
  740. (hp2^.typ = Ait_Instruction) And
  741. (Paicpu(hp2)^.oper[0].typ = top_reg) And
  742. (Paicpu(hp2)^.oper[1].typ = top_reg) And
  743. (Paicpu(p)^.opsize in [S_FS, S_FL]) And
  744. (Paicpu(hp2)^.oper[0].reg = R_ST) And
  745. (Paicpu(hp2)^.oper[1].reg = R_ST1) Then
  746. If GetLastInstruction(p, hp1) And
  747. (hp1^.typ = Ait_Instruction) And
  748. ((Paicpu(hp1)^.opcode = A_FLD) Or
  749. (Paicpu(hp1)^.opcode = A_FST)) And
  750. (Paicpu(hp1)^.opsize = Paicpu(p)^.opsize) And
  751. (Paicpu(hp1)^.oper[0].typ = top_ref) And
  752. RefsEqual(Paicpu(p)^.oper[0].ref^, Paicpu(hp1)^.oper[0].ref^) Then
  753. If ((Paicpu(hp2)^.opcode = A_FMULP) Or
  754. (Paicpu(hp2)^.opcode = A_FADDP)) Then
  755. { change to
  756. fld/fst mem1 (hp1) fld/fst mem1
  757. fld mem1 (p) fadd/
  758. faddp/ fmul st, st
  759. fmulp st, st1 (hp2) }
  760. Begin
  761. AsmL^.Remove(p);
  762. Dispose(p, Done);
  763. p := hp1;
  764. If (Paicpu(hp2)^.opcode = A_FADDP) Then
  765. Paicpu(hp2)^.opcode := A_FADD
  766. Else
  767. Paicpu(hp2)^.opcode := A_FMUL;
  768. Paicpu(hp2)^.oper[1].reg := R_ST;
  769. End
  770. Else
  771. { change to
  772. fld/fst mem1 (hp1) fld/fst mem1
  773. fld mem1 (p) fld st}
  774. Begin
  775. Paicpu(p)^.changeopsize(S_FL);
  776. Paicpu(p)^.loadreg(0,R_ST);
  777. End
  778. Else
  779. Begin
  780. Case Paicpu(hp2)^.opcode Of
  781. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  782. { change to
  783. fld/fst mem1 (hp1) fld/fst mem1
  784. fld mem2 (p) fxxx mem2
  785. fxxxp st, st1 (hp2) }
  786. Begin
  787. Case Paicpu(hp2)^.opcode Of
  788. A_FADDP: Paicpu(p)^.opcode := A_FADD;
  789. A_FMULP: Paicpu(p)^.opcode := A_FMUL;
  790. A_FSUBP: Paicpu(p)^.opcode := A_FSUBR;
  791. A_FSUBRP: Paicpu(p)^.opcode := A_FSUB;
  792. A_FDIVP: Paicpu(p)^.opcode := A_FDIVR;
  793. A_FDIVRP: Paicpu(p)^.opcode := A_FDIV;
  794. End;
  795. AsmL^.Remove(hp2);
  796. Dispose(hp2, Done)
  797. End
  798. End
  799. End
  800. End;
  801. A_FSTP,A_FISTP:
  802. if doFpuLoadStoreOpt(asmL,p) then
  803. continue;
  804. A_LEA:
  805. Begin
  806. {removes seg register prefixes from LEA operations, as they
  807. don't do anything}
  808. Paicpu(p)^.oper[0].ref^.Segment := R_NO;
  809. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  810. If (Paicpu(p)^.oper[0].ref^.Base In [R_EAX..R_EDI]) And
  811. (Paicpu(p)^.oper[0].ref^.Index = R_NO) And
  812. (Not(Assigned(Paicpu(p)^.oper[0].ref^.Symbol))) Then
  813. If (Paicpu(p)^.oper[0].ref^.Base <> Paicpu(p)^.oper[1].reg)
  814. and (Paicpu(p)^.oper[0].ref^.Offset = 0)
  815. Then
  816. Begin
  817. hp1 := New(Paicpu, op_reg_reg(A_MOV, S_L,Paicpu(p)^.oper[0].ref^.Base,
  818. Paicpu(p)^.oper[1].reg));
  819. InsertLLItem(AsmL,p^.previous,p^.next, hp1);
  820. Dispose(p, Done);
  821. p := hp1;
  822. Continue;
  823. End
  824. Else
  825. if (Paicpu(p)^.oper[0].ref^.Offset = 0) then
  826. Begin
  827. hp1 := Pai(p^.Next);
  828. AsmL^.Remove(p);
  829. Dispose(p, Done);
  830. p := hp1;
  831. Continue;
  832. End
  833. else
  834. with Paicpu(p)^.oper[0].ref^ do
  835. if (Base = Paicpu(p)^.oper[1].reg) then
  836. begin
  837. l := offset+offsetfixup;
  838. case l of
  839. 1,-1:
  840. begin
  841. if l = 1 then
  842. paicpu(p)^.opcode := A_INC
  843. else paicpu(p)^.opcode := A_DEC;
  844. paicpu(p)^.loadreg(0,Paicpu(p)^.oper[1].reg);
  845. paicpu(p)^.ops := 1;
  846. end;
  847. else
  848. begin
  849. paicpu(p)^.opcode := A_ADD;
  850. paicpu(p)^.loadconst(0,offset+offsetfixup);
  851. end;
  852. end;
  853. end;
  854. End;
  855. A_MOV:
  856. Begin
  857. TmpUsedRegs := UsedRegs;
  858. If (Paicpu(p)^.oper[1].typ = top_reg) And
  859. (Paicpu(p)^.oper[1].reg In [R_EAX, R_EBX, R_EDX, R_EDI]) And
  860. GetNextInstruction(p, hp1) And
  861. (Pai(hp1)^.typ = ait_instruction) And
  862. (Paicpu(hp1)^.opcode = A_MOV) And
  863. (Paicpu(hp1)^.oper[0].typ = top_reg) And
  864. (Paicpu(hp1)^.oper[0].reg = Paicpu(p)^.oper[1].reg)
  865. Then
  866. {we have "mov x, %treg; mov %treg, y}
  867. If not(RegUsedAfterInstruction(Paicpu(p)^.oper[1].reg, hp1, TmpUsedRegs)) then
  868. {we've got "mov x, %treg; mov %treg, y; with %treg is not used after }
  869. Case Paicpu(p)^.oper[0].typ Of
  870. top_reg:
  871. Begin
  872. { change "mov %reg, %treg; mov %treg, y"
  873. to "mov %reg, y" }
  874. Paicpu(p)^.LoadOper(1,Paicpu(hp1)^.oper[1]);
  875. AsmL^.Remove(hp1);
  876. Dispose(hp1, Done);
  877. continue;
  878. End;
  879. top_ref:
  880. If (Paicpu(hp1)^.oper[1].typ = top_reg) Then
  881. Begin
  882. { change "mov mem, %treg; mov %treg, %reg"
  883. to "mov mem, %reg" }
  884. Paicpu(p)^.Loadoper(1,Paicpu(hp1)^.oper[1]);
  885. AsmL^.Remove(hp1);
  886. Dispose(hp1, Done);
  887. continue;
  888. End;
  889. End
  890. Else
  891. Else
  892. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  893. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  894. penalty}
  895. If (Paicpu(p)^.oper[0].typ = top_reg) And
  896. (Paicpu(p)^.oper[1].typ = top_reg) And
  897. GetNextInstruction(p,hp1) And
  898. (Pai(hp1)^.typ = ait_instruction) And
  899. (Paicpu(hp1)^.oper[0].typ = top_reg) And
  900. (Paicpu(hp1)^.oper[0].reg = Paicpu(p)^.oper[1].reg)
  901. Then
  902. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  903. Begin
  904. If ((Paicpu(hp1)^.opcode = A_OR) Or
  905. (Paicpu(hp1)^.opcode = A_TEST)) And
  906. (Paicpu(hp1)^.oper[1].typ = top_reg) And
  907. (Paicpu(hp1)^.oper[0].reg = Paicpu(hp1)^.oper[1].reg)
  908. Then
  909. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  910. Begin
  911. TmpUsedRegs := UsedRegs;
  912. { reg1 will be used after the first instruction, }
  913. { so update the allocation info }
  914. allocRegBetween(asmL,paicpu(p)^.oper[0].reg,p,hp1);
  915. If GetNextInstruction(hp1, hp2) And
  916. (hp2^.typ = ait_instruction) And
  917. paicpu(hp2)^.is_jmp and
  918. Not(RegUsedAfterInstruction(Paicpu(hp1)^.oper[0].reg, hp1, TmpUsedRegs))
  919. Then
  920. {change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  921. "test %reg1, %reg1; jxx"}
  922. Begin
  923. Paicpu(hp1)^.Loadoper(0,Paicpu(p)^.oper[0]);
  924. Paicpu(hp1)^.Loadoper(1,Paicpu(p)^.oper[0]);
  925. AsmL^.Remove(p);
  926. Dispose(p, done);
  927. p := hp1;
  928. continue
  929. End
  930. Else
  931. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  932. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  933. Begin
  934. Paicpu(hp1)^.Loadoper(0,Paicpu(p)^.oper[0]);
  935. Paicpu(hp1)^.Loadoper(1,Paicpu(p)^.oper[0]);
  936. End;
  937. End
  938. { Else
  939. If (Paicpu(p^.next)^.opcode
  940. In [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  941. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  942. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  943. End
  944. Else
  945. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  946. x >= RetOffset) as it doesn't do anything (it writes either to a
  947. parameter or to the temporary storage room for the function
  948. result)}
  949. If GetNextInstruction(p, hp1) And
  950. (Pai(hp1)^.typ = ait_instruction)
  951. Then
  952. If ((Paicpu(hp1)^.opcode = A_LEAVE) Or
  953. (Paicpu(hp1)^.opcode = A_RET)) And
  954. (Paicpu(p)^.oper[1].typ = top_ref) And
  955. (Paicpu(p)^.oper[1].ref^.base = procinfo^.FramePointer) And
  956. (Paicpu(p)^.oper[1].ref^.offset >= procinfo^.Return_Offset) And
  957. (Paicpu(p)^.oper[1].ref^.index = R_NO) And
  958. (Paicpu(p)^.oper[0].typ = top_reg)
  959. Then
  960. Begin
  961. AsmL^.Remove(p);
  962. Dispose(p, done);
  963. p := hp1;
  964. RemoveLastDeallocForFuncRes(asmL,p);
  965. End
  966. Else
  967. If (Paicpu(p)^.oper[0].typ = top_reg) And
  968. (Paicpu(p)^.oper[1].typ = top_ref) And
  969. (Paicpu(p)^.opsize = Paicpu(hp1)^.opsize) And
  970. (Paicpu(hp1)^.opcode = A_CMP) And
  971. (Paicpu(hp1)^.oper[1].typ = top_ref) And
  972. RefsEqual(Paicpu(p)^.oper[1].ref^, Paicpu(hp1)^.oper[1].ref^) Then
  973. {change "mov reg1, mem1; cmp x, mem1" to "mov reg, mem1; cmp x, reg1"}
  974. begin
  975. Paicpu(hp1)^.loadreg(1,Paicpu(p)^.oper[0].reg);
  976. allocRegBetween(asmL,paicpu(p)^.oper[0].reg,p,hp1);
  977. end;
  978. { Next instruction is also a MOV ? }
  979. If GetNextInstruction(p, hp1) And
  980. (pai(hp1)^.typ = ait_instruction) and
  981. (Paicpu(hp1)^.opcode = A_MOV) and
  982. (Paicpu(hp1)^.opsize = Paicpu(p)^.opsize)
  983. Then
  984. Begin
  985. If (Paicpu(hp1)^.oper[0].typ = Paicpu(p)^.oper[1].typ) and
  986. (Paicpu(hp1)^.oper[1].typ = Paicpu(p)^.oper[0].typ)
  987. Then
  988. {mov reg1, mem1 or mov mem1, reg1
  989. mov mem2, reg2 mov reg2, mem2}
  990. Begin
  991. If OpsEqual(Paicpu(hp1)^.oper[1],Paicpu(p)^.oper[0]) Then
  992. {mov reg1, mem1 or mov mem1, reg1
  993. mov mem2, reg1 mov reg2, mem1}
  994. Begin
  995. If OpsEqual(Paicpu(hp1)^.oper[0],Paicpu(p)^.oper[1]) Then
  996. { Removes the second statement from
  997. mov reg1, mem1/reg2
  998. mov mem1/reg2, reg1 }
  999. Begin
  1000. if (paicpu(p)^.oper[0].typ = top_reg) then
  1001. AllocRegBetween(asmL,paicpu(p)^.oper[0].reg,p,hp1);
  1002. AsmL^.remove(hp1);
  1003. Dispose(hp1,done);
  1004. End
  1005. Else
  1006. Begin
  1007. TmpUsedRegs := UsedRegs;
  1008. UpdateUsedRegs(TmpUsedRegs, Pai(hp1^.next));
  1009. If (Paicpu(p)^.oper[0].typ = top_reg) And
  1010. { mov reg1, mem1
  1011. mov mem2, reg1 }
  1012. GetNextInstruction(hp1, hp2) And
  1013. (hp2^.typ = ait_instruction) And
  1014. (Paicpu(hp2)^.opcode = A_CMP) And
  1015. (Paicpu(hp2)^.opsize = Paicpu(p)^.opsize) and
  1016. (Paicpu(hp2)^.oper[0].typ = TOp_Ref) And
  1017. (Paicpu(hp2)^.oper[1].typ = TOp_Reg) And
  1018. RefsEqual(Paicpu(hp2)^.oper[0].ref^, Paicpu(p)^.oper[1].ref^) And
  1019. (Paicpu(hp2)^.oper[1].reg = Paicpu(p)^.oper[0].reg) And
  1020. Not(RegUsedAfterInstruction(Paicpu(p)^.oper[0].reg, hp2, TmpUsedRegs)) Then
  1021. { change to
  1022. mov reg1, mem1 mov reg1, mem1
  1023. mov mem2, reg1 cmp reg1, mem2
  1024. cmp mem1, reg1 }
  1025. Begin
  1026. AsmL^.Remove(hp2);
  1027. Dispose(hp2, Done);
  1028. Paicpu(hp1)^.opcode := A_CMP;
  1029. Paicpu(hp1)^.loadref(1,newreference(Paicpu(hp1)^.oper[0].ref^));
  1030. Paicpu(hp1)^.loadreg(0,Paicpu(p)^.oper[0].reg);
  1031. End;
  1032. End;
  1033. End
  1034. Else
  1035. Begin
  1036. tmpUsedRegs := UsedRegs;
  1037. If GetNextInstruction(hp1, hp2) And
  1038. (Paicpu(p)^.oper[0].typ = top_ref) And
  1039. (Paicpu(p)^.oper[1].typ = top_reg) And
  1040. (Paicpu(hp1)^.oper[0].typ = top_reg) And
  1041. (Paicpu(hp1)^.oper[0].reg = Paicpu(p)^.oper[1].reg) And
  1042. (Paicpu(hp1)^.oper[1].typ = top_ref) And
  1043. (Pai(hp2)^.typ = ait_instruction) And
  1044. (Paicpu(hp2)^.opcode = A_MOV) And
  1045. (Paicpu(hp2)^.opsize = Paicpu(p)^.opsize) and
  1046. (Paicpu(hp2)^.oper[1].typ = top_reg) And
  1047. (Paicpu(hp2)^.oper[0].typ = top_ref) And
  1048. RefsEqual(Paicpu(hp2)^.oper[0].ref^, Paicpu(hp1)^.oper[1].ref^) Then
  1049. If not regInRef(Paicpu(hp2)^.oper[1].reg,Paicpu(hp2)^.oper[0].ref^) and
  1050. (Paicpu(p)^.oper[1].reg in [R_DI,R_EDI]) and
  1051. not(RegUsedAfterInstruction(R_EDI,hp1,tmpUsedRegs)) Then
  1052. { mov mem1, %edi
  1053. mov %edi, mem2
  1054. mov mem2, reg2
  1055. to:
  1056. mov mem1, reg2
  1057. mov reg2, mem2}
  1058. Begin
  1059. Paicpu(p)^.Loadoper(1,Paicpu(hp2)^.oper[1]);
  1060. Paicpu(hp1)^.loadoper(0,Paicpu(hp2)^.oper[1]);
  1061. AsmL^.Remove(hp2);
  1062. Dispose(hp2,Done);
  1063. End
  1064. Else
  1065. If (Paicpu(p)^.oper[1].reg <> Paicpu(hp2)^.oper[1].reg) And
  1066. not(RegInRef(Paicpu(p)^.oper[1].reg,Paicpu(p)^.oper[0].ref^)) And
  1067. not(RegInRef(Paicpu(hp2)^.oper[1].reg,Paicpu(hp2)^.oper[0].ref^))
  1068. Then
  1069. { mov mem1, reg1 mov mem1, reg1
  1070. mov reg1, mem2 mov reg1, mem2
  1071. mov mem2, reg2 mov mem2, reg1
  1072. to: to:
  1073. mov mem1, reg1 mov mem1, reg1
  1074. mov mem1, reg2 mov reg1, mem2
  1075. mov reg1, mem2
  1076. or (if mem1 depends on reg1
  1077. and/or if mem2 depends on reg2)
  1078. to:
  1079. mov mem1, reg1
  1080. mov reg1, mem2
  1081. mov reg1, reg2
  1082. }
  1083. Begin
  1084. Paicpu(hp1)^.LoadRef(0,newreference(Paicpu(p)^.oper[0].ref^));
  1085. Paicpu(hp1)^.LoadReg(1,Paicpu(hp2)^.oper[1].reg);
  1086. Paicpu(hp2)^.LoadRef(1,newreference(Paicpu(hp2)^.oper[0].ref^));
  1087. Paicpu(hp2)^.LoadReg(0,Paicpu(p)^.oper[1].reg);
  1088. allocRegBetween(asmL,paicpu(p)^.oper[1].reg,p,hp2);
  1089. if (paicpu(p)^.oper[0].ref^.base in (usableregs+[R_EDI])) then
  1090. allocRegBetween(asmL,paicpu(p)^.oper[0].ref^.base,p,hp2);
  1091. if (paicpu(p)^.oper[0].ref^.index in (usableregs+[R_EDI])) then
  1092. allocRegBetween(asmL,paicpu(p)^.oper[0].ref^.index,p,hp2);
  1093. End
  1094. Else
  1095. If (Paicpu(hp1)^.Oper[0].reg <> Paicpu(hp2)^.Oper[1].reg) Then
  1096. begin
  1097. Paicpu(hp2)^.LoadReg(0,Paicpu(hp1)^.Oper[0].reg);
  1098. allocRegBetween(asmL,paicpu(p)^.oper[1].reg,p,hp2);
  1099. end
  1100. else
  1101. begin
  1102. asmL^.Remove(hp2);
  1103. dispose(hp2, done);
  1104. end
  1105. End;
  1106. End
  1107. Else
  1108. (* {movl [mem1],reg1
  1109. movl [mem1],reg2
  1110. to:
  1111. movl [mem1],reg1
  1112. movl reg1,reg2 }
  1113. If (Paicpu(p)^.oper[0].typ = top_ref) and
  1114. (Paicpu(p)^.oper[1].typ = top_reg) and
  1115. (Paicpu(hp1)^.oper[0].typ = top_ref) and
  1116. (Paicpu(hp1)^.oper[1].typ = top_reg) and
  1117. (Paicpu(p)^.opsize = Paicpu(hp1)^.opsize) and
  1118. RefsEqual(TReference(Paicpu(p)^.oper[0]^),Paicpu(hp1)^.oper[0]^.ref^) and
  1119. (Paicpu(p)^.oper[1].reg<>Paicpu(hp1)^.oper[0]^.ref^.base) and
  1120. (Paicpu(p)^.oper[1].reg<>Paicpu(hp1)^.oper[0]^.ref^.index) then
  1121. Paicpu(hp1)^.LoadReg(0,Paicpu(p)^.oper[1].reg)
  1122. Else*)
  1123. { movl const1,[mem1]
  1124. movl [mem1],reg1
  1125. to:
  1126. movl const1,reg1
  1127. movl reg1,[mem1] }
  1128. If (Paicpu(p)^.oper[0].typ = top_const) and
  1129. (Paicpu(p)^.oper[1].typ = top_ref) and
  1130. (Paicpu(hp1)^.oper[0].typ = top_ref) and
  1131. (Paicpu(hp1)^.oper[1].typ = top_reg) and
  1132. (Paicpu(p)^.opsize = Paicpu(hp1)^.opsize) and
  1133. RefsEqual(Paicpu(hp1)^.oper[0].ref^,Paicpu(p)^.oper[1].ref^) then
  1134. Begin
  1135. allocregbetween(asml,Paicpu(hp1)^.oper[1].reg,p,hp1);
  1136. { allocregbetween doesn't insert this because at }
  1137. { this time, no regalloc info is available in }
  1138. { the optinfo field, so do it manually (JM) }
  1139. hp2 := new(paiRegalloc,alloc(Paicpu(hp1)^.oper[1].reg));
  1140. insertllitem(asml,p^.previous,p,hp2);
  1141. Paicpu(hp1)^.LoadReg(0,Paicpu(hp1)^.oper[1].reg);
  1142. Paicpu(hp1)^.LoadRef(1,newreference(Paicpu(p)^.oper[1].ref^));
  1143. Paicpu(p)^.LoadReg(1,Paicpu(hp1)^.oper[0].reg);
  1144. End
  1145. End;
  1146. End;
  1147. A_MOVZX:
  1148. Begin
  1149. {removes superfluous And's after movzx's}
  1150. If (Paicpu(p)^.oper[1].typ = top_reg) And
  1151. GetNextInstruction(p, hp1) And
  1152. (Pai(hp1)^.typ = ait_instruction) And
  1153. (Paicpu(hp1)^.opcode = A_AND) And
  1154. (Paicpu(hp1)^.oper[0].typ = top_const) And
  1155. (Paicpu(hp1)^.oper[1].typ = top_reg) And
  1156. (Paicpu(hp1)^.oper[1].reg = Paicpu(p)^.oper[1].reg)
  1157. Then
  1158. Case Paicpu(p)^.opsize Of
  1159. S_BL, S_BW:
  1160. If (Paicpu(hp1)^.oper[0].val = $ff) Then
  1161. Begin
  1162. AsmL^.Remove(hp1);
  1163. Dispose(hp1, Done);
  1164. End;
  1165. S_WL:
  1166. If (Paicpu(hp1)^.oper[0].val = $ffff) Then
  1167. Begin
  1168. AsmL^.Remove(hp1);
  1169. Dispose(hp1, Done);
  1170. End;
  1171. End;
  1172. {changes some movzx constructs to faster synonims (all examples
  1173. are given with eax/ax, but are also valid for other registers)}
  1174. If (Paicpu(p)^.oper[1].typ = top_reg) Then
  1175. If (Paicpu(p)^.oper[0].typ = top_reg) Then
  1176. Case Paicpu(p)^.opsize of
  1177. S_BW:
  1178. Begin
  1179. If (Paicpu(p)^.oper[0].reg = Reg16ToReg8(Paicpu(p)^.oper[1].reg)) And
  1180. Not(CS_LittleSize In aktglobalswitches)
  1181. Then
  1182. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1183. Begin
  1184. Paicpu(p)^.opcode := A_AND;
  1185. Paicpu(p)^.changeopsize(S_W);
  1186. Paicpu(p)^.LoadConst(0,$ff);
  1187. End
  1188. Else
  1189. If GetNextInstruction(p, hp1) And
  1190. (Pai(hp1)^.typ = ait_instruction) And
  1191. (Paicpu(hp1)^.opcode = A_AND) And
  1192. (Paicpu(hp1)^.oper[0].typ = top_const) And
  1193. (Paicpu(hp1)^.oper[1].typ = top_reg) And
  1194. (Paicpu(hp1)^.oper[1].reg = Paicpu(p)^.oper[1].reg)
  1195. Then
  1196. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1197. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1198. Begin
  1199. Paicpu(p)^.opcode := A_MOV;
  1200. Paicpu(p)^.changeopsize(S_W);
  1201. Paicpu(p)^.LoadReg(0,Reg8ToReg16(Paicpu(p)^.oper[0].reg));
  1202. Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val And $ff);
  1203. End;
  1204. End;
  1205. S_BL:
  1206. Begin
  1207. If (Paicpu(p)^.oper[0].reg = Reg32ToReg8(Paicpu(p)^.oper[1].reg)) And
  1208. Not(CS_LittleSize in aktglobalswitches)
  1209. Then
  1210. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1211. Begin
  1212. Paicpu(p)^.opcode := A_AND;
  1213. Paicpu(p)^.changeopsize(S_L);
  1214. Paicpu(p)^.loadconst(0,$ff)
  1215. End
  1216. Else
  1217. If GetNextInstruction(p, hp1) And
  1218. (Pai(hp1)^.typ = ait_instruction) And
  1219. (Paicpu(hp1)^.opcode = A_AND) And
  1220. (Paicpu(hp1)^.oper[0].typ = top_const) And
  1221. (Paicpu(hp1)^.oper[1].typ = top_reg) And
  1222. (Paicpu(hp1)^.oper[1].reg = Paicpu(p)^.oper[1].reg)
  1223. Then
  1224. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1225. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1226. Begin
  1227. Paicpu(p)^.opcode := A_MOV;
  1228. Paicpu(p)^.changeopsize(S_L);
  1229. Paicpu(p)^.LoadReg(0,Reg8ToReg32(Paicpu(p)^.oper[0].reg));
  1230. Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val And $ff);
  1231. End
  1232. End;
  1233. S_WL:
  1234. Begin
  1235. If (Paicpu(p)^.oper[0].reg = Reg32ToReg16(Paicpu(p)^.oper[1].reg)) And
  1236. Not(CS_LittleSize In aktglobalswitches)
  1237. Then
  1238. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1239. Begin
  1240. Paicpu(p)^.opcode := A_AND;
  1241. Paicpu(p)^.changeopsize(S_L);
  1242. Paicpu(p)^.LoadConst(0,$ffff);
  1243. End
  1244. Else
  1245. If GetNextInstruction(p, hp1) And
  1246. (Pai(hp1)^.typ = ait_instruction) And
  1247. (Paicpu(hp1)^.opcode = A_AND) And
  1248. (Paicpu(hp1)^.oper[0].typ = top_const) And
  1249. (Paicpu(hp1)^.oper[1].typ = top_reg) And
  1250. (Paicpu(hp1)^.oper[1].reg = Paicpu(p)^.oper[1].reg)
  1251. Then
  1252. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1253. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1254. Begin
  1255. Paicpu(p)^.opcode := A_MOV;
  1256. Paicpu(p)^.changeopsize(S_L);
  1257. Paicpu(p)^.LoadReg(0,Reg16ToReg32(Paicpu(p)^.oper[0].reg));
  1258. Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val And $ffff);
  1259. End;
  1260. End;
  1261. End
  1262. Else
  1263. If (Paicpu(p)^.oper[0].typ = top_ref) Then
  1264. Begin
  1265. If GetNextInstruction(p, hp1) And
  1266. (Pai(hp1)^.typ = ait_instruction) And
  1267. (Paicpu(hp1)^.opcode = A_AND) And
  1268. (Paicpu(hp1)^.oper[0].typ = Top_Const) And
  1269. (Paicpu(hp1)^.oper[1].typ = Top_Reg) And
  1270. (Paicpu(hp1)^.oper[1].reg = Paicpu(p)^.oper[1].reg) Then
  1271. Begin
  1272. Paicpu(p)^.opcode := A_MOV;
  1273. Case Paicpu(p)^.opsize Of
  1274. S_BL:
  1275. Begin
  1276. Paicpu(p)^.changeopsize(S_L);
  1277. Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val And $ff);
  1278. End;
  1279. S_WL:
  1280. Begin
  1281. Paicpu(p)^.changeopsize(S_L);
  1282. Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val And $ffff);
  1283. End;
  1284. S_BW:
  1285. Begin
  1286. Paicpu(p)^.changeopsize(S_W);
  1287. Paicpu(hp1)^.LoadConst(0,Paicpu(hp1)^.oper[0].val And $ff);
  1288. End;
  1289. End;
  1290. End;
  1291. End;
  1292. End;
  1293. A_POP:
  1294. Begin
  1295. if target_info.target=target_i386_go32v2 then
  1296. begin
  1297. { Transform a series of pop/pop/pop/push/push/push to }
  1298. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1299. { because I'm not sure whether they can cope with }
  1300. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1301. { such a problem when using esp as frame pointer (JM) }
  1302. if (Paicpu(p)^.oper[0].typ = top_reg) then
  1303. begin
  1304. hp1 := p;
  1305. hp2 := p;
  1306. l := 0;
  1307. while getNextInstruction(hp1,hp1) and
  1308. (hp1^.typ = ait_instruction) and
  1309. (paicpu(hp1)^.opcode = A_POP) and
  1310. (paicpu(hp1)^.oper[0].typ = top_reg) do
  1311. begin
  1312. hp2 := hp1;
  1313. inc(l,4);
  1314. end;
  1315. getLastInstruction(p,hp3);
  1316. l1 := 0;
  1317. while (hp2 <> hp3) and
  1318. assigned(hp1) and
  1319. (hp1^.typ = ait_instruction) and
  1320. (paicpu(hp1)^.opcode = A_PUSH) and
  1321. (paicpu(hp1)^.oper[0].typ = top_reg) and
  1322. (paicpu(hp1)^.oper[0].reg = paicpu(hp2)^.oper[0].reg) do
  1323. begin
  1324. { change it to a two op operation }
  1325. paicpu(hp2)^.oper[1].typ:=top_none;
  1326. paicpu(hp2)^.ops:=2;
  1327. paicpu(hp2)^.opcode := A_MOV;
  1328. paicpu(hp2)^.Loadoper(1,paicpu(hp1)^.oper[0]);
  1329. reset_reference(tmpref);
  1330. tmpRef.base := stack_pointer;
  1331. tmpRef.offset := l;
  1332. paicpu(hp2)^.loadRef(0,newReference(tmpRef));
  1333. hp4 := hp1;
  1334. getNextInstruction(hp1,hp1);
  1335. asmL^.remove(hp4);
  1336. dispose(hp4,done);
  1337. getLastInstruction(hp2,hp2);
  1338. dec(l,4);
  1339. inc(l1);
  1340. end;
  1341. if l <> -4 then
  1342. begin
  1343. inc(l,4);
  1344. for l1 := l1 downto 1 do
  1345. begin
  1346. getNextInstruction(hp2,hp2);
  1347. dec(paicpu(hp2)^.oper[0].ref^.offset,l);
  1348. end
  1349. end
  1350. end
  1351. end
  1352. else
  1353. begin
  1354. if (Paicpu(p)^.oper[0].typ = top_reg) And
  1355. GetNextInstruction(p, hp1) And
  1356. (pai(hp1)^.typ=ait_instruction) and
  1357. (Paicpu(hp1)^.opcode=A_PUSH) and
  1358. (Paicpu(hp1)^.oper[0].typ = top_reg) And
  1359. (Paicpu(hp1)^.oper[0].reg=Paicpu(p)^.oper[0].reg) then
  1360. Begin
  1361. { change it to a two op operation }
  1362. Paicpu(p)^.oper[1].typ:=top_none;
  1363. Paicpu(p)^.ops:=2;
  1364. Paicpu(p)^.opcode := A_MOV;
  1365. Paicpu(p)^.Loadoper(1,Paicpu(p)^.oper[0]);
  1366. Reset_reference(tmpref);
  1367. TmpRef.base := R_ESP;
  1368. Paicpu(p)^.LoadRef(0,newReference(TmpRef));
  1369. AsmL^.Remove(hp1);
  1370. Dispose(hp1, Done)
  1371. End;
  1372. end;
  1373. end;
  1374. A_PUSH:
  1375. Begin
  1376. If (Paicpu(p)^.opsize = S_W) And
  1377. (Paicpu(p)^.oper[0].typ = Top_Const) And
  1378. GetNextInstruction(p, hp1) And
  1379. (Pai(hp1)^.typ = ait_instruction) And
  1380. (Paicpu(hp1)^.opcode = A_PUSH) And
  1381. (Paicpu(hp1)^.oper[0].typ = Top_Const) And
  1382. (Paicpu(hp1)^.opsize = S_W) Then
  1383. Begin
  1384. Paicpu(p)^.changeopsize(S_L);
  1385. Paicpu(p)^.LoadConst(0,Paicpu(p)^.oper[0].val shl 16 + word(Paicpu(hp1)^.oper[0].val));
  1386. AsmL^.Remove(hp1);
  1387. Dispose(hp1, Done)
  1388. End;
  1389. End;
  1390. A_SHL, A_SAL:
  1391. Begin
  1392. If (Paicpu(p)^.oper[0].typ = Top_Const) And
  1393. (Paicpu(p)^.oper[1].typ = Top_Reg) And
  1394. (Paicpu(p)^.opsize = S_L) And
  1395. (Paicpu(p)^.oper[0].val <= 3)
  1396. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1397. Then
  1398. Begin
  1399. TmpBool1 := True; {should we check the next instruction?}
  1400. TmpBool2 := False; {have we found an add/sub which could be
  1401. integrated in the lea?}
  1402. Reset_reference(tmpref);
  1403. TmpRef.index := Paicpu(p)^.oper[1].reg;
  1404. TmpRef.scalefactor := 1 shl Paicpu(p)^.oper[0].val;
  1405. While TmpBool1 And
  1406. GetNextInstruction(p, hp1) And
  1407. (Pai(hp1)^.typ = ait_instruction) And
  1408. ((Paicpu(hp1)^.opcode = A_ADD) Or
  1409. (Paicpu(hp1)^.opcode = A_SUB)) And
  1410. (Paicpu(hp1)^.oper[1].typ = Top_Reg) And
  1411. (Paicpu(hp1)^.oper[1].reg = Paicpu(p)^.oper[1].reg) Do
  1412. Begin
  1413. TmpBool1 := False;
  1414. If (Paicpu(hp1)^.oper[0].typ = Top_Const)
  1415. Then
  1416. Begin
  1417. TmpBool1 := True;
  1418. TmpBool2 := True;
  1419. If Paicpu(hp1)^.opcode = A_ADD Then
  1420. Inc(TmpRef.offset, Paicpu(hp1)^.oper[0].val)
  1421. Else
  1422. Dec(TmpRef.offset, Paicpu(hp1)^.oper[0].val);
  1423. AsmL^.Remove(hp1);
  1424. Dispose(hp1, Done);
  1425. End
  1426. Else
  1427. If (Paicpu(hp1)^.oper[0].typ = Top_Reg) And
  1428. (Paicpu(hp1)^.opcode = A_ADD) And
  1429. (TmpRef.base = R_NO) Then
  1430. Begin
  1431. TmpBool1 := True;
  1432. TmpBool2 := True;
  1433. TmpRef.base := Paicpu(hp1)^.oper[0].reg;
  1434. AsmL^.Remove(hp1);
  1435. Dispose(hp1, Done);
  1436. End;
  1437. End;
  1438. If TmpBool2 Or
  1439. ((aktoptprocessor < ClassP6) And
  1440. (Paicpu(p)^.oper[0].val <= 3) And
  1441. Not(CS_LittleSize in aktglobalswitches))
  1442. Then
  1443. Begin
  1444. If Not(TmpBool2) And
  1445. (Paicpu(p)^.oper[0].val = 1)
  1446. Then
  1447. Begin
  1448. hp1 := new(Paicpu,op_reg_reg(A_ADD,Paicpu(p)^.opsize,
  1449. Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[1].reg))
  1450. End
  1451. Else hp1 := New(Paicpu, op_ref_reg(A_LEA, S_L, newReference(TmpRef),
  1452. Paicpu(p)^.oper[1].reg));
  1453. InsertLLItem(AsmL,p^.previous, p^.next, hp1);
  1454. Dispose(p, Done);
  1455. p := hp1;
  1456. End;
  1457. End
  1458. Else
  1459. If (aktoptprocessor < ClassP6) And
  1460. (Paicpu(p)^.oper[0].typ = top_const) And
  1461. (Paicpu(p)^.oper[1].typ = top_reg) Then
  1462. If (Paicpu(p)^.oper[0].val = 1)
  1463. Then
  1464. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1465. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1466. (unlike shl, which is only pairable in the U pipe)}
  1467. Begin
  1468. hp1 := new(Paicpu,op_reg_reg(A_ADD,Paicpu(p)^.opsize,
  1469. Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[1].reg));
  1470. InsertLLItem(AsmL,p^.previous, p^.next, hp1);
  1471. Dispose(p, done);
  1472. p := hp1;
  1473. End
  1474. Else If (Paicpu(p)^.opsize = S_L) and
  1475. (Paicpu(p)^.oper[0].val<= 3) Then
  1476. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1477. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1478. Begin
  1479. Reset_reference(tmpref);
  1480. TmpRef.index := Paicpu(p)^.oper[1].reg;
  1481. TmpRef.scalefactor := 1 shl Paicpu(p)^.oper[0].val;
  1482. hp1 := new(Paicpu,op_ref_reg(A_LEA,S_L,newReference(TmpRef), Paicpu(p)^.oper[1].reg));
  1483. InsertLLItem(AsmL,p^.previous, p^.next, hp1);
  1484. Dispose(p, done);
  1485. p := hp1;
  1486. End
  1487. End;
  1488. A_SETcc :
  1489. { changes
  1490. setcc (funcres) setcc reg
  1491. movb (funcres), reg to leave/ret
  1492. leave/ret }
  1493. Begin
  1494. If (Paicpu(p)^.oper[0].typ = top_ref) And
  1495. GetNextInstruction(p, hp1) And
  1496. GetNextInstruction(hp1, hp2) And
  1497. (hp2^.typ = ait_instruction) And
  1498. ((Paicpu(hp2)^.opcode = A_LEAVE) or
  1499. (Paicpu(hp2)^.opcode = A_RET)) And
  1500. (Paicpu(p)^.oper[0].ref^.Base = procinfo^.FramePointer) And
  1501. (Paicpu(p)^.oper[0].ref^.Index = R_NO) And
  1502. (Paicpu(p)^.oper[0].ref^.Offset >= procinfo^.Return_Offset) And
  1503. (hp1^.typ = ait_instruction) And
  1504. (Paicpu(hp1)^.opcode = A_MOV) And
  1505. (Paicpu(hp1)^.opsize = S_B) And
  1506. (Paicpu(hp1)^.oper[0].typ = top_ref) And
  1507. RefsEqual(Paicpu(hp1)^.oper[0].ref^, Paicpu(p)^.oper[0].ref^) Then
  1508. Begin
  1509. Paicpu(p)^.LoadReg(0,Paicpu(hp1)^.oper[1].reg);
  1510. AsmL^.Remove(hp1);
  1511. Dispose(hp1, Done)
  1512. End
  1513. End;
  1514. A_SUB:
  1515. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1516. { * change "sub/add const1, reg" or "dec reg" followed by
  1517. "sub const2, reg" to one "sub ..., reg" }
  1518. Begin
  1519. If (Paicpu(p)^.oper[0].typ = top_const) And
  1520. (Paicpu(p)^.oper[1].typ = top_reg) Then
  1521. If (Paicpu(p)^.oper[0].val = 2) And
  1522. (Paicpu(p)^.oper[1].reg = R_ESP) and
  1523. { Don't do the sub/push optimization if the sub }
  1524. { comes from setting up the stack frame (JM) }
  1525. (not getLastInstruction(p,hp1) or
  1526. (hp1^.typ <> ait_instruction) or
  1527. (paicpu(hp1)^.opcode <> A_MOV) or
  1528. (paicpu(hp1)^.oper[0].typ <> top_reg) or
  1529. (paicpu(hp1)^.oper[0].reg <> R_ESP) or
  1530. (paicpu(hp1)^.oper[1].typ <> top_reg) or
  1531. (paicpu(hp1)^.oper[1].reg <> R_EBP)) then
  1532. Begin
  1533. hp1 := Pai(p^.next);
  1534. While Assigned(hp1) And
  1535. (Pai(hp1)^.typ In [ait_instruction]+SkipInstr) And
  1536. not regReadByInstruction(R_ESP,hp1) and
  1537. not regModifiedByInstruction(R_ESP,hp1) do
  1538. hp1 := Pai(hp1^.next);
  1539. If Assigned(hp1) And
  1540. (Pai(hp1)^.typ = ait_instruction) And
  1541. (Paicpu(hp1)^.opcode = A_PUSH) And
  1542. (Paicpu(hp1)^.opsize = S_W)
  1543. Then
  1544. Begin
  1545. Paicpu(hp1)^.changeopsize(S_L);
  1546. if Paicpu(hp1)^.oper[0].typ=top_reg then
  1547. Paicpu(hp1)^.LoadReg(0,Reg16ToReg32(Paicpu(hp1)^.oper[0].reg));
  1548. hp1 := Pai(p^.next);
  1549. AsmL^.Remove(p);
  1550. Dispose(p, Done);
  1551. p := hp1;
  1552. Continue
  1553. End;
  1554. If DoSubAddOpt(p) Then continue;
  1555. End
  1556. Else If DoSubAddOpt(p) Then Continue
  1557. End;
  1558. A_XOR:
  1559. If (Paicpu(p)^.oper[0].typ = top_reg) And
  1560. (Paicpu(p)^.oper[1].typ = top_reg) And
  1561. (Paicpu(p)^.oper[0].reg = Paicpu(p)^.oper[1].reg) then
  1562. { temporarily change this to 'mov reg,0' to make it easier }
  1563. { for the CSE. Will be changed back in pass 2 }
  1564. begin
  1565. paicpu(p)^.opcode := A_MOV;
  1566. paicpu(p)^.loadconst(0,0);
  1567. end;
  1568. End;
  1569. end; { if is_jmp }
  1570. End;
  1571. { ait_label:
  1572. Begin
  1573. If labelCanBeSkipped(pai_label(p))
  1574. Then
  1575. Begin
  1576. hp1 := Pai(p^.next);
  1577. AsmL^.Remove(p);
  1578. Dispose(p, Done);
  1579. p := hp1;
  1580. Continue
  1581. End;
  1582. End;}
  1583. End;
  1584. p:=pai(p^.next);
  1585. end;
  1586. end;
  function isFoldableArithOp(hp1: paicpu; reg: tregister): boolean;
  { Tells whether the instruction hp1 is one of the arithmetic/logical
    opcodes handled below and operates on the register reg.

    hp1 : instruction to inspect; it is dereferenced unconditionally
    reg : register the instruction has to work on

    The result is true for
      - add/sub/or/xor/and/shl/shr/sar whose operand 0 is a register or a
        constant and whose operand 1 is the register reg
      - inc/dec whose operand 0 is the register reg
    and false in every other case. }
  var
    regOperIdx: longint;
  begin
    isFoldableArithOp := false;
    { select the operand that has to be equal to reg; leave right away }
    { (with the result still false) when the instruction does not qualify }
    case hp1^.opcode of
      A_INC, A_DEC:
        regOperIdx := 0;
      A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
        begin
          if not (hp1^.oper[0].typ in [top_reg, top_const]) then
            exit;
          regOperIdx := 1;
        end;
      else
        exit;
    end;
    isFoldableArithOp :=
      (hp1^.oper[regOperIdx].typ = top_reg) and
      (hp1^.oper[regOperIdx].reg = reg);
  end;
  1602. Procedure PeepHoleOptPass2(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
  1603. {$ifdef USECMOV}
  function CanBeCMOV(p : pai) : boolean;
  { Returns true if p is an instruction that may safely be rewritten in
    place into a conditional move (A_CMOVcc) by the jcc optimization.

    p : list item to inspect; nil is allowed and yields false

    Requirements checked:
      - p is an assigned ait_instruction with opcode A_MOV
      - the operand size is S_L or S_W
      - operand 1 (the target of the mov) is a register: CMOVcc has no
        form that writes to memory, so "mov x, mem" can never be converted
      - operand 0 (the source of the mov) is a register as well: CMOVcc
        reads a memory source even when the condition is false, so turning
        a load that was jumped over into a cmov could introduce a fault
        (e.g. a nil pointer dereference) that the original code avoided

    The evaluation order matters: assigned(p) and the typ test guard the
    paicpu typecasts that follow. }
  begin
    CanBeCMOV:=assigned(p) and (p^.typ=ait_instruction) and
      (paicpu(p)^.opcode=A_MOV) and
      (paicpu(p)^.opsize in [S_L,S_W]) and
      (paicpu(p)^.oper[0].typ = top_reg) and
      (paicpu(p)^.oper[1].typ = top_reg);
  end;
  1612. {$endif USECMOV}
  1613. var
  1614. p,hp1,hp2: pai;
  1615. {$ifdef USECMOV}
  1616. l : longint;
  1617. condition : tasmcond;
  1618. hp3: pai;
  1619. {$endif USECMOV}
  1620. UsedRegs, TmpUsedRegs: TRegSet;
  1621. Begin
  1622. P := BlockStart;
  1623. UsedRegs := [];
  1624. While (P <> BlockEnd) Do
  1625. Begin
  1626. UpdateUsedRegs(UsedRegs, Pai(p^.next));
  1627. Case P^.Typ Of
  1628. Ait_Instruction:
  1629. Begin
  1630. Case Paicpu(p)^.opcode Of
  1631. {$ifdef USECMOV}
  1632. A_Jcc:
  1633. if (aktspecificoptprocessor=ClassP6) then
  1634. begin
  1635. { check for
  1636. jCC xxx
  1637. <several movs>
  1638. xxx:
  1639. }
  1640. l:=0;
  1641. GetNextInstruction(p, hp1);
  1642. while assigned(hp1) And
  1643. CanBeCMOV(hp1) do
  1644. begin
  1645. inc(l);
  1646. GetNextInstruction(hp1,hp1);
  1647. end;
  1648. if assigned(hp1) then
  1649. begin
  1650. if FindLabel(PAsmLabel(paicpu(p)^.oper[0].sym),hp1) then
  1651. begin
  1652. if (l<=4) and (l>0) then
  1653. begin
  1654. condition:=inverse_cond[paicpu(p)^.condition];
  1655. GetNextInstruction(p,hp1);
  1656. asml^.remove(p);
  1657. dispose(p,done);
  1658. p:=hp1;
  1659. repeat
  1660. paicpu(hp1)^.opcode:=A_CMOVcc;
  1661. paicpu(hp1)^.condition:=condition;
  1662. GetNextInstruction(hp1,hp1);
  1663. until not(assigned(hp1)) or
  1664. not(CanBeCMOV(hp1));
  1665. asml^.remove(hp1);
  1666. dispose(hp1,done);
  1667. continue;
  1668. end;
  1669. end
  1670. else
  1671. begin
  1672. { check further for
  1673. jCC xxx
  1674. <several movs>
  1675. jmp yyy
  1676. xxx:
  1677. <several movs>
  1678. yyy:
  1679. }
  1680. { hp2 points to jmp xxx }
  1681. hp2:=hp1;
  1682. { skip hp1 to xxx }
  1683. GetNextInstruction(hp1, hp1);
  1684. if assigned(hp2) and
  1685. assigned(hp1) and
  1686. (l<=3) and
  1687. (hp2^.typ=ait_instruction) and
  1688. (paicpu(hp2)^.is_jmp) and
  1689. (paicpu(hp2)^.condition=C_None) and
  1690. FindLabel(PAsmLabel(paicpu(p)^.oper[0].sym),hp1) then
  1691. begin
  1692. l:=0;
  1693. while assigned(hp1) And
  1694. CanBeCMOV(hp1) do
  1695. begin
  1696. inc(l);
  1697. GetNextInstruction(hp1, hp1);
  1698. end;
  1699. end;
  1700. {
  1701. if assigned(hp1) and
  1702. FindLabel(PAsmLabel(paicpu(hp2)^.oper[0].sym),hp1) then
  1703. begin
  1704. condition:=inverse_cond[paicpu(p)^.condition];
  1705. GetNextInstruction(p,hp1);
  1706. asml^.remove(p);
  1707. dispose(p,done);
  1708. p:=hp1;
  1709. repeat
  1710. paicpu(hp1)^.opcode:=A_CMOVcc;
  1711. paicpu(hp1)^.condition:=condition;
  1712. GetNextInstruction(hp1,hp1);
  1713. until not(assigned(hp1)) or
  1714. not(CanBeCMOV(hp1));
  1715. hp2:=hp1^.next;
  1716. condition:=inverse_cond[condition];
  1717. asml^.remove(hp1^.next)
  1718. dispose(hp1^.next,done);
  1719. asml^.remove(hp1);
  1720. dispose(hp1,done);
  1721. continue;
  1722. end;
  1723. }
  1724. end;
  1725. end;
  1726. end;
  1727. {$endif USECMOV}
  1728. A_FSTP,A_FISTP:
  1729. if doFpuLoadStoreOpt(asmL,p) then
  1730. continue;
  1731. A_IMUL:
  1732. begin
  1733. if ((paicpu(p)^.oper[0].typ = top_const) or
  1734. (paicpu(p)^.oper[0].typ = top_symbol)) and
  1735. (paicpu(p)^.oper[1].typ = top_reg) and
  1736. ((paicpu(p)^.oper[2].typ = top_none) or
  1737. ((paicpu(p)^.oper[2].typ = top_reg) and
  1738. (paicpu(p)^.oper[2].reg = paicpu(p)^.oper[1].reg))) and
  1739. getLastInstruction(p,hp1) and
  1740. (hp1^.typ = ait_instruction) and
  1741. (paicpu(hp1)^.opcode = A_MOV) and
  1742. (paicpu(hp1)^.oper[0].typ = top_reg) and
  1743. (paicpu(hp1)^.oper[1].typ = top_reg) and
  1744. (paicpu(hp1)^.oper[1].reg = paicpu(p)^.oper[1].reg) then
  1745. { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
  1746. begin
  1747. paicpu(p)^.ops := 3;
  1748. paicpu(p)^.loadreg(1,paicpu(hp1)^.oper[0].reg);
  1749. paicpu(p)^.loadreg(2,paicpu(hp1)^.oper[1].reg);
  1750. asmL^.remove(hp1);
  1751. dispose(hp1,done);
  1752. end;
  1753. end;
  1754. A_MOV:
  1755. Begin
  1756. If (Paicpu(p)^.oper[0].typ = top_reg) And
  1757. (Paicpu(p)^.oper[1].typ = top_reg) And
  1758. GetNextInstruction(p, hp1) And
  1759. (hp1^.typ = ait_Instruction) And
  1760. ((Paicpu(hp1)^.opcode = A_MOV) or
  1761. (Paicpu(hp1)^.opcode = A_MOVZX) or
  1762. (Paicpu(hp1)^.opcode = A_MOVSX)) And
  1763. (Paicpu(hp1)^.oper[0].typ = top_ref) And
  1764. (Paicpu(hp1)^.oper[1].typ = top_reg) And
  1765. ((Paicpu(hp1)^.oper[0].ref^.Base = Paicpu(p)^.oper[1].reg) Or
  1766. (Paicpu(hp1)^.oper[0].ref^.Index = Paicpu(p)^.oper[1].reg)) And
  1767. (Reg32(Paicpu(hp1)^.oper[1].reg) = Paicpu(p)^.oper[1].reg) Then
  1768. {mov reg1, reg2
  1769. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  1770. Begin
  1771. If (Paicpu(hp1)^.oper[0].ref^.Base = Paicpu(p)^.oper[1].reg) Then
  1772. Paicpu(hp1)^.oper[0].ref^.Base := Paicpu(p)^.oper[0].reg;
  1773. If (Paicpu(hp1)^.oper[0].ref^.Index = Paicpu(p)^.oper[1].reg) Then
  1774. Paicpu(hp1)^.oper[0].ref^.Index := Paicpu(p)^.oper[0].reg;
  1775. AsmL^.Remove(p);
  1776. Dispose(p, Done);
  1777. p := hp1;
  1778. Continue;
  1779. End
  1780. Else If (Paicpu(p)^.oper[0].typ = top_ref) And
  1781. GetNextInstruction(p,hp1) And
  1782. (hp1^.typ = ait_instruction) And
  1783. IsFoldableArithOp(paicpu(hp1),Paicpu(p)^.oper[1].reg) And
  1784. GetNextInstruction(hp1,hp2) And
  1785. (hp2^.typ = ait_instruction) And
  1786. (Paicpu(hp2)^.opcode = A_MOV) And
  1787. (Paicpu(hp2)^.oper[0].typ = top_reg) And
  1788. (Paicpu(hp2)^.oper[0].reg = Paicpu(p)^.oper[1].reg) And
  1789. (Paicpu(hp2)^.oper[1].typ = top_ref) Then
  1790. Begin
  1791. TmpUsedRegs := UsedRegs;
  1792. UpdateUsedRegs(TmpUsedRegs,Pai(hp1^.next));
  1793. If (RefsEqual(Paicpu(hp2)^.oper[1].ref^, Paicpu(p)^.oper[0].ref^) And
  1794. Not(RegUsedAfterInstruction(Paicpu(p)^.oper[1].reg,
  1795. hp2, TmpUsedRegs)))
  1796. Then
  1797. { change mov (ref), reg }
  1798. { add/sub/or/... reg2/$const, reg }
  1799. { mov reg, (ref) }
  1800. { # release reg }
  1801. { to add/sub/or/... reg2/$const, (ref) }
  1802. Begin
  1803. case paicpu(hp1)^.opcode of
  1804. A_INC,A_DEC:
  1805. paicpu(hp1)^.LoadRef(0,newreference(Paicpu(p)^.oper[0].ref^))
  1806. else
  1807. paicpu(hp1)^.LoadRef(1,newreference(Paicpu(p)^.oper[0].ref^));
  1808. end;
  1809. AsmL^.Remove(p);
  1810. AsmL^.Remove(hp2);
  1811. Dispose(p,done);
  1812. Dispose(hp2,Done);
  1813. p := hp1
  1814. End;
  1815. End
  1816. End;
  1817. A_TEST, A_OR:
  1818. {removes the line marked with (x) from the sequence
  1819. And/or/xor/add/sub/... $x, %y
  1820. test/or %y, %y (x)
  1821. j(n)z _Label
  1822. as the first instruction already adjusts the ZF}
  1823. Begin
  1824. If OpsEqual(Paicpu(p)^.oper[0],Paicpu(p)^.oper[1]) Then
  1825. If GetLastInstruction(p, hp1) And
  1826. (pai(hp1)^.typ = ait_instruction) Then
  1827. Case Paicpu(hp1)^.opcode Of
  1828. A_ADD, A_SUB, A_OR, A_XOR, A_AND{, A_SHL, A_SHR}:
  1829. Begin
  1830. If OpsEqual(Paicpu(hp1)^.oper[1],Paicpu(p)^.oper[0]) Then
  1831. Begin
  1832. hp1 := pai(p^.next);
  1833. asml^.remove(p);
  1834. dispose(p, done);
  1835. p := pai(hp1);
  1836. continue
  1837. End;
  1838. End;
  1839. A_DEC, A_INC, A_NEG:
  1840. Begin
  1841. If OpsEqual(Paicpu(hp1)^.oper[0],Paicpu(p)^.oper[0]) Then
  1842. Begin
  1843. Case Paicpu(hp1)^.opcode Of
  1844. A_DEC, A_INC:
  1845. {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
  1846. Begin
  1847. Case Paicpu(hp1)^.opcode Of
  1848. A_DEC: Paicpu(hp1)^.opcode := A_SUB;
  1849. A_INC: Paicpu(hp1)^.opcode := A_ADD;
  1850. End;
  1851. Paicpu(hp1)^.Loadoper(1,Paicpu(hp1)^.oper[0]);
  1852. Paicpu(hp1)^.LoadConst(0,1);
  1853. Paicpu(hp1)^.ops:=2;
  1854. End
  1855. End;
  1856. hp1 := pai(p^.next);
  1857. asml^.remove(p);
  1858. dispose(p, done);
  1859. p := pai(hp1);
  1860. continue
  1861. End;
  1862. End
  1863. End
  1864. End;
  1865. End;
  1866. End;
  1867. End;
  1868. p := Pai(p^.next)
  1869. End;
  1870. End;
  { PostPeepHoleOpts: final peephole pass.
    Walks the items of AsmL from BlockStart up to (not including) BlockEnd
    and applies three rewrites to instructions:
      - call x; jmp y      -> push y; jmp x      (AktOptProcessor < ClassP6)
      - mov $0,%reg        -> xor %reg,%reg
      - movzbl src,%reg32  -> xorl %reg32,%reg32; movb src,%reg8
                              (aktoptprocessor = ClassP5, not optimizing
                               for size)
    NOTE(review): the xor forms write the flags whereas mov/movzx do not;
    nothing in this procedure checks whether the flags are needed by a later
    instruction - confirm that callers guarantee this. }
  Procedure PostPeepHoleOpts(AsmL: PAasmOutput; BlockStart, BlockEnd: Pai);
  var
    p,hp1,hp2: pai;
  Begin
    p := BlockStart;
    While (p <> BlockEnd) Do
      Begin
        Case p^.Typ Of
          Ait_Instruction:
            Begin
              Case Paicpu(p)^.opcode Of
                A_CALL:
                  { change "call x; jmp y" into "push y; jmp x".  The jmp
                    operand must be a symbol: its sym field is dereferenced
                    and pushed below, which would be invalid for a register
                    or memory operand. }
                  If (AktOptProcessor < ClassP6) And
                     GetNextInstruction(p, hp1) And
                     (hp1^.typ = ait_instruction) And
                     (paicpu(hp1)^.opcode = A_JMP) And
                     (paicpu(hp1)^.oper[0].typ = top_symbol) Then
                    Begin
                      { the new push is one more reference to the symbol }
                      Inc(paicpu(hp1)^.oper[0].sym^.refs);
                      hp2 := New(Paicpu,op_sym(A_PUSH,S_L,paicpu(hp1)^.oper[0].sym));
                      InsertLLItem(AsmL, p^.previous, p, hp2);
                      Paicpu(p)^.opcode := A_JMP;
                      AsmL^.Remove(hp1);
                      Dispose(hp1, Done)
                    End;
                A_MOV:
                  if (Paicpu(p)^.oper[0].typ = Top_Const) And
                     (Paicpu(p)^.oper[0].val = 0) And
                     (Paicpu(p)^.oper[1].typ = Top_Reg) Then
                    { change "mov $0, %reg" into "xor %reg, %reg" }
                    Begin
                      Paicpu(p)^.opcode := A_XOR;
                      Paicpu(p)^.LoadReg(0,Paicpu(p)^.oper[1].reg);
                    End;
                A_MOVZX:
                  Begin
                    If (Paicpu(p)^.oper[1].typ = top_reg) Then
                      If (Paicpu(p)^.oper[0].typ = top_reg)
                        Then
                          Case Paicpu(p)^.opsize of
                            S_BL:
                              Begin
                                { The source may not be a part of the
                                  destination register: the inserted xor
                                  would clear it before the movb reads it.
                                  The memory form below has the matching
                                  check on base and index. }
                                If IsGP32Reg(Paicpu(p)^.oper[1].reg) And
                                   (Reg32(Paicpu(p)^.oper[0].reg) <>
                                    Paicpu(p)^.oper[1].reg) And
                                   Not(CS_LittleSize in aktglobalswitches) And
                                   (aktoptprocessor = ClassP5)
                                  Then
                                    {Change "movzbl %reg1, %reg2" to
                                     "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                     PentiumMMX}
                                    Begin
                                      hp1 := New(Paicpu, op_reg_reg(A_XOR, S_L,
                                        Paicpu(p)^.oper[1].reg, Paicpu(p)^.oper[1].reg));
                                      InsertLLItem(AsmL,p^.previous, p, hp1);
                                      Paicpu(p)^.opcode := A_MOV;
                                      Paicpu(p)^.changeopsize(S_B);
                                      Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
                                    End;
                              End;
                          End
                        Else
                          If (Paicpu(p)^.oper[0].typ = top_ref) And
                             (Paicpu(p)^.oper[0].ref^.base <> Paicpu(p)^.oper[1].reg) And
                             (Paicpu(p)^.oper[0].ref^.index <> Paicpu(p)^.oper[1].reg) And
                             Not(CS_LittleSize in aktglobalswitches) And
                             IsGP32Reg(Paicpu(p)^.oper[1].reg) And
                             (aktoptprocessor = ClassP5) And
                             (Paicpu(p)^.opsize = S_BL)
                            Then
                              {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                               Pentium and PentiumMMX}
                              Begin
                                { build the xor from the 32 bit register
                                  before operand 1 is narrowed to 8 bit }
                                hp1 := New(Paicpu,op_reg_reg(A_XOR, S_L, Paicpu(p)^.oper[1].reg,
                                  Paicpu(p)^.oper[1].reg));
                                Paicpu(p)^.opcode := A_MOV;
                                Paicpu(p)^.changeopsize(S_B);
                                Paicpu(p)^.LoadReg(1,Reg32ToReg8(Paicpu(p)^.oper[1].reg));
                                InsertLLItem(AsmL,p^.previous, p, hp1);
                              End;
                  End;
              End;
            End;
        End;
        p := Pai(p^.next)
      End;
  End;
  1955. End.
  1956. {
  1957. $Log$
  1958. Revision 1.2 2000-10-24 10:40:54 jonas
  1959. + register renaming ("fixes" bug1088)
  1960. * changed command line options meanings for optimizer:
  1961. O2 now means peepholopts, CSE and register renaming in 1 pass
  1962. O3 is the same, but repeated until no further optimizations are
  1963. possible or until 5 passes have been done (to avoid endless loops)
  1964. * changed aopt386 so it does this looping
  1965. * added some procedures from csopt386 to the interface because they're
  1966. used by rropt386 as well
  1967. * some changes to csopt386 and daopt386 so that newly added instructions
  1968. by the CSE get optimizer info (they were simply skipped previously),
  1969. this fixes some bugs
  1970. Revision 1.1 2000/10/15 09:47:43 peter
  1971. * moved to i386/
  1972. Revision 1.13 2000/10/02 13:01:29 jonas
  1973. * fixed bug regarding removal of "test/or reg,reg": apparently, shr/shl
  1974. doesn't set the zero flag according to the contents of the register
  1975. after the shift :( (merged from fixes branch)
  1976. Revision 1.12 2000/09/24 15:06:23 peter
  1977. * use defines.inc
  1978. Revision 1.11 2000/09/18 11:28:36 jonas
  1979. * fixed web bug 1133 (merged from fixes branch)
  1980. Revision 1.10 2000/08/18 10:09:13 jonas
  1981. * fix for web bug1099 (merged from fixes branch)
  1982. Revision 1.9 2000/08/05 13:33:08 peter
  1983. * $ifdef go32v2 -> target_info.target=go32v2
  1984. Revision 1.8 2000/08/05 10:35:51 jonas
  1985. * readded l1 variable (between ifdef go32v2 to avoid hints/notes)
  1986. Revision 1.7 2000/08/04 22:00:52 peter
  1987. * merges from fixes
  1988. Revision 1.6 2000/07/31 08:44:05 jonas
  1989. - removed imul support from -dfoldarithops since "imull [reg32],[mem32]"
  1990. doesn't exist (merged from fixes branch)
  1991. Revision 1.5 2000/07/28 13:56:23 jonas
  1992. * fixed bug in shr/shl optimization when -Og is used (merged from fixes
  1993. branch)
  1994. Revision 1.4 2000/07/21 15:19:55 jonas
  1995. * daopt386: changes to getnextinstruction/getlastinstruction so they
  1996. ignore labels who have is_addr set
  1997. + daopt386/csopt386: remove loads of registers which are overwritten
  1998. before their contents are used (especially useful for removing superfluous
  1999. maybe_loadesi outputs and push/pops transformed by below optimization
  2000. + popt386: transform pop/pop/pop/.../push/push/push to sequences of
  2001. 'movl x(%esp),%reg' (only active when compiling a go32v2 compiler
  2002. currently because I don't know whether it's safe to do this under Win32/
  2003. Linux (because of problems we had when using esp as frame pointer on
  2004. those os'es)
  2005. Revision 1.3 2000/07/14 05:11:49 michael
  2006. + Patch to 1.1
  2007. Revision 1.2 2000/07/13 11:32:45 michael
  2008. + removed logs
  2009. }