popt386.pas 106 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235
  1. {
  2. $Id$
  3. Copyright (c) 1998-2000 by Florian Klaempfl and Jonas Maebe
  4. This unit contains the peephole optimizer.
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. Unit POpt386;
  19. {$i fpcdefs.inc}
  20. Interface
  21. Uses Aasm;
  22. Procedure PrePeepHoleOpts(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  23. Procedure PeepHoleOptPass1(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  24. Procedure PeepHoleOptPass2(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  25. Procedure PostPeepHoleOpts(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  26. Implementation
  27. Uses
  28. globtype,systems,
  29. globals,cgbase,
  30. {$ifdef finaldestdebug}
  31. cobjects,
  32. {$endif finaldestdebug}
  33. tainst,cpuinfo,cpubase,cpuasm,DAOpt386,cginfo,rgobj;
  Function RegUsedAfterInstruction(Reg: TRegister; p: Tai; Var UsedRegs: TRegSet): Boolean;
  { Reports whether Reg still holds a needed value after instruction p.
    Reg is first normalised with reg32, and UsedRegs is advanced past p via
    UpdateUsedRegs (so the caller's set is modified).
    The result is true when Reg is in the updated UsedRegs and either no
    instruction follows p or the following instruction does not load Reg
    with a new value (as judged by regLoadedWithNewValue). }
  Begin
    Reg := reg32(Reg);
    UpdateUsedRegs(UsedRegs, Tai(p.Next));
    If Not(Reg in UsedRegs) Then
      { not marked as used at all: no need to look at the next instruction }
      RegUsedAfterInstruction := False
    Else If Not(getNextInstruction(p, p)) Then
      { marked as used and nothing follows that could overwrite it }
      RegUsedAfterInstruction := True
    Else
      { marked as used: only really "used" if the next instruction does
        not simply overwrite it }
      RegUsedAfterInstruction := Not(regLoadedWithNewValue(Reg, false, p));
  End;
  function doFpuLoadStoreOpt(asmL: TAAsmoutput; var p: Tai): boolean;
  { Optimises a store-and-pop to memory (p) that is immediately followed by
    a load of the same memory location with the same operand size:
        fstp mem ; fld mem      or      fistp mem ; fild mem
    - If the pair is directly followed by LEAVE or RET and the location is
      addressed off the frame pointer without index, at an offset
      >= procinfo^.Return_Offset, both instructions are removed, p is moved
      to the LEAVE/RET and the result is true.
    - Otherwise, unless the operand size is S_FX or S_IQ, the store-and-pop
      is turned into a plain store (fst/fist) and the reload is removed.
    Returns true if a "continue" should be done after this optimization. }
  var
    reload, follower: Tai;
    isFloatPair, isIntPair: boolean;
  begin
    doFpuLoadStoreOpt := false;

    { p must store to memory and be followed by a real instruction }
    if Taicpu(p).oper[0].typ <> top_ref then
      exit;
    if not getNextInstruction(p, reload) then
      exit;
    if reload.typ <> ait_instruction then
      exit;

    { only the fstp/fld and fistp/fild pairings qualify }
    isFloatPair := (Taicpu(reload).opcode = A_FLD) and
                   (Taicpu(p).opcode = A_FSTP);
    isIntPair := (Taicpu(p).opcode = A_FISTP) and
                 (Taicpu(reload).opcode = A_FILD);
    if not (isFloatPair or isIntPair) then
      exit;

    { the reload must read exactly what was just written }
    if Taicpu(reload).oper[0].typ <> top_ref then
      exit;
    if Taicpu(reload).opsize <> Taicpu(p).opsize then
      exit;
    if not refsEqual(Taicpu(p).oper[0].ref^, Taicpu(reload).oper[0].ref^) then
      exit;

    if getNextInstruction(reload, follower) and
       (follower.typ = ait_instruction) and
       ((Taicpu(follower).opcode = A_LEAVE) or
        (Taicpu(follower).opcode = A_RET)) and
       (Taicpu(p).oper[0].ref^.Base = procinfo^.FramePointer) and
       (Taicpu(p).oper[0].ref^.Offset >= procinfo^.Return_Offset) and
       (Taicpu(p).oper[0].ref^.Index = R_NO) then
      begin
        { store + reload right before the routine exits: drop both }
        asml.remove(p);
        asml.remove(reload);
        p.free;
        reload.free;
        p := follower;
        removeLastDeallocForFuncRes(asmL, p);
        doFPULoadStoreOpt := true;
      end
    else if (Taicpu(p).opsize <> S_FX) and
            (Taicpu(p).opsize <> S_IQ) then
      { fst can't store an extended value! }
      begin
        { keep the value on the FPU stack: store without popping and
          drop the now redundant reload }
        if Taicpu(p).opcode = A_FSTP then
          Taicpu(p).opcode := A_FST
        else
          Taicpu(p).opcode := A_FIST;
        asml.remove(reload);
        reload.free;
      end;
  end;
  Procedure PrePeepHoleOpts(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  { Pre-pass of the peephole optimizer. Walks the list from BlockStart up to
    (not including) BlockEnd and rewrites three instruction patterns:
      * imul with a small constant  -> removed / mov / lea (+ add/lea)
      * shr/sar const1 followed by shl const2 on the same operand
                                    -> shift + and, and + shift, or just and
      * xor reg,reg                 -> mov $0,reg (see the note in that branch)

    Fixes relative to the previous revision:
      - "imul $6, reg1, reg2" emitted its second lea with reg1 as the
        destination, which clobbered the source and left 2*reg1 in reg2.
        The destination is now reg2, as the expansion requires.
      - The three-operand expansions for 6 and 12 read reg1 after reg2 has
        been written; when reg1 = reg2 that gave a wrong result. The aliased
        form is now handled with the two-operand sequence. }
  var
    p,hp1: Tai;
    l: Aword;
    tmpRef: treference;
    srcReg, destReg: TRegister;
    threeOp: boolean;
  Begin
    P := BlockStart;
    While (P <> BlockEnd) Do
      Begin
        Case p.Typ Of
          Ait_Instruction:
            Begin
              Case Taicpu(p).opcode Of
                A_IMUL:
                  {changes certain "imul const, %reg"'s to lea sequences}
                  Begin
                    If (Taicpu(p).oper[0].typ = Top_Const) And
                       (Taicpu(p).oper[1].typ = Top_Reg) And
                       (Taicpu(p).opsize = S_L) Then
                      If (Taicpu(p).oper[0].val = 1) Then
                        If (Taicpu(p).oper[2].typ = Top_None) Then
                          {remove "imul $1, reg"}
                          Begin
                            hp1 := Tai(p.Next);
                            asml.Remove(p);
                            p.free;
                            p := hp1;
                            Continue;
                          End
                        Else
                          {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                          Begin
                            hp1 := Taicpu.Op_Reg_Reg(A_MOV, S_L, Taicpu(p).oper[1].reg,Taicpu(p).oper[2].reg);
                            InsertLLItem(AsmL, p.previous, p.next, hp1);
                            p.free;
                            p := hp1;
                          End
                      Else If
                        ((Taicpu(p).oper[2].typ = Top_Reg) or
                         (Taicpu(p).oper[2].typ = Top_None)) And
                        (aktoptprocessor < ClassP6) And
                        (Taicpu(p).oper[0].val <= 12) And
                        Not(CS_LittleSize in aktglobalswitches) And
                        { lea/add do not set the flags like imul does, so the
                          result must not be followed by a jo/jno }
                        (Not(GetNextInstruction(p, hp1)) Or
                         Not((Tai(hp1).typ = ait_instruction) And
                             ((Taicpu(hp1).opcode=A_Jcc) and
                              (Taicpu(hp1).condition in [C_O,C_NO]))))
                      Then
                        Begin
                          { srcReg  = register being multiplied
                            destReg = register receiving the product
                            threeOp = true only for a genuine three-operand
                                      form with distinct source/destination;
                                      "imul c, r, r" is treated like "imul c, r" }
                          srcReg := Taicpu(p).oper[1].reg;
                          If (Taicpu(p).oper[2].typ = Top_None) Then
                            destReg := srcReg
                          Else
                            destReg := Taicpu(p).oper[2].reg;
                          threeOp := (Taicpu(p).oper[2].typ = Top_Reg) And
                                     (destReg <> srcReg);
                          reference_reset(tmpref);
                          Case Taicpu(p).oper[0].val Of
                            3: Begin
                                 {imul 3, reg1, reg2 to
                                    lea (reg1,reg1,2), reg2
                                  imul 3, reg1 to
                                    lea (reg1,reg1,2), reg1}
                                 TmpRef.base := srcReg;
                                 TmpRef.Index := srcReg;
                                 TmpRef.ScaleFactor := 2;
                                 hp1 := Taicpu.op_ref_reg(A_LEA, S_L, TmpRef, destReg);
                                 InsertLLItem(AsmL,p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               End;
                            5: Begin
                                 {imul 5, reg1, reg2 to
                                    lea (reg1,reg1,4), reg2
                                  imul 5, reg1 to
                                    lea (reg1,reg1,4), reg1}
                                 TmpRef.base := srcReg;
                                 TmpRef.Index := srcReg;
                                 TmpRef.ScaleFactor := 4;
                                 hp1 := Taicpu.op_ref_reg(A_LEA, S_L, TmpRef, destReg);
                                 InsertLLItem(AsmL,p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               End;
                            6: Begin
                                 {imul 6, reg1, reg2 to
                                    lea (,reg1,2), reg2
                                    lea (reg2,reg1,4), reg2
                                  imul 6, reg1 to
                                    lea (reg1,reg1,2), reg1
                                    add reg1, reg1}
                                 If (aktoptprocessor <= Class386) Then
                                   Begin
                                     { build the SECOND instruction first and
                                       put it right after p }
                                     If threeOp Then
                                       Begin
                                         TmpRef.base := destReg;
                                         TmpRef.Index := srcReg;
                                         TmpRef.ScaleFactor := 4;
                                         { destination must be reg2, not reg1 }
                                         hp1 := Taicpu.op_ref_reg(A_LEA, S_L, TmpRef, destReg);
                                       End
                                     Else
                                       hp1 := Taicpu.op_reg_reg(A_ADD, S_L, srcReg, srcReg);
                                     InsertLLItem(AsmL,p, p.next, hp1);
                                     { now the FIRST instruction, which takes
                                       the place of the imul }
                                     reference_reset(tmpref);
                                     TmpRef.Index := srcReg;
                                     TmpRef.ScaleFactor := 2;
                                     If threeOp Then
                                       Begin
                                         TmpRef.base := R_NO;
                                         hp1 := Taicpu.op_ref_reg(A_LEA, S_L, TmpRef, destReg);
                                       End
                                     Else
                                       Begin
                                         TmpRef.base := srcReg;
                                         hp1 := Taicpu.op_ref_reg(A_LEA, S_L, TmpRef, srcReg);
                                       End;
                                     InsertLLItem(AsmL,p.previous, p.next, hp1);
                                     p.free;
                                     { continue after the second instruction }
                                     p := Tai(hp1.next);
                                   End
                               End;
                            9: Begin
                                 {imul 9, reg1, reg2 to
                                    lea (reg1,reg1,8), reg2
                                  imul 9, reg1 to
                                    lea (reg1,reg1,8), reg1}
                                 TmpRef.base := srcReg;
                                 TmpRef.Index := srcReg;
                                 TmpRef.ScaleFactor := 8;
                                 hp1 := Taicpu.op_ref_reg(A_LEA, S_L, TmpRef, destReg);
                                 InsertLLItem(AsmL,p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               End;
                            10: Begin
                                 {imul 10, reg1, reg2 to
                                    lea (reg1,reg1,4), reg2
                                    add reg2, reg2
                                  imul 10, reg1 to
                                    lea (reg1,reg1,4), reg1
                                    add reg1, reg1}
                                 If (aktoptprocessor <= Class386) Then
                                   Begin
                                     { second instruction: double the result }
                                     hp1 := Taicpu.op_reg_reg(A_ADD, S_L, destReg, destReg);
                                     InsertLLItem(AsmL,p, p.next, hp1);
                                     { first instruction: 5 * source }
                                     TmpRef.base := srcReg;
                                     TmpRef.Index := srcReg;
                                     TmpRef.ScaleFactor := 4;
                                     hp1 := Taicpu.op_ref_reg(A_LEA, S_L, TmpRef, destReg);
                                     InsertLLItem(AsmL,p.previous, p.next, hp1);
                                     p.free;
                                     p := Tai(hp1.next);
                                   End
                               End;
                            12: Begin
                                 {imul 12, reg1, reg2 to
                                    lea (,reg1,4), reg2
                                    lea (reg2,reg1,8), reg2
                                  imul 12, reg1 to
                                    lea (reg1,reg1,2), reg1
                                    lea (,reg1,4), reg1}
                                 If (aktoptprocessor <= Class386) Then
                                   Begin
                                     { second instruction, placed after p }
                                     TmpRef.Index := srcReg;
                                     If threeOp Then
                                       Begin
                                         TmpRef.base := destReg;
                                         TmpRef.ScaleFactor := 8;
                                         hp1 := Taicpu.op_ref_reg(A_LEA, S_L, TmpRef, destReg);
                                       End
                                     Else
                                       Begin
                                         TmpRef.base := R_NO;
                                         TmpRef.ScaleFactor := 4;
                                         hp1 := Taicpu.op_ref_reg(A_LEA, S_L, TmpRef, srcReg);
                                       End;
                                     InsertLLItem(AsmL,p, p.next, hp1);
                                     { first instruction, replacing the imul }
                                     reference_reset(tmpref);
                                     TmpRef.Index := srcReg;
                                     If threeOp Then
                                       Begin
                                         TmpRef.base := R_NO;
                                         TmpRef.ScaleFactor := 4;
                                         hp1 := Taicpu.op_ref_reg(A_LEA, S_L, TmpRef, destReg);
                                       End
                                     Else
                                       Begin
                                         TmpRef.base := srcReg;
                                         TmpRef.ScaleFactor := 2;
                                         hp1 := Taicpu.op_ref_reg(A_LEA, S_L, TmpRef, srcReg);
                                       End;
                                     InsertLLItem(AsmL,p.previous, p.next, hp1);
                                     p.free;
                                     p := Tai(hp1.next);
                                   End
                               End
                          End;
                        End;
                  End;
                A_SAR, A_SHR:
                  {changes the code sequence
                     shr/sar const1, x
                     shl const2, x
                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                  Begin
                    If GetNextInstruction(p, hp1) And
                       (Tai(hp1).typ = ait_instruction) and
                       (Taicpu(hp1).opcode = A_SHL) and
                       (Taicpu(p).oper[0].typ = top_const) and
                       (Taicpu(hp1).oper[0].typ = top_const) and
                       (Taicpu(hp1).opsize = Taicpu(p).opsize) And
                       (Taicpu(hp1).oper[1].typ = Taicpu(p).oper[1].typ) And
                       OpsEqual(Taicpu(hp1).oper[1], Taicpu(p).oper[1])
                    Then
                      If (Taicpu(p).oper[0].val > Taicpu(hp1).oper[0].val) And
                         Not(CS_LittleSize In aktglobalswitches)
                      Then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 > const2:
                          shift right by the difference, then clear the low
                          const2 bits }
                        Begin
                          Taicpu(p).LoadConst(0,Taicpu(p).oper[0].val-Taicpu(hp1).oper[0].val);
                          Taicpu(hp1).opcode := A_AND;
                          l := (1 shl (Taicpu(hp1).oper[0].val)) - 1;
                          Case Taicpu(p).opsize Of
                            S_L: Taicpu(hp1).LoadConst(0,l Xor aword($ffffffff));
                            S_B: Taicpu(hp1).LoadConst(0,l Xor $ff);
                            S_W: Taicpu(hp1).LoadConst(0,l Xor $ffff);
                          End;
                        End
                      Else
                        If (Taicpu(p).oper[0].val<Taicpu(hp1).oper[0].val) And
                           Not(CS_LittleSize In aktglobalswitches)
                        Then
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 < const2:
                            clear the low const1 bits, then shift left by the
                            difference }
                          Begin
                            Taicpu(hp1).LoadConst(0,Taicpu(hp1).oper[0].val-Taicpu(p).oper[0].val);
                            Taicpu(p).opcode := A_AND;
                            l := (1 shl (Taicpu(p).oper[0].val))-1;
                            Case Taicpu(p).opsize Of
                              S_L: Taicpu(p).LoadConst(0,l Xor aword($ffffffff));
                              S_B: Taicpu(p).LoadConst(0,l Xor $ff);
                              S_W: Taicpu(p).LoadConst(0,l Xor $ffff);
                            End;
                          End
                        Else
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 = const2:
                            a single "and" that clears the low bits suffices }
                          if (Taicpu(p).oper[0].val = Taicpu(hp1).oper[0].val) then
                            Begin
                              Taicpu(p).opcode := A_AND;
                              l := (1 shl (Taicpu(p).oper[0].val))-1;
                              Case Taicpu(p).opsize Of
                                S_B: Taicpu(p).LoadConst(0,l Xor $ff);
                                S_W: Taicpu(p).LoadConst(0,l Xor $ffff);
                                S_L: Taicpu(p).LoadConst(0,l Xor aword($ffffffff));
                              End;
                              asml.remove(hp1);
                              hp1.free;
                            End;
                  End;
                A_XOR:
                  If (Taicpu(p).oper[0].typ = top_reg) And
                     (Taicpu(p).oper[1].typ = top_reg) And
                     (Taicpu(p).oper[0].reg = Taicpu(p).oper[1].reg) then
                    { temporarily change this to 'mov reg,0' to make it easier }
                    { for the CSE. Will be changed back in pass 2 }
                    begin
                      Taicpu(p).opcode := A_MOV;
                      Taicpu(p).loadconst(0,0);
                    end;
              End;
            End;
        End;
        p := Tai(p.next)
      End;
  End;
  388. Procedure PeepHoleOptPass1(Asml: TAAsmOutput; BlockStart, BlockEnd: Tai);
  389. {First pass of peepholeoptimizations}
  390. Var
  391. l,l1 : longint;
  392. p,hp1,hp2 : Tai;
  393. hp3,hp4: Tai;
  394. TmpRef: TReference;
  395. UsedRegs, TmpUsedRegs: TRegSet;
  396. TmpBool1, TmpBool2: Boolean;
  Function SkipLabels(hp: Tai; var hp2: Tai): boolean;
  { Starting at hp, steps over every following item whose type is in
    SkipInstr or is a label or an alignment.
    On success hp2 receives the first item after that run and the result is
    true. If the list ends first, hp2 receives the last item reached and the
    result is false. }
  var
    cursor: Tai;
  Begin
    cursor := hp;
    While (cursor.next <> nil) and
          (Tai(cursor.next).typ In SkipInstr + [ait_label,ait_align]) Do
      cursor := Tai(cursor.next);
    SkipLabels := (cursor.next <> nil);
    If (cursor.next <> nil) Then
      hp2 := Tai(cursor.next)
    Else
      hp2 := cursor;
  End;
  function GetFinalDestination(AsmL: TAAsmOutput; hp: Taicpu; level: longint): boolean;
  {traces successive jumps to their final destination and sets it, e.g.
     je l1                je l3
     <code>               <code>
     l1:       becomes    l1:
     je l2                je l3
     <code>               <code>
     l2:                  l2:
     jmp l3               jmp l3

   the level parameter denotes how deep we have already followed the jump,
   to avoid endless loops with constructs such as "l5: ; jmp l5"

   Returns false when tracing was abandoned (recursion limit reached, a
   direct self-loop detected, or a recursive call failed) and true otherwise.

   Fixes relative to the previous revision:
     - the result is now initialised before the recursion-limit check, so
       hitting the limit returns a defined "false" instead of an undefined
       value
     - the two finaldestdebug statements had one closing parenthesis too
       many and did not compile with that define set }
  Var p1, p2: Tai;
      l: tasmlabel;

    Function FindAnyLabel(hp: Tai; var l: tasmlabel): Boolean;
    { Skips items following hp whose type is in SkipInstr or is an alignment;
      if the next item after that is a label, returns true and stores it in
      l, otherwise returns false and leaves l untouched. }
    Begin
      FindAnyLabel := false;
      While assigned(hp.next) and
            (Tai(hp.next).typ In (SkipInstr+[ait_align])) Do
        hp := Tai(hp.next);
      If assigned(hp.next) and
         (Tai(hp.next).typ = ait_label) Then
        Begin
          FindAnyLabel := true;
          l := Tai_label(hp.next).l;
        End
    End;

  Begin
    { set the result first: every early "exit" below must report failure }
    GetFinalDestination := false;
    if level > 20 then
      exit;
    If (tasmlabel(hp.oper[0].sym).labelnr >= LoLab) and
       (tasmlabel(hp.oper[0].sym).labelnr <= HiLab) and   {range check, a jump can go past an assembler block!}
       Assigned(LTable^[tasmlabel(hp.oper[0].sym).labelnr-LoLab].TaiObj) Then
      Begin
        p1 := LTable^[tasmlabel(hp.oper[0].sym).labelnr-LoLab].TaiObj; {the jump's destination}
        SkipLabels(p1,p1);
        If (Tai(p1).typ = ait_instruction) and
           (Taicpu(p1).is_jmp) Then
          If { the next instruction after the label where the jump hp arrives}
             { is unconditional or of the same type as hp, so continue       }
             (Taicpu(p1).condition in [C_None,hp.condition]) or
             { the next instruction after the label where the jump hp arrives}
             { is the opposite of hp (so this one is never taken), but after }
             { that one there is a branch that will be taken, so perform a   }
             { little hack: set p1 equal to this instruction (that's what the}
             { last SkipLabels is for, only works with short bool evaluation)}
             ((Taicpu(p1).condition = inverse_cond[hp.condition]) and
              SkipLabels(p1,p2) and
              (p2.typ = ait_instruction) and
              (Taicpu(p2).is_jmp) and
              (Taicpu(p2).condition in [C_None,hp.condition]) and
              SkipLabels(p1,p1)) Then
            Begin
              { quick check for loops of the form "l5: ; jmp l5 }
              if (tasmlabel(Taicpu(p1).oper[0].sym).labelnr =
                   tasmlabel(hp.oper[0].sym).labelnr) then
                exit;
              { resolve the target jump first, then retarget hp to wherever }
              { it ends up and move the label reference accordingly         }
              if not GetFinalDestination(asml, Taicpu(p1),succ(level)) then
                exit;
              Dec(tasmlabel(hp.oper[0].sym).refs);
              hp.oper[0].sym:=Taicpu(p1).oper[0].sym;
              inc(tasmlabel(hp.oper[0].sym).refs);
            End
          Else
            { the jump at the destination is the inverse of hp, so it is }
            { never taken when reached via hp: jump right behind it      }
            If (Taicpu(p1).condition = inverse_cond[hp.condition]) then
              if not FindAnyLabel(p1,l) then
                begin
  {$ifdef finaldestdebug}
                  insertllitem(asml,p1,p1.next,Tai_asm_comment.Create(
                    strpnew('previous label inserted')));
  {$endif finaldestdebug}
                  getlabel(l);
                  insertllitem(asml,p1,p1.next,Tai_label.Create(l));
                  dec(tasmlabel(Taicpu(hp).oper[0].sym).refs);
                  hp.oper[0].sym := l;
                  inc(l.refs);
                  { this won't work, since the new label isn't in the labeltable }
                  { so it will fail the rangecheck. Labeltable should become a   }
                  { hashtable to support this:                                   }
                  { GetFinalDestination(asml, hp);                               }
                end
              else
                begin
  {$ifdef finaldestdebug}
                  insertllitem(asml,p1,p1.next,Tai_asm_comment.Create(
                    strpnew('next label reused')));
  {$endif finaldestdebug}
                  { NOTE(review): unlike the branch above, the reference    }
                  { count of the label hp pointed to before is not          }
                  { decremented here - confirm whether that is intentional  }
                  inc(l.refs);
                  hp.oper[0].sym := l;
                  if not GetFinalDestination(asml, hp,succ(level)) then
                    exit;
                end;
      End;
    GetFinalDestination := true;
  End;
  Function DoSubAddOpt(var p: Tai): Boolean;
  { Folds the instruction preceding p into p's constant operand, where p is
    expected to be a "sub const, reg" (presumably guaranteed by the caller -
    this routine does not check p's opcode itself):
        dec reg        ; sub c, reg   ->  sub c+1, reg
        sub c1, reg    ; sub c, reg   ->  sub c+c1, reg
        add c1, reg    ; sub c, reg   ->  sub c-c1, reg
    In the last case, if the combined constant is 0 the sub itself is removed
    as well, p is moved to the previous instruction (or to the next item if
    there is none) and the result is true. In all other cases the result is
    false.
    Note: uses hp1 of the enclosing procedure as scratch variable. }

    Function PrevIsConstOpOnSameReg: Boolean;
    { true if hp1 has the form "op const, reg" with reg = p's destination }
    Begin
      PrevIsConstOpOnSameReg :=
        (Taicpu(hp1).oper[0].typ = top_const) And
        (Taicpu(hp1).oper[1].typ = top_reg) And
        (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg);
    End;

    Procedure DropPrev;
    { unlinks and releases the instruction held in hp1 }
    Begin
      asml.Remove(hp1);
      hp1.free;
    End;

  Begin
    DoSubAddOpt := False;
    If Not GetLastInstruction(p, hp1) Then
      exit;
    If (hp1.typ <> ait_instruction) Then
      exit;
    If (Taicpu(hp1).opsize <> Taicpu(p).opsize) Then
      exit;

    If (Taicpu(hp1).opcode = A_DEC) Then
      Begin
        If (Taicpu(hp1).oper[0].typ = top_reg) And
           (Taicpu(hp1).oper[0].reg = Taicpu(p).oper[1].reg) Then
          Begin
            Taicpu(p).LoadConst(0,Taicpu(p).oper[0].val+1);
            DropPrev;
          End;
      End
    Else If (Taicpu(hp1).opcode = A_SUB) Then
      Begin
        If PrevIsConstOpOnSameReg Then
          Begin
            Taicpu(p).LoadConst(0,Taicpu(p).oper[0].val+Taicpu(hp1).oper[0].val);
            DropPrev;
          End;
      End
    Else If (Taicpu(hp1).opcode = A_ADD) Then
      Begin
        If PrevIsConstOpOnSameReg Then
          Begin
            Taicpu(p).LoadConst(0,AWord(int64(Taicpu(p).oper[0].val)-int64(Taicpu(hp1).oper[0].val)));
            DropPrev;
            If (Taicpu(p).oper[0].val = 0) Then
              Begin
                { add and sub cancelled out completely: remove the sub too }
                hp1 := Tai(p.next);
                asml.Remove(p);
                p.free;
                If Not GetLastInstruction(hp1, p) Then
                  p := hp1;
                DoSubAddOpt := True;
              End
          End;
      End;
  End;
  553. Begin
  554. P := BlockStart;
  555. UsedRegs := [];
  556. While (P <> BlockEnd) Do
  557. Begin
  558. UpDateUsedRegs(UsedRegs, Tai(p.next));
  559. Case p.Typ Of
  560. ait_instruction:
  561. Begin
  562. { Handle Jmp Optimizations }
  563. if Taicpu(p).is_jmp then
  564. begin
  565. {the following if-block removes all code between a jmp and the next label,
  566. because it can never be executed}
  567. If (Taicpu(p).opcode = A_JMP) Then
  568. Begin
  569. While GetNextInstruction(p, hp1) and
  570. (hp1.typ <> ait_label) do
  571. If not(hp1.typ in ([ait_label,ait_align]+skipinstr)) Then
  572. Begin
  573. asml.Remove(hp1);
  574. hp1.free;
  575. End
  576. else break;
  577. End;
  578. { remove jumps to a label coming right after them }
  579. If GetNextInstruction(p, hp1) then
  580. Begin
  581. if FindLabel(tasmlabel(Taicpu(p).oper[0].sym), hp1) then
  582. Begin
  583. hp2:=Tai(hp1.next);
  584. asml.remove(p);
  585. p.free;
  586. p:=hp2;
  587. continue;
  588. end
  589. Else
  590. Begin
  591. if hp1.typ = ait_label then
  592. SkipLabels(hp1,hp1);
  593. If (Tai(hp1).typ=ait_instruction) and
  594. (Taicpu(hp1).opcode=A_JMP) and
  595. GetNextInstruction(hp1, hp2) And
  596. FindLabel(tasmlabel(Taicpu(p).oper[0].sym), hp2)
  597. Then
  598. Begin
  599. if Taicpu(p).opcode=A_Jcc then
  600. Taicpu(p).condition:=inverse_cond[Taicpu(p).condition]
  601. else
  602. begin
  603. If (LabDif <> 0) Then
  604. GetFinalDestination(asml, Taicpu(p),0);
  605. p:=Tai(p.next);
  606. continue;
  607. end;
  608. Dec(Tai_label(hp2).l.refs);
  609. Taicpu(p).oper[0].sym:=Taicpu(hp1).oper[0].sym;
  610. Inc(Taicpu(p).oper[0].sym.refs);
  611. asml.remove(hp1);
  612. hp1.free;
  613. If (LabDif <> 0) Then
  614. GetFinalDestination(asml, Taicpu(p),0);
  615. end
  616. else
  617. If (LabDif <> 0) Then
  618. GetFinalDestination(asml, Taicpu(p),0);
  619. end;
  620. end;
  621. end
  622. else
  623. { All other optimizes }
  624. begin
  625. For l := 0 to 2 Do
  626. If (Taicpu(p).oper[l].typ = top_ref) Then
  627. With Taicpu(p).oper[l].ref^ Do
  628. Begin
  629. If (base = R_NO) And
  630. (index <> R_NO) And
  631. (scalefactor in [0,1])
  632. Then
  633. Begin
  634. base := index;
  635. index := R_NO
  636. End
  637. End;
  638. Case Taicpu(p).opcode Of
  639. A_AND:
  640. Begin
  641. If (Taicpu(p).oper[0].typ = top_const) And
  642. (Taicpu(p).oper[1].typ = top_reg) And
  643. GetNextInstruction(p, hp1) And
  644. (Tai(hp1).typ = ait_instruction) And
  645. (Taicpu(hp1).opcode = A_AND) And
  646. (Taicpu(hp1).oper[0].typ = top_const) And
  647. (Taicpu(hp1).oper[1].typ = top_reg) And
  648. (Taicpu(p).oper[1].reg = Taicpu(hp1).oper[1].reg)
  649. Then
  650. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  651. Begin
  652. Taicpu(p).LoadConst(0,Taicpu(p).oper[0].val And Taicpu(hp1).oper[0].val);
  653. asml.Remove(hp1);
  654. hp1.free;
  655. End
  656. Else
  657. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  658. jump, but only if it's a conditional jump (PFV) }
  659. If (Taicpu(p).oper[1].typ = top_reg) And
  660. GetNextInstruction(p, hp1) And
  661. (hp1.typ = ait_instruction) And
  662. (Taicpu(hp1).is_jmp) and
  663. (Taicpu(hp1).opcode<>A_JMP) and
  664. Not(Taicpu(p).oper[1].reg in UsedRegs) Then
  665. Taicpu(p).opcode := A_TEST;
  666. End;
  667. A_CMP:
  668. Begin
  669. If (Taicpu(p).oper[0].typ = top_const) And
  670. (Taicpu(p).oper[1].typ in [top_reg,top_ref]) And
  671. (Taicpu(p).oper[0].val = 0) and
  672. GetNextInstruction(p, hp1) And
  673. (hp1.typ = ait_instruction) And
  674. (Taicpu(hp1).is_jmp) and
  675. (Taicpu(hp1).opcode=A_Jcc) and
  676. (Taicpu(hp1).condition in [C_LE,C_BE]) and
  677. GetNextInstruction(hp1,hp2) and
  678. (hp2.typ = ait_instruction) and
  679. (Taicpu(hp2).opcode = A_DEC) And
  680. OpsEqual(Taicpu(hp2).oper[0],Taicpu(p).oper[1]) And
  681. GetNextInstruction(hp2, hp3) And
  682. (hp3.typ = ait_instruction) and
  683. (Taicpu(hp3).is_jmp) and
  684. (Taicpu(hp3).opcode = A_JMP) And
  685. GetNextInstruction(hp3, hp4) And
  686. FindLabel(tasmlabel(Taicpu(hp1).oper[0].sym),hp4) Then
  687. Begin
  688. Taicpu(hp2).Opcode := A_SUB;
  689. Taicpu(hp2).Loadoper(1,Taicpu(hp2).oper[0]);
  690. Taicpu(hp2).LoadConst(0,1);
  691. Taicpu(hp2).ops:=2;
  692. Taicpu(hp3).Opcode := A_Jcc;
  693. Case Taicpu(hp1).condition of
  694. C_LE: Taicpu(hp3).condition := C_GE;
  695. C_BE: Taicpu(hp3).condition := C_AE;
  696. End;
  697. asml.Remove(p);
  698. asml.Remove(hp1);
  699. p.free;
  700. hp1.free;
  701. p := hp2;
  702. continue;
  703. End
  704. End;
  705. A_FLD:
  706. Begin
  707. If (Taicpu(p).oper[0].typ = top_reg) And
  708. GetNextInstruction(p, hp1) And
  709. (hp1.typ = Ait_Instruction) And
  710. (Taicpu(hp1).oper[0].typ = top_reg) And
  711. (Taicpu(hp1).oper[1].typ = top_reg) And
  712. (Taicpu(hp1).oper[0].reg = R_ST) And
  713. (Taicpu(hp1).oper[1].reg = R_ST1) Then
  714. { change to
  715. fld reg fxxx reg,st
  716. fxxxp st, st1 (hp1)
  717. Remark: non commutative operations must be reversed!
  718. }
  719. begin
  720. Case Taicpu(hp1).opcode Of
  721. A_FMULP,A_FADDP,
  722. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  723. begin
  724. Case Taicpu(hp1).opcode Of
  725. A_FADDP: Taicpu(hp1).opcode := A_FADD;
  726. A_FMULP: Taicpu(hp1).opcode := A_FMUL;
  727. A_FSUBP: Taicpu(hp1).opcode := A_FSUBR;
  728. A_FSUBRP: Taicpu(hp1).opcode := A_FSUB;
  729. A_FDIVP: Taicpu(hp1).opcode := A_FDIVR;
  730. A_FDIVRP: Taicpu(hp1).opcode := A_FDIV;
  731. End;
  732. Taicpu(hp1).oper[0].reg := Taicpu(p).oper[0].reg;
  733. Taicpu(hp1).oper[1].reg := R_ST;
  734. asml.Remove(p);
  735. p.free;
  736. p := hp1;
  737. Continue;
  738. end;
  739. end;
  740. end
  741. else
  742. If (Taicpu(p).oper[0].typ = top_ref) And
  743. GetNextInstruction(p, hp2) And
  744. (hp2.typ = Ait_Instruction) And
  745. (Taicpu(hp2).oper[0].typ = top_reg) And
  746. (Taicpu(hp2).oper[1].typ = top_reg) And
  747. (Taicpu(p).opsize in [S_FS, S_FL]) And
  748. (Taicpu(hp2).oper[0].reg = R_ST) And
  749. (Taicpu(hp2).oper[1].reg = R_ST1) Then
  750. If GetLastInstruction(p, hp1) And
  751. (hp1.typ = Ait_Instruction) And
  752. ((Taicpu(hp1).opcode = A_FLD) Or
  753. (Taicpu(hp1).opcode = A_FST)) And
  754. (Taicpu(hp1).opsize = Taicpu(p).opsize) And
  755. (Taicpu(hp1).oper[0].typ = top_ref) And
  756. RefsEqual(Taicpu(p).oper[0].ref^, Taicpu(hp1).oper[0].ref^) Then
  757. If ((Taicpu(hp2).opcode = A_FMULP) Or
  758. (Taicpu(hp2).opcode = A_FADDP)) Then
  759. { change to
  760. fld/fst mem1 (hp1) fld/fst mem1
  761. fld mem1 (p) fadd/
  762. faddp/ fmul st, st
  763. fmulp st, st1 (hp2) }
  764. Begin
  765. asml.Remove(p);
  766. p.free;
  767. p := hp1;
  768. If (Taicpu(hp2).opcode = A_FADDP) Then
  769. Taicpu(hp2).opcode := A_FADD
  770. Else
  771. Taicpu(hp2).opcode := A_FMUL;
  772. Taicpu(hp2).oper[1].reg := R_ST;
  773. End
  774. Else
  775. { change to
  776. fld/fst mem1 (hp1) fld/fst mem1
  777. fld mem1 (p) fld st}
  778. Begin
  779. Taicpu(p).changeopsize(S_FL);
  780. Taicpu(p).loadreg(0,R_ST);
  781. End
  782. Else
  783. Begin
  784. Case Taicpu(hp2).opcode Of
  785. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  786. { change to
  787. fld/fst mem1 (hp1) fld/fst mem1
  788. fld mem2 (p) fxxx mem2
  789. fxxxp st, st1 (hp2) }
  790. Begin
  791. Case Taicpu(hp2).opcode Of
  792. A_FADDP: Taicpu(p).opcode := A_FADD;
  793. A_FMULP: Taicpu(p).opcode := A_FMUL;
  794. A_FSUBP: Taicpu(p).opcode := A_FSUBR;
  795. A_FSUBRP: Taicpu(p).opcode := A_FSUB;
  796. A_FDIVP: Taicpu(p).opcode := A_FDIVR;
  797. A_FDIVRP: Taicpu(p).opcode := A_FDIV;
  798. End;
  799. asml.Remove(hp2);
  800. hp2.free;
  801. End
  802. End
  803. End
  804. End;
  805. A_FSTP,A_FISTP:
  806. if doFpuLoadStoreOpt(asmL,p) then
  807. continue;
  808. A_LEA:
  809. Begin
  810. {removes seg register prefixes from LEA operations, as they
  811. don't do anything}
  812. Taicpu(p).oper[0].ref^.Segment := R_NO;
  813. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  814. If (Taicpu(p).oper[0].ref^.Base In [R_EAX..R_EDI]) And
  815. (Taicpu(p).oper[0].ref^.Index = R_NO) And
  816. (Not(Assigned(Taicpu(p).oper[0].ref^.Symbol))) Then
  817. If (Taicpu(p).oper[0].ref^.Base <> Taicpu(p).oper[1].reg)
  818. and (Taicpu(p).oper[0].ref^.Offset = 0)
  819. Then
  820. Begin
  821. hp1 := Taicpu.op_reg_reg(A_MOV, S_L,Taicpu(p).oper[0].ref^.Base,
  822. Taicpu(p).oper[1].reg);
  823. InsertLLItem(AsmL,p.previous,p.next, hp1);
  824. p.free;
  825. p := hp1;
  826. Continue;
  827. End
  828. Else
  829. if (Taicpu(p).oper[0].ref^.Offset = 0) then
  830. Begin
  831. hp1 := Tai(p.Next);
  832. asml.Remove(p);
  833. p.free;
  834. p := hp1;
  835. Continue;
  836. End
  837. else
  838. with Taicpu(p).oper[0].ref^ do
  839. if (Base = Taicpu(p).oper[1].reg) then
  840. begin
  841. l := offset+offsetfixup;
  842. if (l=1) then
  843. begin
  844. Taicpu(p).opcode := A_INC;
  845. Taicpu(p).loadreg(0,Taicpu(p).oper[1].reg);
  846. Taicpu(p).ops := 1
  847. end
  848. else
  849. if (l=-1) then
  850. begin
  851. Taicpu(p).opcode := A_DEC;
  852. Taicpu(p).loadreg(0,Taicpu(p).oper[1].reg);
  853. Taicpu(p).ops := 1;
  854. end
  855. else
  856. begin
  857. Taicpu(p).opcode := A_ADD;
  858. Taicpu(p).loadconst(0,aword(l));
  859. end;
  860. end;
  861. End;
  862. A_MOV:
  863. Begin
  864. TmpUsedRegs := UsedRegs;
  865. If (Taicpu(p).oper[1].typ = top_reg) And
  866. (Taicpu(p).oper[1].reg In [R_EAX, R_EBX, R_EDX, R_EDI]) And
  867. GetNextInstruction(p, hp1) And
  868. (Tai(hp1).typ = ait_instruction) And
  869. (Taicpu(hp1).opcode = A_MOV) And
  870. (Taicpu(hp1).oper[0].typ = top_reg) And
  871. (Taicpu(hp1).oper[0].reg = Taicpu(p).oper[1].reg)
  872. Then
  873. {we have "mov x, %treg; mov %treg, y}
  874. If not(RegUsedAfterInstruction(Taicpu(p).oper[1].reg, hp1, TmpUsedRegs)) then
  875. {we've got "mov x, %treg; mov %treg, y; with %treg is not used after }
  876. Case Taicpu(p).oper[0].typ Of
  877. top_reg:
  878. Begin
  879. { change "mov %reg, %treg; mov %treg, y"
  880. to "mov %reg, y" }
  881. Taicpu(p).LoadOper(1,Taicpu(hp1).oper[1]);
  882. asml.Remove(hp1);
  883. hp1.free;
  884. continue;
  885. End;
  886. top_ref:
  887. If (Taicpu(hp1).oper[1].typ = top_reg) Then
  888. Begin
  889. { change "mov mem, %treg; mov %treg, %reg"
  890. to "mov mem, %reg" }
  891. Taicpu(p).Loadoper(1,Taicpu(hp1).oper[1]);
  892. asml.Remove(hp1);
  893. hp1.free;
  894. continue;
  895. End;
  896. End
  897. Else
  898. Else
  899. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  900. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  901. penalty}
  902. If (Taicpu(p).oper[0].typ = top_reg) And
  903. (Taicpu(p).oper[1].typ = top_reg) And
  904. GetNextInstruction(p,hp1) And
  905. (Tai(hp1).typ = ait_instruction) And
  906. (Taicpu(hp1).oper[0].typ = top_reg) And
  907. (Taicpu(hp1).oper[0].reg = Taicpu(p).oper[1].reg)
  908. Then
  909. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  910. Begin
  911. If ((Taicpu(hp1).opcode = A_OR) Or
  912. (Taicpu(hp1).opcode = A_TEST)) And
  913. (Taicpu(hp1).oper[1].typ = top_reg) And
  914. (Taicpu(hp1).oper[0].reg = Taicpu(hp1).oper[1].reg)
  915. Then
  916. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  917. Begin
  918. TmpUsedRegs := UsedRegs;
  919. { reg1 will be used after the first instruction, }
  920. { so update the allocation info }
  921. allocRegBetween(asmL,Taicpu(p).oper[0].reg,p,hp1);
  922. If GetNextInstruction(hp1, hp2) And
  923. (hp2.typ = ait_instruction) And
  924. Taicpu(hp2).is_jmp and
  925. Not(RegUsedAfterInstruction(Taicpu(hp1).oper[0].reg, hp1, TmpUsedRegs))
  926. Then
  927. {change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  928. "test %reg1, %reg1; jxx"}
  929. Begin
  930. Taicpu(hp1).Loadoper(0,Taicpu(p).oper[0]);
  931. Taicpu(hp1).Loadoper(1,Taicpu(p).oper[0]);
  932. asml.Remove(p);
  933. p.free;
  934. p := hp1;
  935. continue
  936. End
  937. Else
  938. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  939. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  940. Begin
  941. Taicpu(hp1).Loadoper(0,Taicpu(p).oper[0]);
  942. Taicpu(hp1).Loadoper(1,Taicpu(p).oper[0]);
  943. End;
  944. End
  945. { Else
  946. If (Taicpu(p.next)^.opcode
  947. In [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  948. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  949. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  950. End
  951. Else
  952. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  953. x >= RetOffset) as it doesn't do anything (it writes either to a
  954. parameter or to the temporary storage room for the function
  955. result)}
  956. If GetNextInstruction(p, hp1) And
  957. (Tai(hp1).typ = ait_instruction)
  958. Then
  959. If ((Taicpu(hp1).opcode = A_LEAVE) Or
  960. (Taicpu(hp1).opcode = A_RET)) And
  961. (Taicpu(p).oper[1].typ = top_ref) And
  962. (Taicpu(p).oper[1].ref^.base = procinfo^.FramePointer) And
  963. (Taicpu(p).oper[1].ref^.offset >= procinfo^.Return_Offset) And
  964. (Taicpu(p).oper[1].ref^.index = R_NO) And
  965. (Taicpu(p).oper[0].typ = top_reg)
  966. Then
  967. Begin
  968. asml.Remove(p);
  969. p.free;
  970. p := hp1;
  971. RemoveLastDeallocForFuncRes(asmL,p);
  972. End
  973. Else
  974. If (Taicpu(p).oper[0].typ = top_reg) And
  975. (Taicpu(p).oper[1].typ = top_ref) And
  976. (Taicpu(p).opsize = Taicpu(hp1).opsize) And
  977. (Taicpu(hp1).opcode = A_CMP) And
  978. (Taicpu(hp1).oper[1].typ = top_ref) And
  979. RefsEqual(Taicpu(p).oper[1].ref^, Taicpu(hp1).oper[1].ref^) Then
  980. {change "mov reg1, mem1; cmp x, mem1" to "mov reg, mem1; cmp x, reg1"}
  981. begin
  982. Taicpu(hp1).loadreg(1,Taicpu(p).oper[0].reg);
  983. allocRegBetween(asmL,Taicpu(p).oper[0].reg,p,hp1);
  984. end;
  985. { Next instruction is also a MOV ? }
  986. If GetNextInstruction(p, hp1) And
  987. (Tai(hp1).typ = ait_instruction) and
  988. (Taicpu(hp1).opcode = A_MOV) and
  989. (Taicpu(hp1).opsize = Taicpu(p).opsize)
  990. Then
  991. Begin
  992. If (Taicpu(hp1).oper[0].typ = Taicpu(p).oper[1].typ) and
  993. (Taicpu(hp1).oper[1].typ = Taicpu(p).oper[0].typ)
  994. Then
  995. {mov reg1, mem1 or mov mem1, reg1
  996. mov mem2, reg2 mov reg2, mem2}
  997. Begin
  998. If OpsEqual(Taicpu(hp1).oper[1],Taicpu(p).oper[0]) Then
  999. {mov reg1, mem1 or mov mem1, reg1
  1000. mov mem2, reg1 mov reg2, mem1}
  1001. Begin
  1002. If OpsEqual(Taicpu(hp1).oper[0],Taicpu(p).oper[1]) Then
  1003. { Removes the second statement from
  1004. mov reg1, mem1/reg2
  1005. mov mem1/reg2, reg1 }
  1006. Begin
  1007. if (Taicpu(p).oper[0].typ = top_reg) then
  1008. AllocRegBetween(asmL,Taicpu(p).oper[0].reg,p,hp1);
  1009. asml.remove(hp1);
  1010. hp1.free;
  1011. End
  1012. Else
  1013. Begin
  1014. TmpUsedRegs := UsedRegs;
  1015. UpdateUsedRegs(TmpUsedRegs, Tai(hp1.next));
  1016. If (Taicpu(p).oper[1].typ = top_ref) And
  1017. { mov reg1, mem1
  1018. mov mem2, reg1 }
  1019. GetNextInstruction(hp1, hp2) And
  1020. (hp2.typ = ait_instruction) And
  1021. (Taicpu(hp2).opcode = A_CMP) And
  1022. (Taicpu(hp2).opsize = Taicpu(p).opsize) and
  1023. (Taicpu(hp2).oper[0].typ = TOp_Ref) And
  1024. (Taicpu(hp2).oper[1].typ = TOp_Reg) And
  1025. RefsEqual(Taicpu(hp2).oper[0].ref^, Taicpu(p).oper[1].ref^) And
  1026. (Taicpu(hp2).oper[1].reg = Taicpu(p).oper[0].reg) And
  1027. Not(RegUsedAfterInstruction(Taicpu(p).oper[0].reg, hp2, TmpUsedRegs)) Then
  1028. { change to
  1029. mov reg1, mem1 mov reg1, mem1
  1030. mov mem2, reg1 cmp reg1, mem2
  1031. cmp mem1, reg1 }
  1032. Begin
  1033. asml.Remove(hp2);
  1034. hp2.free;
  1035. Taicpu(hp1).opcode := A_CMP;
  1036. Taicpu(hp1).loadref(1,Taicpu(hp1).oper[0].ref^);
  1037. Taicpu(hp1).loadreg(0,Taicpu(p).oper[0].reg);
  1038. End;
  1039. End;
  1040. End
  1041. Else
  1042. Begin
  1043. tmpUsedRegs := UsedRegs;
  1044. If GetNextInstruction(hp1, hp2) And
  1045. (Taicpu(p).oper[0].typ = top_ref) And
  1046. (Taicpu(p).oper[1].typ = top_reg) And
  1047. (Taicpu(hp1).oper[0].typ = top_reg) And
  1048. (Taicpu(hp1).oper[0].reg = Taicpu(p).oper[1].reg) And
  1049. (Taicpu(hp1).oper[1].typ = top_ref) And
  1050. (Tai(hp2).typ = ait_instruction) And
  1051. (Taicpu(hp2).opcode = A_MOV) And
  1052. (Taicpu(hp2).opsize = Taicpu(p).opsize) and
  1053. (Taicpu(hp2).oper[1].typ = top_reg) And
  1054. (Taicpu(hp2).oper[0].typ = top_ref) And
  1055. RefsEqual(Taicpu(hp2).oper[0].ref^, Taicpu(hp1).oper[1].ref^) Then
  1056. If not regInRef(Taicpu(hp2).oper[1].reg,Taicpu(hp2).oper[0].ref^) and
  1057. (Taicpu(p).oper[1].reg in [R_DI,R_EDI]) and
  1058. not(RegUsedAfterInstruction(R_EDI,hp1,tmpUsedRegs)) Then
  1059. { mov mem1, %edi
  1060. mov %edi, mem2
  1061. mov mem2, reg2
  1062. to:
  1063. mov mem1, reg2
  1064. mov reg2, mem2}
  1065. Begin
  1066. AllocRegBetween(asmL,reg32(Taicpu(hp2).oper[1].reg),p,hp2);
  1067. Taicpu(p).Loadoper(1,Taicpu(hp2).oper[1]);
  1068. Taicpu(hp1).loadoper(0,Taicpu(hp2).oper[1]);
  1069. asml.Remove(hp2);
  1070. hp2.free;
  1071. End
  1072. Else
  1073. If (Taicpu(p).oper[1].reg <> Taicpu(hp2).oper[1].reg) And
  1074. not(RegInRef(Taicpu(p).oper[1].reg,Taicpu(p).oper[0].ref^)) And
  1075. not(RegInRef(Taicpu(hp2).oper[1].reg,Taicpu(hp2).oper[0].ref^))
  1076. Then
  1077. { mov mem1, reg1 mov mem1, reg1
  1078. mov reg1, mem2 mov reg1, mem2
  1079. mov mem2, reg2 mov mem2, reg1
  1080. to: to:
  1081. mov mem1, reg1 mov mem1, reg1
  1082. mov mem1, reg2 mov reg1, mem2
  1083. mov reg1, mem2
  1084. or (if mem1 depends on reg1
  1085. and/or if mem2 depends on reg2)
  1086. to:
  1087. mov mem1, reg1
  1088. mov reg1, mem2
  1089. mov reg1, reg2
  1090. }
  1091. Begin
  1092. Taicpu(hp1).LoadRef(0,Taicpu(p).oper[0].ref^);
  1093. Taicpu(hp1).LoadReg(1,Taicpu(hp2).oper[1].reg);
  1094. Taicpu(hp2).LoadRef(1,Taicpu(hp2).oper[0].ref^);
  1095. Taicpu(hp2).LoadReg(0,Taicpu(p).oper[1].reg);
  1096. allocRegBetween(asmL,Taicpu(p).oper[1].reg,p,hp2);
  1097. if (Taicpu(p).oper[0].ref^.base in (rg.usableregsint+[R_EDI])) then
  1098. allocRegBetween(asmL,Taicpu(p).oper[0].ref^.base,p,hp2);
  1099. if (Taicpu(p).oper[0].ref^.index in (rg.usableregsint+[R_EDI])) then
  1100. allocRegBetween(asmL,Taicpu(p).oper[0].ref^.index,p,hp2);
  1101. End
  1102. Else
  1103. If (Taicpu(hp1).Oper[0].reg <> Taicpu(hp2).Oper[1].reg) Then
  1104. begin
  1105. Taicpu(hp2).LoadReg(0,Taicpu(hp1).Oper[0].reg);
  1106. allocRegBetween(asmL,Taicpu(p).oper[1].reg,p,hp2);
  1107. end
  1108. else
  1109. begin
  1110. asml.Remove(hp2);
  1111. hp2.free;
  1112. end
  1113. End;
  1114. End
  1115. Else
  1116. (* {movl [mem1],reg1
  1117. movl [mem1],reg2
  1118. to:
  1119. movl [mem1],reg1
  1120. movl reg1,reg2 }
  1121. If (Taicpu(p).oper[0].typ = top_ref) and
  1122. (Taicpu(p).oper[1].typ = top_reg) and
  1123. (Taicpu(hp1).oper[0].typ = top_ref) and
  1124. (Taicpu(hp1).oper[1].typ = top_reg) and
  1125. (Taicpu(p).opsize = Taicpu(hp1).opsize) and
  1126. RefsEqual(TReference(Taicpu(p).oper[0]^),Taicpu(hp1).oper[0]^.ref^) and
  1127. (Taicpu(p).oper[1].reg<>Taicpu(hp1).oper[0]^.ref^.base) and
  1128. (Taicpu(p).oper[1].reg<>Taicpu(hp1).oper[0]^.ref^.index) then
  1129. Taicpu(hp1).LoadReg(0,Taicpu(p).oper[1].reg)
  1130. Else*)
  1131. { movl const1,[mem1]
  1132. movl [mem1],reg1
  1133. to:
  1134. movl const1,reg1
  1135. movl reg1,[mem1] }
  1136. If (Taicpu(p).oper[0].typ = top_const) and
  1137. (Taicpu(p).oper[1].typ = top_ref) and
  1138. (Taicpu(hp1).oper[0].typ = top_ref) and
  1139. (Taicpu(hp1).oper[1].typ = top_reg) and
  1140. (Taicpu(p).opsize = Taicpu(hp1).opsize) and
  1141. RefsEqual(Taicpu(hp1).oper[0].ref^,Taicpu(p).oper[1].ref^) then
  1142. Begin
  1143. allocregbetween(asml,Taicpu(hp1).oper[1].reg,p,hp1);
  1144. { allocregbetween doesn't insert this because at }
  1145. { this time, no regalloc info is available in }
  1146. { the optinfo field, so do it manually (JM) }
  1147. hp2 := TaiRegalloc.Alloc(Taicpu(hp1).oper[1].reg);
  1148. insertllitem(asml,p.previous,p,hp2);
  1149. Taicpu(hp1).LoadReg(0,Taicpu(hp1).oper[1].reg);
  1150. Taicpu(hp1).LoadRef(1,Taicpu(p).oper[1].ref^);
  1151. Taicpu(p).LoadReg(1,Taicpu(hp1).oper[0].reg);
  1152. End
  1153. End;
  1154. End;
  1155. A_MOVZX:
  1156. Begin
  1157. {removes superfluous And's after movzx's}
  1158. If (Taicpu(p).oper[1].typ = top_reg) And
  1159. GetNextInstruction(p, hp1) And
  1160. (Tai(hp1).typ = ait_instruction) And
  1161. (Taicpu(hp1).opcode = A_AND) And
  1162. (Taicpu(hp1).oper[0].typ = top_const) And
  1163. (Taicpu(hp1).oper[1].typ = top_reg) And
  1164. (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg)
  1165. Then
  1166. Case Taicpu(p).opsize Of
  1167. S_BL, S_BW:
  1168. If (Taicpu(hp1).oper[0].val = $ff) Then
  1169. Begin
  1170. asml.Remove(hp1);
  1171. hp1.free;
  1172. End;
  1173. S_WL:
  1174. If (Taicpu(hp1).oper[0].val = $ffff) Then
  1175. Begin
  1176. asml.Remove(hp1);
  1177. hp1.free;
  1178. End;
  1179. End;
  1180. {changes some movzx constructs to faster synonyms (all examples
  1181. are given with eax/ax, but are also valid for other registers)}
  1182. If (Taicpu(p).oper[1].typ = top_reg) Then
  1183. If (Taicpu(p).oper[0].typ = top_reg) Then
  1184. Case Taicpu(p).opsize of
  1185. S_BW:
  1186. Begin
  1187. If (rg.makeregsize(Taicpu(p).oper[0].reg,OS_16)=Taicpu(p).oper[1].reg) And
  1188. Not(CS_LittleSize In aktglobalswitches)
  1189. Then
  1190. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1191. Begin
  1192. Taicpu(p).opcode := A_AND;
  1193. Taicpu(p).changeopsize(S_W);
  1194. Taicpu(p).LoadConst(0,$ff);
  1195. End
  1196. Else
  1197. If GetNextInstruction(p, hp1) And
  1198. (Tai(hp1).typ = ait_instruction) And
  1199. (Taicpu(hp1).opcode = A_AND) And
  1200. (Taicpu(hp1).oper[0].typ = top_const) And
  1201. (Taicpu(hp1).oper[1].typ = top_reg) And
  1202. (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg)
  1203. Then
  1204. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1205. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1206. Begin
  1207. Taicpu(p).opcode := A_MOV;
  1208. Taicpu(p).changeopsize(S_W);
  1209. Taicpu(p).LoadReg(0,rg.makeregsize(Taicpu(p).oper[0].reg,OS_16));
  1210. Taicpu(hp1).LoadConst(0,Taicpu(hp1).oper[0].val And $ff);
  1211. End;
  1212. End;
  1213. S_BL:
  1214. Begin
  1215. If (rg.makeregsize(Taicpu(p).oper[0].reg,OS_32)=Taicpu(p).oper[1].reg) And
  1216. Not(CS_LittleSize in aktglobalswitches)
  1217. Then
  1218. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1219. Begin
  1220. Taicpu(p).opcode := A_AND;
  1221. Taicpu(p).changeopsize(S_L);
  1222. Taicpu(p).loadconst(0,$ff)
  1223. End
  1224. Else
  1225. If GetNextInstruction(p, hp1) And
  1226. (Tai(hp1).typ = ait_instruction) And
  1227. (Taicpu(hp1).opcode = A_AND) And
  1228. (Taicpu(hp1).oper[0].typ = top_const) And
  1229. (Taicpu(hp1).oper[1].typ = top_reg) And
  1230. (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg)
  1231. Then
  1232. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1233. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1234. Begin
  1235. Taicpu(p).opcode := A_MOV;
  1236. Taicpu(p).changeopsize(S_L);
  1237. Taicpu(p).LoadReg(0,rg.makeregsize(Taicpu(p).oper[0].reg,OS_32));
  1238. Taicpu(hp1).LoadConst(0,Taicpu(hp1).oper[0].val And $ff);
  1239. End
  1240. End;
  1241. S_WL:
  1242. Begin
  1243. If (rg.makeregsize(Taicpu(p).oper[0].reg,OS_32)=Taicpu(p).oper[1].reg) And
  1244. Not(CS_LittleSize In aktglobalswitches)
  1245. Then
  1246. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1247. Begin
  1248. Taicpu(p).opcode := A_AND;
  1249. Taicpu(p).changeopsize(S_L);
  1250. Taicpu(p).LoadConst(0,$ffff);
  1251. End
  1252. Else
  1253. If GetNextInstruction(p, hp1) And
  1254. (Tai(hp1).typ = ait_instruction) And
  1255. (Taicpu(hp1).opcode = A_AND) And
  1256. (Taicpu(hp1).oper[0].typ = top_const) And
  1257. (Taicpu(hp1).oper[1].typ = top_reg) And
  1258. (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg)
  1259. Then
  1260. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1261. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1262. Begin
  1263. Taicpu(p).opcode := A_MOV;
  1264. Taicpu(p).changeopsize(S_L);
  1265. Taicpu(p).LoadReg(0,rg.makeregsize(Taicpu(p).oper[0].reg,OS_32));
  1266. Taicpu(hp1).LoadConst(0,Taicpu(hp1).oper[0].val And $ffff);
  1267. End;
  1268. End;
  1269. End
  1270. Else
  1271. If (Taicpu(p).oper[0].typ = top_ref) Then
  1272. Begin
  1273. If GetNextInstruction(p, hp1) And
  1274. (Tai(hp1).typ = ait_instruction) And
  1275. (Taicpu(hp1).opcode = A_AND) And
  1276. (Taicpu(hp1).oper[0].typ = Top_Const) And
  1277. (Taicpu(hp1).oper[1].typ = Top_Reg) And
  1278. (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg) Then
  1279. Begin
  1280. Taicpu(p).opcode := A_MOV;
  1281. Case Taicpu(p).opsize Of
  1282. S_BL:
  1283. Begin
  1284. Taicpu(p).changeopsize(S_L);
  1285. Taicpu(hp1).LoadConst(0,Taicpu(hp1).oper[0].val And $ff);
  1286. End;
  1287. S_WL:
  1288. Begin
  1289. Taicpu(p).changeopsize(S_L);
  1290. Taicpu(hp1).LoadConst(0,Taicpu(hp1).oper[0].val And $ffff);
  1291. End;
  1292. S_BW:
  1293. Begin
  1294. Taicpu(p).changeopsize(S_W);
  1295. Taicpu(hp1).LoadConst(0,Taicpu(hp1).oper[0].val And $ff);
  1296. End;
  1297. End;
  1298. End;
  1299. End;
  1300. End;
  1301. A_POP:
  1302. Begin
  1303. if target_info.target=target_i386_go32v2 then
  1304. begin
  1305. { Transform a series of pop/pop/pop/push/push/push to }
  1306. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1307. { because I'm not sure whether they can cope with }
  1308. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1309. { such a problem when using esp as frame pointer (JM) }
  1310. if (Taicpu(p).oper[0].typ = top_reg) then
  1311. begin
  1312. hp1 := p;
  1313. hp2 := p;
  1314. l := 0;
  1315. while getNextInstruction(hp1,hp1) and
  1316. (hp1.typ = ait_instruction) and
  1317. (Taicpu(hp1).opcode = A_POP) and
  1318. (Taicpu(hp1).oper[0].typ = top_reg) do
  1319. begin
  1320. hp2 := hp1;
  1321. inc(l,4);
  1322. end;
  1323. getLastInstruction(p,hp3);
  1324. l1 := 0;
  1325. while (hp2 <> hp3) and
  1326. assigned(hp1) and
  1327. (hp1.typ = ait_instruction) and
  1328. (Taicpu(hp1).opcode = A_PUSH) and
  1329. (Taicpu(hp1).oper[0].typ = top_reg) and
  1330. (Taicpu(hp1).oper[0].reg = Taicpu(hp2).oper[0].reg) do
  1331. begin
  1332. { change it to a two op operation }
  1333. Taicpu(hp2).oper[1].typ:=top_none;
  1334. Taicpu(hp2).ops:=2;
  1335. Taicpu(hp2).opcode := A_MOV;
  1336. Taicpu(hp2).Loadoper(1,Taicpu(hp1).oper[0]);
  1337. reference_reset(tmpref);
  1338. tmpRef.base := STACK_POINTER_REG;
  1339. tmpRef.offset := l;
  1340. Taicpu(hp2).loadRef(0,tmpRef);
  1341. hp4 := hp1;
  1342. getNextInstruction(hp1,hp1);
  1343. asml.remove(hp4);
  1344. hp4.free;
  1345. getLastInstruction(hp2,hp2);
  1346. dec(l,4);
  1347. inc(l1);
  1348. end;
  1349. if l <> -4 then
  1350. begin
  1351. inc(l,4);
  1352. for l1 := l1 downto 1 do
  1353. begin
  1354. getNextInstruction(hp2,hp2);
  1355. dec(Taicpu(hp2).oper[0].ref^.offset,l);
  1356. end
  1357. end
  1358. end
  1359. end
  1360. else
  1361. begin
  1362. if (Taicpu(p).oper[0].typ = top_reg) And
  1363. GetNextInstruction(p, hp1) And
  1364. (Tai(hp1).typ=ait_instruction) and
  1365. (Taicpu(hp1).opcode=A_PUSH) and
  1366. (Taicpu(hp1).oper[0].typ = top_reg) And
  1367. (Taicpu(hp1).oper[0].reg=Taicpu(p).oper[0].reg) then
  1368. Begin
  1369. { change it to a two op operation }
  1370. Taicpu(p).oper[1].typ:=top_none;
  1371. Taicpu(p).ops:=2;
  1372. Taicpu(p).opcode := A_MOV;
  1373. Taicpu(p).Loadoper(1,Taicpu(p).oper[0]);
  1374. reference_reset(tmpref);
  1375. TmpRef.base := R_ESP;
  1376. Taicpu(p).LoadRef(0,TmpRef);
  1377. asml.Remove(hp1);
  1378. hp1.free;
  1379. End;
  1380. end;
  1381. end;
  1382. A_PUSH:
  1383. Begin
  1384. If (Taicpu(p).opsize = S_W) And
  1385. (Taicpu(p).oper[0].typ = Top_Const) And
  1386. GetNextInstruction(p, hp1) And
  1387. (Tai(hp1).typ = ait_instruction) And
  1388. (Taicpu(hp1).opcode = A_PUSH) And
  1389. (Taicpu(hp1).oper[0].typ = Top_Const) And
  1390. (Taicpu(hp1).opsize = S_W) Then
  1391. Begin
  1392. Taicpu(p).changeopsize(S_L);
  1393. Taicpu(p).LoadConst(0,Taicpu(p).oper[0].val shl 16 + word(Taicpu(hp1).oper[0].val));
  1394. asml.Remove(hp1);
  1395. hp1.free;
  1396. End;
  1397. End;
  1398. A_SHL, A_SAL:
  1399. Begin
  1400. If (Taicpu(p).oper[0].typ = Top_Const) And
  1401. (Taicpu(p).oper[1].typ = Top_Reg) And
  1402. (Taicpu(p).opsize = S_L) And
  1403. (Taicpu(p).oper[0].val <= 3)
  1404. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1405. Then
  1406. Begin
  1407. TmpBool1 := True; {should we check the next instruction?}
  1408. TmpBool2 := False; {have we found an add/sub which could be
  1409. integrated in the lea?}
  1410. reference_reset(tmpref);
  1411. TmpRef.index := Taicpu(p).oper[1].reg;
  1412. TmpRef.scalefactor := 1 shl Taicpu(p).oper[0].val;
  1413. While TmpBool1 And
  1414. GetNextInstruction(p, hp1) And
  1415. (Tai(hp1).typ = ait_instruction) And
  1416. ((((Taicpu(hp1).opcode = A_ADD) Or
  1417. (Taicpu(hp1).opcode = A_SUB)) And
  1418. (Taicpu(hp1).oper[1].typ = Top_Reg) And
  1419. (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg)) or
  1420. (((Taicpu(hp1).opcode = A_INC) or
  1421. (Taicpu(hp1).opcode = A_DEC)) and
  1422. (Taicpu(hp1).oper[0].typ = Top_Reg) and
  1423. (Taicpu(hp1).oper[0].reg = Taicpu(p).oper[1].reg))) Do
  1424. Begin
  1425. TmpBool1 := False;
  1426. If (Taicpu(hp1).oper[0].typ = Top_Const)
  1427. Then
  1428. Begin
  1429. TmpBool1 := True;
  1430. TmpBool2 := True;
  1431. case Taicpu(hp1).opcode of
  1432. A_ADD:
  1433. inc(TmpRef.offset, longint(Taicpu(hp1).oper[0].val));
  1434. A_SUB:
  1435. dec(TmpRef.offset, longint(Taicpu(hp1).oper[0].val));
  1436. end;
  1437. asml.Remove(hp1);
  1438. hp1.free;
  1439. End
  1440. Else
  1441. If (Taicpu(hp1).oper[0].typ = Top_Reg) And
  1442. (((Taicpu(hp1).opcode = A_ADD) And
  1443. (TmpRef.base = R_NO)) or
  1444. (Taicpu(hp1).opcode = A_INC) or
  1445. (Taicpu(hp1).opcode = A_DEC)) Then
  1446. Begin
  1447. TmpBool1 := True;
  1448. TmpBool2 := True;
  1449. case Taicpu(hp1).opcode of
  1450. A_ADD:
  1451. TmpRef.base := Taicpu(hp1).oper[0].reg;
  1452. A_INC:
  1453. inc(TmpRef.offset);
  1454. A_DEC:
  1455. dec(TmpRef.offset);
  1456. end;
  1457. asml.Remove(hp1);
  1458. hp1.free;
  1459. End;
  1460. End;
  1461. If TmpBool2 Or
  1462. ((aktoptprocessor < ClassP6) And
  1463. (Taicpu(p).oper[0].val <= 3) And
  1464. Not(CS_LittleSize in aktglobalswitches))
  1465. Then
  1466. Begin
  1467. If Not(TmpBool2) And
  1468. (Taicpu(p).oper[0].val = 1)
  1469. Then
  1470. Begin
  1471. hp1 := Taicpu.Op_reg_reg(A_ADD,Taicpu(p).opsize,
  1472. Taicpu(p).oper[1].reg, Taicpu(p).oper[1].reg)
  1473. End
  1474. Else hp1 := Taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1475. Taicpu(p).oper[1].reg);
  1476. InsertLLItem(AsmL,p.previous, p.next, hp1);
  1477. p.free;
  1478. p := hp1;
  1479. End;
  1480. End
  1481. Else
  1482. If (aktoptprocessor < ClassP6) And
  1483. (Taicpu(p).oper[0].typ = top_const) And
  1484. (Taicpu(p).oper[1].typ = top_reg) Then
  1485. If (Taicpu(p).oper[0].val = 1)
  1486. Then
  1487. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1488. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1489. (unlike shl, which is only pairable in the U pipe)}
  1490. Begin
  1491. hp1 := Taicpu.Op_reg_reg(A_ADD,Taicpu(p).opsize,
  1492. Taicpu(p).oper[1].reg, Taicpu(p).oper[1].reg);
  1493. InsertLLItem(AsmL,p.previous, p.next, hp1);
  1494. p.free;
  1495. p := hp1;
  1496. End
  1497. Else If (Taicpu(p).opsize = S_L) and
  1498. (Taicpu(p).oper[0].val<= 3) Then
  1499. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1500. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1501. Begin
  1502. reference_reset(tmpref);
  1503. TmpRef.index := Taicpu(p).oper[1].reg;
  1504. TmpRef.scalefactor := 1 shl Taicpu(p).oper[0].val;
  1505. hp1 := Taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, Taicpu(p).oper[1].reg);
  1506. InsertLLItem(AsmL,p.previous, p.next, hp1);
  1507. p.free;
  1508. p := hp1;
  1509. End
  1510. End;
  1511. A_SETcc :
  1512. { changes
  1513. setcc (funcres) setcc reg
  1514. movb (funcres), reg to leave/ret
  1515. leave/ret }
  1516. Begin
  1517. If (Taicpu(p).oper[0].typ = top_ref) And
  1518. GetNextInstruction(p, hp1) And
  1519. GetNextInstruction(hp1, hp2) And
  1520. (hp2.typ = ait_instruction) And
  1521. ((Taicpu(hp2).opcode = A_LEAVE) or
  1522. (Taicpu(hp2).opcode = A_RET)) And
  1523. (Taicpu(p).oper[0].ref^.Base = procinfo^.FramePointer) And
  1524. (Taicpu(p).oper[0].ref^.Index = R_NO) And
  1525. (Taicpu(p).oper[0].ref^.Offset >= procinfo^.Return_Offset) And
  1526. (hp1.typ = ait_instruction) And
  1527. (Taicpu(hp1).opcode = A_MOV) And
  1528. (Taicpu(hp1).opsize = S_B) And
  1529. (Taicpu(hp1).oper[0].typ = top_ref) And
  1530. RefsEqual(Taicpu(hp1).oper[0].ref^, Taicpu(p).oper[0].ref^) Then
  1531. Begin
  1532. Taicpu(p).LoadReg(0,Taicpu(hp1).oper[1].reg);
  1533. asml.Remove(hp1);
  1534. hp1.free;
  1535. End
  1536. End;
  1537. A_SUB:
  1538. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1539. { * change "sub/add const1, reg" or "dec reg" followed by
  1540. "sub const2, reg" to one "sub ..., reg" }
  1541. Begin
  1542. If (Taicpu(p).oper[0].typ = top_const) And
  1543. (Taicpu(p).oper[1].typ = top_reg) Then
  1544. If (Taicpu(p).oper[0].val = 2) And
  1545. (Taicpu(p).oper[1].reg = R_ESP) and
  1546. { Don't do the sub/push optimization if the sub }
  1547. { comes from setting up the stack frame (JM) }
  1548. (not getLastInstruction(p,hp1) or
  1549. (hp1.typ <> ait_instruction) or
  1550. (Taicpu(hp1).opcode <> A_MOV) or
  1551. (Taicpu(hp1).oper[0].typ <> top_reg) or
  1552. (Taicpu(hp1).oper[0].reg <> R_ESP) or
  1553. (Taicpu(hp1).oper[1].typ <> top_reg) or
  1554. (Taicpu(hp1).oper[1].reg <> R_EBP)) then
  1555. Begin
  1556. hp1 := Tai(p.next);
  1557. While Assigned(hp1) And
  1558. (Tai(hp1).typ In [ait_instruction]+SkipInstr) And
  1559. not regReadByInstruction(R_ESP,hp1) and
  1560. not regModifiedByInstruction(R_ESP,hp1) do
  1561. hp1 := Tai(hp1.next);
  1562. If Assigned(hp1) And
  1563. (Tai(hp1).typ = ait_instruction) And
  1564. (Taicpu(hp1).opcode = A_PUSH) And
  1565. (Taicpu(hp1).opsize = S_W)
  1566. Then
  1567. Begin
  1568. Taicpu(hp1).changeopsize(S_L);
  1569. if Taicpu(hp1).oper[0].typ=top_reg then
  1570. Taicpu(hp1).LoadReg(0,rg.makeregsize(Taicpu(hp1).oper[0].reg,OS_32));
  1571. hp1 := Tai(p.next);
  1572. asml.Remove(p);
  1573. p.free;
  1574. p := hp1;
  1575. Continue
  1576. End;
  1577. If DoSubAddOpt(p) Then continue;
  1578. End
  1579. Else If DoSubAddOpt(p) Then Continue
  1580. End;
  1581. A_XOR:
  1582. If (Taicpu(p).oper[0].typ = top_reg) And
  1583. (Taicpu(p).oper[1].typ = top_reg) And
  1584. (Taicpu(p).oper[0].reg = Taicpu(p).oper[1].reg) then
  1585. { temporarily change this to 'mov reg,0' to make it easier }
  1586. { for the CSE. Will be changed back in pass 2 }
  1587. begin
  1588. Taicpu(p).opcode := A_MOV;
  1589. Taicpu(p).loadconst(0,0);
  1590. end;
  1591. End;
  1592. end; { if is_jmp }
  1593. End;
  1594. { ait_label:
  1595. Begin
  1596. If labelCanBeSkipped(Tai_label(p))
  1597. Then
  1598. Begin
  1599. hp1 := Tai(p.next);
  1600. asml.Remove(p);
  1601. p.free;
  1602. p := hp1;
  1603. Continue
  1604. End;
  1605. End;}
  1606. End;
  1607. p:=Tai(p.next);
  1608. end;
  1609. end;
  function isFoldableArithOp(hp1: Taicpu; reg: tregister): boolean;
  { Tells whether instruction hp1 is an arithmetic/logical operation on
    register reg in one of the following shapes:
      - add/sub/or/xor/and/shl/shr/sar whose second operand (oper[1]) is the
        register reg and whose first operand (oper[0]) is either a constant
        or a register different from reg;
      - inc/dec whose only operand (oper[0]) is the register reg.
    Any other opcode or operand combination yields False. }
  begin
    isFoldableArithOp := False;
    case hp1.opcode of
      A_INC, A_DEC:
        { one-operand form: the operand itself has to be reg }
        if hp1.oper[0].typ = top_reg then
          isFoldableArithOp := (hp1.oper[0].reg = reg);
      A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
        { two-operand form: check the second operand first, then classify
          the first operand }
        if (hp1.oper[1].typ = top_reg) and (hp1.oper[1].reg = reg) then
          case hp1.oper[0].typ of
            top_const:
              isFoldableArithOp := True;
            top_reg:
              isFoldableArithOp := (hp1.oper[0].reg <> reg);
          end;
    end;
  end;
  Procedure PeepHoleOptPass2(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  { Second peephole pass. Walks the list from BlockStart up to, but not
    including, BlockEnd and rewrites the following instruction patterns:
      - fstp/fistp: delegated to doFpuLoadStoreOpt;
      - "mov reg1,reg2; imul y,reg2" becomes "imul y,reg1,reg2";
      - "mov reg1,reg2; mov/zx/sx (reg2,..),reg2" becomes
        "mov/zx/sx (reg1,..),reg2";
      - "mov (ref),reg; <arith> x,reg; mov reg,(ref)" becomes
        "<arith> x,(ref)" when reg is not used afterwards;
      - only when compiled with USECMOV and optimizing for ClassP6:
        a conditional jump over at most four movs becomes cmovcc's. }

  {$ifdef USECMOV}
    function CanBeCMOV(p : Tai) : boolean;
    { True if p is assigned and is a mov instruction with opsize S_L or
      S_W, a register or reference as source and a register as target. }
    begin
      CanBeCMOV :=
        assigned(p) and
        (p.typ = ait_instruction) and
        (Taicpu(p).opcode = A_MOV) and
        (Taicpu(p).opsize in [S_L,S_W]) and
        (Taicpu(p).oper[0].typ in [top_reg,top_ref]) and
        (Taicpu(p).oper[1].typ = top_reg);
    end;
  {$endif USECMOV}

  var
    p, hp1, hp2: Tai;
  {$ifdef USECMOV}
    movCount : longint;
    condition : tasmcond;
    hp3: Tai;
  {$endif USECMOV}
    UsedRegs, TmpUsedRegs: TRegSet;

  begin
    p := BlockStart;
    UsedRegs := [];
    while (p <> BlockEnd) do
      begin
        UpdateUsedRegs(UsedRegs, Tai(p.next));
        if (p.typ = ait_instruction) then
          case Taicpu(p).opcode of
  {$ifdef USECMOV}
            A_Jcc:
              if (aktspecificoptprocessor = ClassP6) then
                begin
                  { look for
                        jCC   xxx
                        <several movs>
                     xxx:
                    and count the movs that follow the jump }
                  movCount := 0;
                  GetNextInstruction(p, hp1);
                  while CanBeCMOV(hp1) do
                    begin
                      inc(movCount);
                      GetNextInstruction(hp1, hp1);
                    end;
                  if assigned(hp1) then
                    begin
                      if FindLabel(tasmlabel(Taicpu(p).oper[0].sym), hp1) then
                        begin
                          if (movCount > 0) and (movCount <= 4) then
                            begin
                              { the movs must execute exactly when the jump
                                would not have been taken }
                              condition := inverse_cond[Taicpu(p).condition];
                              GetNextInstruction(p, hp1);
                              asml.remove(p);
                              p.free;
                              p := hp1;
                              repeat
                                Taicpu(hp1).opcode := A_CMOVcc;
                                Taicpu(hp1).condition := condition;
                                GetNextInstruction(hp1, hp1);
                              until not(CanBeCMOV(hp1));
                              { drop the item the mov run ended on }
                              asml.remove(hp1);
                              hp1.free;
                              continue;
                            end;
                        end
                      else
                        begin
                          { check further for
                                jCC   xxx
                                <several movs>
                                jmp   yyy
                             xxx:
                                <several movs>
                             yyy:
                          }
                          { hp2 is the candidate for "jmp yyy" }
                          hp2 := hp1;
                          { advance hp1 towards xxx }
                          GetNextInstruction(hp1, hp1);
                          if assigned(hp2) and
                             assigned(hp1) and
                             (movCount <= 3) and
                             (hp2.typ = ait_instruction) and
                             (Taicpu(hp2).is_jmp) and
                             (Taicpu(hp2).condition = C_None) and
                             FindLabel(tasmlabel(Taicpu(p).oper[0].sym), hp1) then
                            begin
                              { count the movs of the second run; the code
                                that would use the result is disabled below }
                              movCount := 0;
                              while CanBeCMOV(hp1) do
                                begin
                                  inc(movCount);
                                  GetNextInstruction(hp1, hp1);
                                end;
                            end;
                          {
                          disabled:
                          if assigned(hp1) and
                            FindLabel(tasmlabel(Taicpu(hp2).oper[0].sym),hp1) then
                            begin
                               condition:=inverse_cond[Taicpu(p).condition];
                               GetNextInstruction(p,hp1);
                               asml.remove(p);
                               p.free;
                               p:=hp1;
                               repeat
                                 Taicpu(hp1).opcode:=A_CMOVcc;
                                 Taicpu(hp1).condition:=condition;
                                 GetNextInstruction(hp1,hp1);
                               until not(assigned(hp1)) or
                                 not(CanBeCMOV(hp1));
                               hp2:=hp1.next;
                               condition:=inverse_cond[condition];
                               asml.remove(hp1.next)
                               hp1.next.free;
                               asml.remove(hp1);
                               hp1.free;
                               continue;
                            end;
                          }
                        end;
                    end;
                end;
  {$endif USECMOV}
            A_FSTP,A_FISTP:
              if doFpuLoadStoreOpt(asmL, p) then
                continue;
            A_IMUL:
              { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
              if ((Taicpu(p).oper[0].typ = top_const) or
                  (Taicpu(p).oper[0].typ = top_symbol)) and
                 (Taicpu(p).oper[1].typ = top_reg) and
                 ((Taicpu(p).oper[2].typ = top_none) or
                  ((Taicpu(p).oper[2].typ = top_reg) and
                   (Taicpu(p).oper[2].reg = Taicpu(p).oper[1].reg))) and
                 getLastInstruction(p, hp1) and
                 (hp1.typ = ait_instruction) and
                 (Taicpu(hp1).opcode = A_MOV) and
                 (Taicpu(hp1).oper[0].typ = top_reg) and
                 (Taicpu(hp1).oper[1].typ = top_reg) and
                 (Taicpu(hp1).oper[1].reg = Taicpu(p).oper[1].reg) then
                begin
                  Taicpu(p).ops := 3;
                  Taicpu(p).loadreg(1, Taicpu(hp1).oper[0].reg);
                  Taicpu(p).loadreg(2, Taicpu(hp1).oper[1].reg);
                  { the preceding mov is now redundant }
                  asml.remove(hp1);
                  hp1.free;
                end;
            A_MOV:
              if (Taicpu(p).oper[0].typ = top_reg) and
                 (Taicpu(p).oper[1].typ = top_reg) and
                 GetNextInstruction(p, hp1) and
                 (hp1.typ = ait_instruction) and
                 ((Taicpu(hp1).opcode = A_MOV) or
                  (Taicpu(hp1).opcode = A_MOVZX) or
                  (Taicpu(hp1).opcode = A_MOVSX)) and
                 (Taicpu(hp1).oper[0].typ = top_ref) and
                 (Taicpu(hp1).oper[1].typ = top_reg) and
                 ((Taicpu(hp1).oper[0].ref^.Base = Taicpu(p).oper[1].reg) or
                  (Taicpu(hp1).oper[0].ref^.Index = Taicpu(p).oper[1].reg)) and
                 (Reg32(Taicpu(hp1).oper[1].reg) = Taicpu(p).oper[1].reg) then
                begin
                  { mov reg1, reg2
                    mov/zx/sx (reg2, ..), reg2
                      becomes
                    mov/zx/sx (reg1, ..), reg2 }
                  if (Taicpu(hp1).oper[0].ref^.Base = Taicpu(p).oper[1].reg) then
                    Taicpu(hp1).oper[0].ref^.Base := Taicpu(p).oper[0].reg;
                  if (Taicpu(hp1).oper[0].ref^.Index = Taicpu(p).oper[1].reg) then
                    Taicpu(hp1).oper[0].ref^.Index := Taicpu(p).oper[0].reg;
                  asml.Remove(p);
                  p.free;
                  { re-examine the rewritten load on the next iteration }
                  p := hp1;
                  continue;
                end
              else if (Taicpu(p).oper[0].typ = top_ref) and
                      GetNextInstruction(p, hp1) and
                      (hp1.typ = ait_instruction) and
                      IsFoldableArithOp(Taicpu(hp1), Taicpu(p).oper[1].reg) and
                      GetNextInstruction(hp1, hp2) and
                      (hp2.typ = ait_instruction) and
                      (Taicpu(hp2).opcode = A_MOV) and
                      (Taicpu(hp2).oper[0].typ = top_reg) and
                      (Taicpu(hp2).oper[0].reg = Taicpu(p).oper[1].reg) and
                      (Taicpu(hp2).oper[1].typ = top_ref) then
                begin
                  { work on a copy of the register usage so the main
                    bookkeeping is not disturbed by the look-ahead }
                  TmpUsedRegs := UsedRegs;
                  UpdateUsedRegs(TmpUsedRegs, Tai(hp1.next));
                  if RefsEqual(Taicpu(hp2).oper[1].ref^, Taicpu(p).oper[0].ref^) and
                     not(RegUsedAfterInstruction(Taicpu(p).oper[1].reg,
                                                 hp2, TmpUsedRegs)) then
                    begin
                      { change   mov (ref), reg
                                 add/sub/or/... reg2/$const, reg
                                 mov reg, (ref)
                                 # release reg
                        to       add/sub/or/... reg2/$const, (ref) }
                      if (Taicpu(hp1).opcode = A_INC) or
                         (Taicpu(hp1).opcode = A_DEC) then
                        { inc/dec have a single operand }
                        Taicpu(hp1).LoadRef(0, Taicpu(p).oper[0].ref^)
                      else
                        Taicpu(hp1).LoadRef(1, Taicpu(p).oper[0].ref^);
                      asml.Remove(p);
                      asml.Remove(hp2);
                      p.free;
                      hp2.free;
                      p := hp1;
                    end;
                end;
          end;
        p := Tai(p.next);
      end;
  end;
  Procedure PostPeepHoleOpts(AsmL: TAAsmOutput; BlockStart, BlockEnd: Tai);
  { Final peephole clean-up over the instructions from BlockStart up to,
    but not including, BlockEnd:
      - "call x; jmp label" becomes "push label; jmp x" when optimizing
        for a processor below ClassP6;
      - "cmp $0,%reg" becomes "test %reg,%reg"; the instruction is then
        looked at again so the test/or rule below can still apply;
      - "mov $0,%reg" becomes "xor %reg,%reg";
      - "movzbl x,%reg32" becomes "xorl %reg32,%reg32; movb x,%reg8" for
        ClassP5 when not optimizing for size and cs_regalloc is off;
      - a "test/or %y,%y" directly following an arithmetic instruction on
        %y is removed. }
  var
    p, aux, pushOp: Tai;
  begin
    p := BlockStart;
    while (p <> BlockEnd) do
      begin
        if (p.typ = ait_instruction) then
          case Taicpu(p).opcode of
            A_CALL:
              { call x; jmp label  ->  push label; jmp x }
              if (AktOptProcessor < ClassP6) and
                 GetNextInstruction(p, aux) and
                 (aux.typ = ait_instruction) and
                 (Taicpu(aux).opcode = A_JMP) and
                 (Taicpu(aux).oper[0].typ = top_symbol) then
                begin
                  pushOp := Taicpu.Op_sym(A_PUSH, S_L, Taicpu(aux).oper[0].sym);
                  InsertLLItem(AsmL, p.previous, p, pushOp);
                  Taicpu(p).opcode := A_JMP;
                  Taicpu(p).is_jmp := true;
                  asml.Remove(aux);
                  aux.free;
                end;
            A_CMP:
              { change "cmp $0, %reg" to "test %reg, %reg" }
              if (Taicpu(p).oper[0].typ = top_const) and
                 (Taicpu(p).oper[0].val = 0) and
                 (Taicpu(p).oper[1].typ = top_reg) then
                begin
                  Taicpu(p).opcode := A_TEST;
                  Taicpu(p).loadreg(0, Taicpu(p).oper[1].reg);
                  { p is not advanced: it is processed again as a test }
                  continue;
                end;
            A_MOV:
              { change "mov $0, %reg" into "xor %reg, %reg" }
              if (Taicpu(p).oper[0].typ = top_const) and
                 (Taicpu(p).oper[0].val = 0) and
                 (Taicpu(p).oper[1].typ = top_reg) then
                begin
                  Taicpu(p).opcode := A_XOR;
                  Taicpu(p).LoadReg(0, Taicpu(p).oper[1].reg);
                end;
            A_MOVZX:
              { if register vars are on, it's possible there is code like }
              {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
              { so we can't safely replace the movzx then with xor/mov,   }
              { since that would change the flags (JM)                    }
              if not(cs_regalloc in aktglobalswitches) and
                 (Taicpu(p).oper[1].typ = top_reg) then
                begin
                  if (Taicpu(p).oper[0].typ = top_reg) then
                    begin
                      { change "movzbl %reg1, %reg2" to
                        "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium
                        and PentiumMMX }
                      if (Taicpu(p).opsize = S_BL) and
                         IsGP32Reg(Taicpu(p).oper[1].reg) and
                         not(CS_LittleSize in aktglobalswitches) and
                         (aktoptprocessor = ClassP5) then
                        begin
                          aux := Taicpu.op_reg_reg(A_XOR, S_L,
                                   Taicpu(p).oper[1].reg, Taicpu(p).oper[1].reg);
                          InsertLLItem(AsmL, p.previous, p, aux);
                          Taicpu(p).opcode := A_MOV;
                          Taicpu(p).changeopsize(S_B);
                          Taicpu(p).LoadReg(1,
                            rg.makeregsize(Taicpu(p).oper[1].reg, OS_8));
                        end;
                    end
                  else if (Taicpu(p).oper[0].typ = top_ref) and
                          (Taicpu(p).oper[0].ref^.base <> Taicpu(p).oper[1].reg) and
                          (Taicpu(p).oper[0].ref^.index <> Taicpu(p).oper[1].reg) and
                          not(CS_LittleSize in aktglobalswitches) and
                          IsGP32Reg(Taicpu(p).oper[1].reg) and
                          (aktoptprocessor = ClassP5) and
                          (Taicpu(p).opsize = S_BL) then
                    begin
                      { change "movzbl mem, %reg" to
                        "xorl %reg, %reg; movb mem, %reg8" for Pentium and
                        PentiumMMX; the target must not be part of the
                        address, since it is cleared before the load }
                      aux := Taicpu.Op_reg_reg(A_XOR, S_L,
                               Taicpu(p).oper[1].reg, Taicpu(p).oper[1].reg);
                      Taicpu(p).opcode := A_MOV;
                      Taicpu(p).changeopsize(S_B);
                      Taicpu(p).LoadReg(1,
                        rg.makeregsize(Taicpu(p).oper[1].reg, OS_8));
                      InsertLLItem(AsmL, p.previous, p, aux);
                    end;
                end;
            A_TEST, A_OR:
              { removes the line marked with (x) from the sequence
                    and/or/xor/add/sub/...   $x, %y
                    test/or                  %y, %y   (x)
                    j(n)z                    _Label
                as the first instruction already adjusts the ZF.
                shl/shr are deliberately not in the list below. }
              if OpsEqual(Taicpu(p).oper[0], Taicpu(p).oper[1]) and
                 GetLastInstruction(p, aux) and
                 (Tai(aux).typ = ait_instruction) then
                case Taicpu(aux).opcode of
                  A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                    if OpsEqual(Taicpu(aux).oper[1], Taicpu(p).oper[0]) then
                      begin
                        aux := Tai(p.next);
                        asml.remove(p);
                        p.free;
                        p := Tai(aux);
                        continue;
                      end;
                  A_DEC, A_INC, A_NEG:
                    if OpsEqual(Taicpu(aux).oper[0], Taicpu(p).oper[0]) then
                      begin
                        if (Taicpu(aux).opcode <> A_NEG) then
                          begin
                            { replace inc/dec with add/sub 1, because
                              inc/dec doesn't set the carry flag }
                            if (Taicpu(aux).opcode = A_DEC) then
                              Taicpu(aux).opcode := A_SUB
                            else
                              Taicpu(aux).opcode := A_ADD;
                            Taicpu(aux).Loadoper(1, Taicpu(aux).oper[0]);
                            Taicpu(aux).LoadConst(0, 1);
                            Taicpu(aux).ops := 2;
                          end;
                        aux := Tai(p.next);
                        asml.remove(p);
                        p.free;
                        p := Tai(aux);
                        continue;
                      end;
                end;
          end;
        p := Tai(p.next);
      end;
  end;
  1998. End.
  1999. {
  2000. $Log$
  2001. Revision 1.26 2002-05-16 19:46:52 carl
  2002. + defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand
  2003. + try to fix temp allocation (still in ifdef)
  2004. + generic constructor calls
  2005. + start of tassembler / tmodulebase class cleanup
  2006. Revision 1.24 2002/05/12 16:53:18 peter
  2007. * moved entry and exitcode to ncgutil and cgobj
  2008. * foreach gets extra argument for passing local data to the
  2009. iterator function
  2010. * -CR checks also class typecasts at runtime by changing them
  2011. into as
  2012. * fixed compiler to cycle with the -CR option
  2013. * fixed stabs with elf writer, finally the global variables can
  2014. be watched
  2015. * removed a lot of routines from cga unit and replaced them by
  2016. calls to cgobj
  2017. * u32bit-s32bit updates for and,or,xor nodes. When one element is
  2018. u32bit then the other is typecasted also to u32bit without giving
  2019. a rangecheck warning/error.
  2020. * fixed pascal calling method with reversing also the high tree in
  2021. the parast, detected by tcalcst3 test
  2022. Revision 1.23 2002/04/21 15:40:49 carl
  2023. * changeregsize -> rg.makeregsize
  2024. Revision 1.22 2002/04/20 21:37:07 carl
  2025. + generic FPC_CHECKPOINTER
  2026. + first parameter offset in stack now portable
  2027. * rename some constants
  2028. + move some cpu stuff to other units
  2029. - remove unused constants
  2030. * fix stacksize for some targets
  2031. * fix generic size problems which depend now on EXTEND_SIZE constant
  2032. * removing frame pointer in routines is only available for : i386,m68k and vis targets
  2033. Revision 1.21 2002/04/15 19:44:21 peter
  2034. * fixed stackcheck that would be called recursively when a stack
  2035. error was found
  2036. * generic rg.makeregsize(reg,size) for i386 register resizing
  2037. * removed some more routines from cga unit
  2038. * fixed returnvalue handling
  2039. * fixed default stacksize of linux and go32v2, 8kb was a bit small :-)
  2040. Revision 1.20 2002/04/02 20:30:16 jonas
  2041. + support for folding inc/dec in shl/add/sub sequences to a single lea
  2042. instruction
  2043. Revision 1.19 2002/04/02 13:01:58 jonas
  2044. * fixed nasty bug in "and" peepholeoptimization that caused wrong
  2045. optimizations after Peter's big location patch
  2046. Revision 1.18 2002/03/31 20:26:40 jonas
  2047. + a_loadfpu_* and a_loadmm_* methods in tcg
  2048. * register allocation is now handled by a class and is mostly processor
  2049. independent (+rgobj.pas and i386/rgcpu.pas)
  2050. * temp allocation is now handled by a class (+tgobj.pas, -i386\tgcpu.pas)
  2051. * some small improvements and fixes to the optimizer
  2052. * some register allocation fixes
  2053. * some fpuvaroffset fixes in the unary minus node
  2054. * push/popusedregisters is now called rg.save/restoreusedregisters and
  2055. (for i386) uses temps instead of push/pop's when using -Op3 (that code is
  2056. also better optimizable)
  2057. * fixed and optimized register saving/restoring for new/dispose nodes
  2058. * LOC_FPU locations now also require their "register" field to be set to
  2059. R_ST, not R_ST0 (the latter is used for LOC_CFPUREGISTER locations only)
  2060. - list field removed of the tnode class because it's not used currently
  2061. and can cause hard-to-find bugs
  2062. Revision 1.17 2001/12/29 15:29:59 jonas
  2063. * powerpc/cgcpu.pas compiles :)
  2064. * several powerpc-related fixes
  2065. * cpuasm unit is now based on common tainst unit
  2066. + nppcmat unit for powerpc (almost complete)
  2067. Revision 1.16 2001/10/12 13:53:24 jonas
  2068. * fixed small crashing bug ("merged")
  2069. * some more optimizations are now only done once at the end of the optimizing
  2070. cycle instead of every iteration
  2071. Revision 1.15 2001/08/26 13:37:01 florian
  2072. * some cg reorganisation
  2073. * some PPC updates
  2074. Revision 1.14 2001/08/01 09:46:55 jonas
  2075. * fixed endless loop with web bug 1571 (merged)
  2076. Revision 1.13 2001/04/13 01:22:19 peter
  2077. * symtable change to classes
  2078. * range check generation and errors fixed, make cycle DEBUG=1 works
  2079. * memory leaks fixed
  2080. Revision 1.12 2001/04/06 14:06:03 jonas
  2081. * fixed incompatibility between new regvar handling and -Op2
  2082. Revision 1.11 2001/04/02 21:20:39 peter
  2083. * resulttype rewrite
  2084. Revision 1.10 2001/02/08 12:13:40 jonas
  2085. * fixed web bug 1391
  2086. Revision 1.9 2001/01/27 21:29:35 florian
  2087. * behavior -Oa optimized
  2088. Revision 1.8 2001/01/10 10:29:36 jonas
  2089. * really fixed problems with -Op2 opts (merged)
  2090. Revision 1.7 2001/01/07 15:49:49 jonas
  2091. * fixed bug in call/jmp optimization with -Op1 and -Op2
  2092. Revision 1.6 2000/12/25 00:07:33 peter
  2093. + new tlinkedlist class (merge of old tstringqueue,tcontainer and
  2094. tlinkedlist objects)
  2095. Revision 1.5 2000/12/16 16:00:12 jonas
  2096. * removed warnings about possible range check errors
  2097. Revision 1.4 2000/11/29 00:30:49 florian
  2098. * unused units removed from uses clause
  2099. * some changes for widestrings
  2100. Revision 1.3 2000/11/14 09:53:18 jonas
  2101. * added missing allocregbetween() (merged)
  2102. Revision 1.2 2000/10/24 10:40:54 jonas
  2103. + register renaming ("fixes" bug1088)
  2104. * changed command line options meanings for optimizer:
  2105. O2 now means peepholopts, CSE and register renaming in 1 pass
  2106. O3 is the same, but repeated until no further optimizations are
  2107. possible or until 5 passes have been done (to avoid endless loops)
  2108. * changed aopt386 so it does this looping
  2109. * added some procedures from csopt386 to the interface because they're
  2110. used by rropt386 as well
  2111. * some changes to csopt386 and daopt386 so that newly added instructions
  2112. by the CSE get optimizer info (they were simply skipped previously),
  2113. this fixes some bugs
  2114. Revision 1.1 2000/10/15 09:47:43 peter
  2115. * moved to i386/
  2116. Revision 1.13 2000/10/02 13:01:29 jonas
  2117. * fixed bug regarding removal of "test/or reg,reg": apparently, shr/shl
  2118. doesn't set the zero flag according to the contents of the register
  2119. after the shift :( (merged from fixes branch)
  2120. Revision 1.12 2000/09/24 15:06:23 peter
  2121. * use defines.inc
  2122. Revision 1.11 2000/09/18 11:28:36 jonas
  2123. * fixed web bug 1133 (merged from fixes branch)
  2124. Revision 1.10 2000/08/18 10:09:13 jonas
  2125. * fix for web bug1099 (merged from fixes branch)
  2126. Revision 1.9 2000/08/05 13:33:08 peter
  2127. * $ifdef go32v2 -> target_info.target=go32v2
  2128. Revision 1.8 2000/08/05 10:35:51 jonas
  2129. * readded l1 variable (between ifdef go32v2 to avoid hints/notes)
  2130. Revision 1.7 2000/08/04 22:00:52 peter
  2131. * merges from fixes
  2132. Revision 1.6 2000/07/31 08:44:05 jonas
  2133. - removed imul support from -dfoldarithops since "imull [reg32],[mem32]"
  2134. doesn't exist (merged from fixes branch)
  2135. Revision 1.5 2000/07/28 13:56:23 jonas
  2136. * fixed bug in shr/shl optimization when -Og is used (merged from fixes
  2137. branch)
  2138. Revision 1.4 2000/07/21 15:19:55 jonas
  2139. * daopt386: changes to getnextinstruction/getlastinstruction so they
  2140. ignore labels who have is_addr set
  2141. + daopt386/csopt386: remove loads of registers which are overwritten
  2142. before their contents are used (especially useful for removing superfluous
  2143. maybe_loadself outputs and push/pops transformed by below optimization
  2144. + popt386: transform pop/pop/pop/.../push/push/push to sequences of
  2145. 'movl x(%esp),%reg' (only active when compiling a go32v2 compiler
  2146. currently because I don't know whether it's safe to do this under Win32/
  2147. Linux (because of problems we had when using esp as frame pointer on
  2148. those os'es)
  2149. Revision 1.3 2000/07/14 05:11:49 michael
  2150. + Patch to 1.1
  2151. Revision 1.2 2000/07/13 11:32:45 michael
  2152. + removed logs
  2153. }