popt386.pas 102 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit popt386;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses Aasmbase,aasmtai,aasmcpu,verbose;
  21. procedure PrePeepHoleOpts(asml: taasmoutput; BlockStart, BlockEnd: tai);
  22. procedure PeepHoleOptPass1(asml: taasmoutput; BlockStart, BlockEnd: tai);
  23. procedure PeepHoleOptPass2(asml: taasmoutput; BlockStart, BlockEnd: tai);
  24. procedure PostPeepHoleOpts(asml: taasmoutput; BlockStart, BlockEnd: tai);
  25. implementation
  26. uses
  27. globtype,systems,
  28. globals,cgbase,procinfo,
  29. symsym,
  30. {$ifdef finaldestdebug}
  31. cobjects,
  32. {$endif finaldestdebug}
  33. cpuinfo,cpubase,cgutils,daopt386;
  { Reports whether the super register of "reg" is still needed after instruction p.

    UsedRegs is first advanced over the items following p (via UpdateUsedRegs).
    The register counts as used when it is a member of UsedRegs and either there
    is no next instruction, or the next instruction does not load the register
    with a new value (as judged by regLoadedWithNewValue). }
  function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean;
  var
    wantedReg: tsuperregister;
  begin
    wantedReg := getsupreg(reg);
    UpdateUsedRegs(UsedRegs, tai(p.Next));
    if not (wantedReg in UsedRegs) then
      { not marked as used at all: nothing else needs to be inspected }
      RegUsedAfterInstruction := false
    else if not getNextInstruction(p, p) then
      { marked as used and nothing follows that could overwrite it }
      RegUsedAfterInstruction := true
    else
      { p now refers to the following instruction }
      RegUsedAfterInstruction := not regLoadedWithNewValue(wantedReg, false, p);
  end;
  { Looks for an x87 store-and-pop at p (fstp/fistp to memory) that is directly
    followed by a reload of the very same memory operand with the same operand
    size (fld/fild).

    When such a pair has size S_FX, addresses memory through the frame pointer
    without an index register, is not located below the function result
    variable (if there is one), and is immediately followed by "leave" or "ret",
    both instructions are removed from asmL and freed, p is set to the
    leave/ret instruction and removeLastDeallocForFuncRes is called for it.

    returns true if a "continue" should be done after this optimization }
  function doFpuLoadStoreOpt(asmL: TAAsmoutput; var p: tai): boolean;
  var
    reload, exitInstr: tai;
    isStoreReloadPair: boolean;
  begin
    doFpuLoadStoreOpt := false;

    { --- is this "store mem; load mem" on the same location? --- }
    if taicpu(p).oper[0]^.typ <> top_ref then
      exit;
    if not getNextInstruction(p, reload) then
      exit;
    if reload.typ <> ait_instruction then
      exit;
    isStoreReloadPair :=
      ((taicpu(reload).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP)) or
      ((taicpu(p).opcode = A_FISTP) and (taicpu(reload).opcode = A_FILD));
    if not isStoreReloadPair then
      exit;
    if taicpu(reload).oper[0]^.typ <> top_ref then
      exit;
    if taicpu(reload).opsize <> taicpu(p).opsize then
      exit;
    if not refsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
      exit;

    { --- can the pair be dropped entirely? --- }
    { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
    if taicpu(p).opsize <> S_FX then
      exit;
    if not getNextInstruction(reload, exitInstr) then
      exit;
    if exitInstr.typ <> ait_instruction then
      exit;
    if (taicpu(exitInstr).opcode <> A_LEAVE) and
       (taicpu(exitInstr).opcode <> A_RET) then
      exit;
    if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
      exit;
    { keep the pair when it targets memory below the function result variable }
    if assigned(current_procinfo.procdef.funcretsym) and
       (taicpu(p).oper[0]^.ref^.offset <
        tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
      exit;
    if taicpu(p).oper[0]^.ref^.index <> NR_NO then
      exit;

    asml.remove(p);
    asml.remove(reload);
    p.free;
    reload.free;
    p := exitInstr;
    removeLastDeallocForFuncRes(asmL, p);
    doFPULoadStoreOpt := true;

    { Disabled alternative for the non-matching case; it can't be done because
      the store operation rounds:

        else
          (fst can't store an extended value!)
          if (taicpu(p).opsize <> S_FX) and
             (taicpu(p).opsize <> S_IQ) then
            begin
              if (taicpu(p).opcode = A_FSTP) then
                taicpu(p).opcode := A_FST
              else taicpu(p).opcode := A_FIST;
              asml.remove(hp1);
              hp1.free;
            end
    }
  end;
  { Walks the list items from BlockStart up to (but not including) BlockEnd and
    applies three local rewrites to instructions in asml:

      * A_IMUL with a constant first operand, a register second operand and
        size S_L: "imul $1" is removed or turned into a mov; for the constants
        3, 5, 6, 9, 10 and 12 the multiply is replaced by lea/add sequences
        (only below ClassPentium2, not when optimizing for size, and not when
        the next instruction is a jo/jno that tests the overflow flag; the
        two-instruction forms for 6, 10 and 12 additionally require
        aktoptprocessor <= Class386).
      * A_SAR/A_SHR by a constant directly followed by A_SHL by a constant on
        the same operand: folded into shift+and, and+shift or a single and.
      * "xor reg,reg" is rewritten to "mov $0,reg" (see the comment at that
        case for why).

    Fix compared to the previous revision: the three-operand form of
    "imul $6, reg1, reg2" wrote its second lea into reg1 instead of reg2.

    NOTE(review): the three-operand sequences for 6 and 12 write reg2 before
    they read reg1 again, so they are only correct when reg1 <> reg2 -
    confirm that the code generator never emits the aliased form. }
  procedure PrePeepHoleOpts(asml: taasmoutput; BlockStart, BlockEnd: tai);
  var
    p,hp1: tai;
    l: aint;
    tmpRef: treference;
  begin
    p := BlockStart;
    while (p <> BlockEnd) Do
      begin
        case p.Typ Of
          Ait_Instruction:
            begin
              case taicpu(p).opcode Of
                A_IMUL:
                  {changes certain "imul const, %reg"'s to lea sequences}
                  begin
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) and
                       (taicpu(p).opsize = S_L) then
                      if (taicpu(p).oper[0]^.val = 1) then
                        if (taicpu(p).ops = 2) then
                          {remove "imul $1, reg"}
                          begin
                            hp1 := tai(p.Next);
                            asml.remove(p);
                            p.free;
                            p := hp1;
                            { p already points at the next item: skip the advance at the loop end }
                            continue;
                          end
                        else
                          {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                          begin
                            hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                            InsertLLItem(asml, p.previous, p.next, hp1);
                            p.free;
                            p := hp1;
                          end
                      else if
                       ((taicpu(p).ops <= 2) or
                        (taicpu(p).oper[2]^.typ = Top_Reg)) and
                       (aktoptprocessor < ClassPentium2) and
                       (taicpu(p).oper[0]^.val <= 12) and
                       not(CS_LittleSize in aktglobalswitches) and
                       { lea/add do not set the overflow flag like imul does, so
                         leave the imul alone when a jo/jno follows }
                       (not(GetNextInstruction(p, hp1)) or
                         {GetNextInstruction(p, hp1) and}
                         not((tai(hp1).typ = ait_instruction) and
                             ((taicpu(hp1).opcode=A_Jcc) and
                              (taicpu(hp1).condition in [C_O,C_NO])))) then
                        begin
                          reference_reset(tmpref);
                          case taicpu(p).oper[0]^.val Of
                            3: begin
                               {imul 3, reg1, reg2 to
                                  lea (reg1,reg1,2), reg2
                                imul 3, reg1 to
                                  lea (reg1,reg1,2), reg1}
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 2;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(asml,p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            5: begin
                               {imul 5, reg1, reg2 to
                                  lea (reg1,reg1,4), reg2
                                imul 5, reg1 to
                                  lea (reg1,reg1,4), reg1}
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 4;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(asml,p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            6: begin
                               {imul 6, reg1, reg2 to
                                  lea (,reg1,2), reg2
                                  lea (reg2,reg1,4), reg2
                                imul 6, reg1 to
                                  lea (reg1,reg1,2), reg1
                                  add reg1, reg1}
                                 if (aktoptprocessor <= Class386) then
                                   begin
                                     { the SECOND instruction of the pair is created first
                                       and inserted right after p ... }
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     if (taicpu(p).ops = 3) then
                                       begin
                                         TmpRef.base := taicpu(p).oper[2]^.reg;
                                         TmpRef.ScaleFactor := 4;
                                         { the result belongs in reg2 (oper[2]); the source
                                           register reg1 (oper[1]) must stay untouched }
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                       end
                                     else
                                       begin
                                         hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                           taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                       end;
                                     InsertLLItem(asml,p, p.next, hp1);
                                     { ... then the FIRST instruction replaces p itself }
                                     reference_reset(tmpref);
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     TmpRef.ScaleFactor := 2;
                                     if (taicpu(p).ops = 3) then
                                       begin
                                         TmpRef.base := NR_NO;
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                           taicpu(p).oper[2]^.reg);
                                       end
                                     else
                                       begin
                                         TmpRef.base := taicpu(p).oper[1]^.reg;
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                       end;
                                     InsertLLItem(asml,p.previous, p.next, hp1);
                                     p.free;
                                     { continue after the second instruction of the new pair }
                                     p := tai(hp1.next);
                                   end
                               end;
                            9: begin
                               {imul 9, reg1, reg2 to
                                  lea (reg1,reg1,8), reg2
                                imul 9, reg1 to
                                  lea (reg1,reg1,8), reg1}
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 8;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(asml,p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            10: begin
                               {imul 10, reg1, reg2 to
                                  lea (reg1,reg1,4), reg2
                                  add reg2, reg2
                                imul 10, reg1 to
                                  lea (reg1,reg1,4), reg1
                                  add reg1, reg1}
                                 if (aktoptprocessor <= Class386) then
                                   begin
                                     { second instruction (the add) first, inserted after p }
                                     if (taicpu(p).ops = 3) then
                                       hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                         taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                     else
                                       hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                         taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                     InsertLLItem(asml,p, p.next, hp1);
                                     { first instruction (the lea) replaces p }
                                     TmpRef.base := taicpu(p).oper[1]^.reg;
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     TmpRef.ScaleFactor := 4;
                                     if (taicpu(p).ops = 3) then
                                       hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                     else
                                       hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                     InsertLLItem(asml,p.previous, p.next, hp1);
                                     p.free;
                                     p := tai(hp1.next);
                                   end
                               end;
                            12: begin
                               {imul 12, reg1, reg2 to
                                  lea (,reg1,4), reg2
                                  lea (reg2,reg1,8), reg2
                                imul 12, reg1 to
                                  lea (reg1,reg1,2), reg1
                                  lea (,reg1,4), reg1}
                                 if (aktoptprocessor <= Class386)
                                   then
                                     begin
                                       { second instruction first, inserted after p }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 8;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := NR_NO;
                                           TmpRef.ScaleFactor := 4;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(asml,p, p.next, hp1);
                                       { first instruction replaces p }
                                       reference_reset(tmpref);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := NR_NO;
                                           TmpRef.ScaleFactor := 4;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           TmpRef.ScaleFactor := 2;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(asml,p.previous, p.next, hp1);
                                       p.free;
                                       p := tai(hp1.next);
                                     end
                                end
                          end;
                        end;
                  end;
                A_SAR, A_SHR:
                  {changes the code sequence
                     shr/sar const1, x
                     shl const2, x
                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                  begin
                    if GetNextInstruction(p, hp1) and
                       (tai(hp1).typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_SHL) and
                       (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(hp1).oper[0]^.typ = top_const) and
                       (taicpu(hp1).opsize = taicpu(p).opsize) and
                       (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                       OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                      if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                         not(CS_LittleSize in aktglobalswitches) then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 > const2 }
                        begin
                          { shift right by the difference, then clear the low const2 bits }
                          taicpu(p).LoadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                          taicpu(hp1).opcode := A_AND;
                          l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                          case taicpu(p).opsize Of
                            S_L: taicpu(hp1).LoadConst(0,l Xor aint($ffffffff));
                            S_B: taicpu(hp1).LoadConst(0,l Xor $ff);
                            S_W: taicpu(hp1).LoadConst(0,l Xor $ffff);
                          end;
                        end
                      else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                              not(CS_LittleSize in aktglobalswitches) then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 < const2 }
                        begin
                          { clear the low const1 bits, then shift left by the difference }
                          taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                          taicpu(p).opcode := A_AND;
                          l := (1 shl (taicpu(p).oper[0]^.val))-1;
                          case taicpu(p).opsize Of
                            S_L: taicpu(p).LoadConst(0,l Xor aint($ffffffff));
                            S_B: taicpu(p).LoadConst(0,l Xor $ff);
                            S_W: taicpu(p).LoadConst(0,l Xor $ffff);
                          end;
                        end
                      else
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 = const2 }
                        if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                          begin
                            { only the mask remains; the shl is dropped }
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize Of
                              S_B: taicpu(p).LoadConst(0,l Xor $ff);
                              S_W: taicpu(p).LoadConst(0,l Xor $ffff);
                              S_L: taicpu(p).LoadConst(0,l Xor aint($ffffffff));
                            end;
                            asml.remove(hp1);
                            hp1.free;
                          end;
                  end;
                A_XOR:
                  if (taicpu(p).oper[0]^.typ = top_reg) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                    { temporarily change this to 'mov reg,0' to make it easier }
                    { for the CSE. Will be changed back in pass 2 }
                    begin
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).loadconst(0,0);
                    end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  386. procedure PeepHoleOptPass1(Asml: taasmoutput; BlockStart, BlockEnd: tai);
  387. {First pass of peepholeoptimizations}
  388. var
  389. l : longint;
  390. p,hp1,hp2 : tai;
  391. hp3,hp4: tai;
  392. TmpRef: TReference;
  393. UsedRegs, TmpUsedRegs: TRegSet;
  394. TmpBool1, TmpBool2: Boolean;
  function SkipLabels(hp: tai; var hp2: tai): boolean;
  { Starting at hp, steps over every following item whose type is in SkipInstr
    or is a label or an alignment.
    Returns true and stores the first item that is none of those in hp2.
    Returns false when the end of the list is reached first; hp2 then receives
    the last item that was visited. }
  var
    ahead: tai;
  begin
    ahead := tai(hp.next);
    while assigned(ahead) and
          (ahead.typ in (SkipInstr + [ait_label,ait_align])) do
      begin
        hp := ahead;
        ahead := tai(hp.next);
      end;
    SkipLabels := assigned(ahead);
    if assigned(ahead) then
      hp2 := ahead
    else
      hp2 := hp;
  end;
  function GetFinalDestination(asml: taasmoutput; hp: taicpu; level: longint): boolean;
  {traces successive jumps to their final destination and sets it, e.g.
     je l1                je l3
     <code>               <code>
     l1:       becomes    l1:
     je l2                je l3
     <code>               <code>
     l2:                  l2:
     jmp l3               jmp l3

   the level parameter denotes how deep we have already followed the jump,
   to avoid endless loops with constructs such as "l5: ; jmp l5"

   Returns false (leaving hp's target as it is at that point) when level
   exceeds 20, when the jump found at the destination targets the same label
   number as hp, or when a recursive call returned false. Returns true in all
   other cases, including when nothing could be changed.

   Fixes compared to the previous revision:
    - the "next label reused" path now releases the reference on the label hp
      pointed to before, like the other two retargeting paths already did;
    - the finaldestdebug-only statements had one closing parenthesis too many
      and did not compile with that define set. }
  var p1, p2: tai;
      l: tasmlabel;

    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
    { Skips the items after hp whose type is in SkipInstr or is an alignment;
      if the item after those is a label, stores it in l and returns true. }
    begin
      FindAnyLabel := false;
      while assigned(hp.next) and
            (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
        hp := tai(hp.next);
      if assigned(hp.next) and
         (tai(hp.next).typ = ait_label) then
        begin
          FindAnyLabel := true;
          l := tai_label(hp.next).l;
        end
    end;

  begin
    GetfinalDestination := false;
    if level > 20 then
      exit;
    p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
    if assigned(p1) then
      begin
        { move from the label entry to the first real instruction behind it }
        SkipLabels(p1,p1);
        if (tai(p1).typ = ait_instruction) and
           (taicpu(p1).is_jmp) then
          if { the next instruction after the label where the jump hp arrives}
             { is unconditional or of the same type as hp, so continue       }
             (taicpu(p1).condition in [C_None,hp.condition]) or
             { the next instruction after the label where the jump hp arrives}
             { is the opposite of hp (so this one is never taken), but after }
             { that one there is a branch that will be taken, so perform a   }
             { little hack: set p1 equal to this instruction (that's what the}
             { last SkipLabels is for, only works with short bool evaluation)}
             ((taicpu(p1).condition = inverse_cond(hp.condition)) and
              SkipLabels(p1,p2) and
              (p2.typ = ait_instruction) and
              (taicpu(p2).is_jmp) and
              (taicpu(p2).condition in [C_None,hp.condition]) and
              SkipLabels(p1,p1)) then
            begin
              { quick check for loops of the form "l5: ; jmp l5 }
              if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                   tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                exit;
              { resolve the rest of the chain first, then take over p1's target }
              if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                exit;
              tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
              hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
              tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
            end
          else
            { the jump at the destination is never taken when coming from hp: }
            { let hp jump directly behind it                                   }
            if (taicpu(p1).condition = inverse_cond(hp.condition)) then
              if not FindAnyLabel(p1,l) then
                begin
  {$ifdef finaldestdebug}
                  insertllitem(asml,p1,p1.next,tai_comment.Create(
                    strpnew('previous label inserted')));
  {$endif finaldestdebug}
                  objectlibrary.getjumplabel(l);
                  insertllitem(asml,p1,p1.next,tai_label.Create(l));
                  tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol := l;
                  l.increfs;
                  { this won't work, since the new label isn't in the labeltable }
                  { so it will fail the rangecheck. Labeltable should become a   }
                  { hashtable to support this:                                   }
                  { GetFinalDestination(asml, hp);                               }
                end
              else
                begin
  {$ifdef finaldestdebug}
                  insertllitem(asml,p1,p1.next,tai_comment.Create(
                    strpnew('next label reused')));
  {$endif finaldestdebug}
                  l.increfs;
                  { hp no longer refers to its old target: release that reference }
                  { (taken after increfs so the count cannot drop in between if   }
                  { both happen to be the same label)                             }
                  tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol := l;
                  if not GetFinalDestination(asml, hp,succ(level)) then
                    exit;
                end;
      end;
    GetFinalDestination := true;
  end;
  function DoSubAddOpt(var p: tai): Boolean;
  { Folds the instruction preceding p into p's constant operand when that
    instruction is a dec/sub/add on the register that is p's second operand and
    has the same operand size:
      dec reg          -> p's constant + 1
      sub const, reg   -> p's constant + const
      add const, reg   -> p's constant - const
    The preceding instruction is removed and freed. If an "add" cancels p's
    constant down to 0, p itself is removed as well, p is moved to the previous
    instruction (or to the following item if there is none) and the function
    returns true. In every other case it returns false.
    Uses hp1 and asml of the enclosing procedure. }
  var
    prevIsAdd: boolean;
  begin
    DoSubAddOpt := False;
    if not GetLastInstruction(p, hp1) then
      exit;
    if hp1.typ <> ait_instruction then
      exit;
    if taicpu(hp1).opsize <> taicpu(p).opsize then
      exit;

    case taicpu(hp1).opcode Of
      A_DEC:
        if (taicpu(hp1).oper[0]^.typ = top_reg) and
           (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            taicpu(p).LoadConst(0,taicpu(p).oper[0]^.val+1);
            asml.remove(hp1);
            hp1.free;
          end;
      A_SUB, A_ADD:
        if (taicpu(hp1).oper[0]^.typ = top_const) and
           (taicpu(hp1).oper[1]^.typ = top_reg) and
           (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            { remember the kind of instruction before hp1 is freed }
            prevIsAdd := (taicpu(hp1).opcode = A_ADD);
            if prevIsAdd then
              taicpu(p).LoadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val)
            else
              taicpu(p).LoadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
            asml.remove(hp1);
            hp1.free;
            { only the add case can cancel p out completely }
            if prevIsAdd and (taicpu(p).oper[0]^.val = 0) then
              begin
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                if not GetLastInstruction(hp1, p) then
                  p := hp1;
                DoSubAddOpt := True;
              end
          end;
    end;
  end;
  549. begin
  550. p := BlockStart;
  551. UsedRegs := [];
  552. while (p <> BlockEnd) Do
  553. begin
  554. UpDateUsedRegs(UsedRegs, tai(p.next));
  555. case p.Typ Of
  556. ait_instruction:
  557. begin
  558. { Handle Jmp Optimizations }
  559. if taicpu(p).is_jmp then
  560. begin
  561. {the following if-block removes all code between a jmp and the next label,
  562. because it can never be executed}
  563. if (taicpu(p).opcode = A_JMP) then
  564. begin
  565. while GetNextInstruction(p, hp1) and
  566. (hp1.typ <> ait_label) do
  567. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  568. begin
  569. asml.remove(hp1);
  570. hp1.free;
  571. end
  572. else break;
  573. end;
  574. { remove jumps to a label coming right after them }
  575. if GetNextInstruction(p, hp1) then
  576. begin
  577. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  578. {$warning FIXME removing the first instruction fails}
  579. (p<>blockstart) then
  580. begin
  581. hp2:=tai(hp1.next);
  582. asml.remove(p);
  583. p.free;
  584. p:=hp2;
  585. continue;
  586. end
  587. else
  588. begin
  589. if hp1.typ = ait_label then
  590. SkipLabels(hp1,hp1);
  591. if (tai(hp1).typ=ait_instruction) and
  592. (taicpu(hp1).opcode=A_JMP) and
  593. GetNextInstruction(hp1, hp2) and
  594. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  595. begin
  596. if taicpu(p).opcode=A_Jcc then
  597. begin
  598. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  599. tai_label(hp2).l.decrefs;
  600. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  601. { when free'ing hp1, the ref. isn't decresed, so we don't
  602. increase it (FK)
  603. taicpu(p).oper[0]^.ref^.symbol.increfs;
  604. }
  605. asml.remove(hp1);
  606. hp1.free;
  607. GetFinalDestination(asml, taicpu(p),0);
  608. end
  609. else
  610. begin
  611. GetFinalDestination(asml, taicpu(p),0);
  612. p:=tai(p.next);
  613. continue;
  614. end;
  615. end
  616. else
  617. GetFinalDestination(asml, taicpu(p),0);
  618. end;
  619. end;
  620. end
  621. else
  622. { All other optimizes }
  623. begin
  624. for l := 0 to taicpu(p).ops-1 Do
  625. if (taicpu(p).oper[l]^.typ = top_ref) then
  626. With taicpu(p).oper[l]^.ref^ Do
  627. begin
  628. if (base = NR_NO) and
  629. (index <> NR_NO) and
  630. (scalefactor in [0,1]) then
  631. begin
  632. base := index;
  633. index := NR_NO
  634. end
  635. end;
  636. case taicpu(p).opcode Of
  637. A_AND:
  638. begin
  639. if (taicpu(p).oper[0]^.typ = top_const) and
  640. (taicpu(p).oper[1]^.typ = top_reg) and
  641. GetNextInstruction(p, hp1) and
  642. (tai(hp1).typ = ait_instruction) and
  643. (taicpu(hp1).opcode = A_AND) and
  644. (taicpu(hp1).oper[0]^.typ = top_const) and
  645. (taicpu(hp1).oper[1]^.typ = top_reg) and
  646. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) then
  647. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  648. begin
  649. taicpu(p).LoadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  650. asml.remove(hp1);
  651. hp1.free;
  652. end
  653. else
  654. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  655. jump, but only if it's a conditional jump (PFV) }
  656. if (taicpu(p).oper[1]^.typ = top_reg) and
  657. GetNextInstruction(p, hp1) and
  658. (hp1.typ = ait_instruction) and
  659. (taicpu(hp1).is_jmp) and
  660. (taicpu(hp1).opcode<>A_JMP) and
  661. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  662. taicpu(p).opcode := A_TEST;
  663. end;
  664. A_CMP:
  665. begin
  666. if (taicpu(p).oper[0]^.typ = top_const) and
  667. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  668. (taicpu(p).oper[0]^.val = 0) and
  669. GetNextInstruction(p, hp1) and
  670. (hp1.typ = ait_instruction) and
  671. (taicpu(hp1).is_jmp) and
  672. (taicpu(hp1).opcode=A_Jcc) and
  673. (taicpu(hp1).condition in [C_LE,C_BE]) and
  674. GetNextInstruction(hp1,hp2) and
  675. (hp2.typ = ait_instruction) and
  676. (taicpu(hp2).opcode = A_DEC) and
  677. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  678. GetNextInstruction(hp2, hp3) and
  679. (hp3.typ = ait_instruction) and
  680. (taicpu(hp3).is_jmp) and
  681. (taicpu(hp3).opcode = A_JMP) and
  682. GetNextInstruction(hp3, hp4) and
  683. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  684. begin
  685. taicpu(hp2).Opcode := A_SUB;
  686. taicpu(hp2).Loadoper(1,taicpu(hp2).oper[0]^);
  687. taicpu(hp2).LoadConst(0,1);
  688. taicpu(hp2).ops:=2;
  689. taicpu(hp3).Opcode := A_Jcc;
  690. case taicpu(hp1).condition of
  691. C_LE: taicpu(hp3).condition := C_GE;
  692. C_BE: taicpu(hp3).condition := C_AE;
  693. end;
  694. asml.remove(p);
  695. asml.remove(hp1);
  696. p.free;
  697. hp1.free;
  698. p := hp2;
  699. continue;
  700. end
  701. end;
  702. A_FLD:
  703. begin
  704. if (taicpu(p).oper[0]^.typ = top_reg) and
  705. GetNextInstruction(p, hp1) and
  706. (hp1.typ = Ait_Instruction) and
  707. (taicpu(hp1).oper[0]^.typ = top_reg) and
  708. (taicpu(hp1).oper[1]^.typ = top_reg) and
  709. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  710. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  711. { change to
  712. fld reg fxxx reg,st
  713. fxxxp st, st1 (hp1)
  714. Remark: non commutative operations must be reversed!
  715. }
  716. begin
  717. case taicpu(hp1).opcode Of
  718. A_FMULP,A_FADDP,
  719. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  720. begin
  721. case taicpu(hp1).opcode Of
  722. A_FADDP: taicpu(hp1).opcode := A_FADD;
  723. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  724. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  725. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  726. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  727. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  728. end;
  729. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  730. taicpu(hp1).oper[1]^.reg := NR_ST;
  731. asml.remove(p);
  732. p.free;
  733. p := hp1;
  734. continue;
  735. end;
  736. end;
  737. end
  738. else
  739. if (taicpu(p).oper[0]^.typ = top_ref) and
  740. GetNextInstruction(p, hp2) and
  741. (hp2.typ = Ait_Instruction) and
  742. (taicpu(hp2).ops = 2) and
  743. (taicpu(hp2).oper[0]^.typ = top_reg) and
  744. (taicpu(hp2).oper[1]^.typ = top_reg) and
  745. (taicpu(p).opsize in [S_FS, S_FL]) and
  746. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  747. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  748. if GetLastInstruction(p, hp1) and
  749. (hp1.typ = Ait_Instruction) and
  750. ((taicpu(hp1).opcode = A_FLD) or
  751. (taicpu(hp1).opcode = A_FST)) and
  752. (taicpu(hp1).opsize = taicpu(p).opsize) and
  753. (taicpu(hp1).oper[0]^.typ = top_ref) and
  754. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  755. if ((taicpu(hp2).opcode = A_FMULP) or
  756. (taicpu(hp2).opcode = A_FADDP)) then
  757. { change to
  758. fld/fst mem1 (hp1) fld/fst mem1
  759. fld mem1 (p) fadd/
  760. faddp/ fmul st, st
  761. fmulp st, st1 (hp2) }
  762. begin
  763. asml.remove(p);
  764. p.free;
  765. p := hp1;
  766. if (taicpu(hp2).opcode = A_FADDP) then
  767. taicpu(hp2).opcode := A_FADD
  768. else
  769. taicpu(hp2).opcode := A_FMUL;
  770. taicpu(hp2).oper[1]^.reg := NR_ST;
  771. end
  772. else
  773. { change to
  774. fld/fst mem1 (hp1) fld/fst mem1
  775. fld mem1 (p) fld st}
  776. begin
  777. taicpu(p).changeopsize(S_FL);
  778. taicpu(p).loadreg(0,NR_ST);
  779. end
  780. else
  781. begin
  782. case taicpu(hp2).opcode Of
  783. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  784. { change to
  785. fld/fst mem1 (hp1) fld/fst mem1
  786. fld mem2 (p) fxxx mem2
  787. fxxxp st, st1 (hp2) }
  788. begin
  789. case taicpu(hp2).opcode Of
  790. A_FADDP: taicpu(p).opcode := A_FADD;
  791. A_FMULP: taicpu(p).opcode := A_FMUL;
  792. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  793. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  794. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  795. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  796. end;
  797. asml.remove(hp2);
  798. hp2.free;
  799. end
  800. end
  801. end
  802. end;
  803. A_FSTP,A_FISTP:
  804. if doFpuLoadStoreOpt(asmL,p) then
  805. continue;
  806. A_LEA:
  807. begin
  808. {removes seg register prefixes from LEA operations, as they
  809. don't do anything}
  810. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  811. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  812. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  813. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  814. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  815. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  816. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  817. (taicpu(p).oper[0]^.ref^.offset = 0) then
  818. begin
  819. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  820. taicpu(p).oper[1]^.reg);
  821. InsertLLItem(asml,p.previous,p.next, hp1);
  822. p.free;
  823. p := hp1;
  824. continue;
  825. end
  826. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  827. begin
  828. hp1 := tai(p.Next);
  829. asml.remove(p);
  830. p.free;
  831. p := hp1;
  832. continue;
  833. end
  834. else
  835. with taicpu(p).oper[0]^.ref^ do
  836. if (base = taicpu(p).oper[1]^.reg) then
  837. begin
  838. l := offset;
  839. if (l=1) then
  840. begin
  841. taicpu(p).opcode := A_INC;
  842. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  843. taicpu(p).ops := 1
  844. end
  845. else if (l=-1) then
  846. begin
  847. taicpu(p).opcode := A_DEC;
  848. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  849. taicpu(p).ops := 1;
  850. end
  851. else
  852. begin
  853. taicpu(p).opcode := A_ADD;
  854. taicpu(p).loadconst(0,l);
  855. end;
  856. end;
  857. end;
  858. A_MOV:
  859. begin
  860. TmpUsedRegs := UsedRegs;
  861. if (taicpu(p).oper[1]^.typ = top_reg) and
  862. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  863. GetNextInstruction(p, hp1) and
  864. (tai(hp1).typ = ait_instruction) and
  865. (taicpu(hp1).opcode = A_MOV) and
  866. (taicpu(hp1).oper[0]^.typ = top_reg) and
  867. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  868. begin
  869. {we have "mov x, %treg; mov %treg, y}
  870. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  871. {we've got "mov x, %treg; mov %treg, y; with %treg is not used after }
  872. case taicpu(p).oper[0]^.typ Of
  873. top_reg:
  874. begin
  875. { change "mov %reg, %treg; mov %treg, y"
  876. to "mov %reg, y" }
  877. taicpu(p).LoadOper(1,taicpu(hp1).oper[1]^);
  878. asml.remove(hp1);
  879. hp1.free;
  880. continue;
  881. end;
  882. top_ref:
  883. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  884. begin
  885. { change "mov mem, %treg; mov %treg, %reg"
  886. to "mov mem, %reg" }
  887. taicpu(p).Loadoper(1,taicpu(hp1).oper[1]^);
  888. asml.remove(hp1);
  889. hp1.free;
  890. continue;
  891. end;
  892. end
  893. end
  894. else
  895. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  896. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  897. penalty}
  898. if (taicpu(p).oper[0]^.typ = top_reg) and
  899. (taicpu(p).oper[1]^.typ = top_reg) and
  900. GetNextInstruction(p,hp1) and
  901. (tai(hp1).typ = ait_instruction) and
  902. (taicpu(hp1).ops >= 1) and
  903. (taicpu(hp1).oper[0]^.typ = top_reg) and
  904. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  905. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  906. begin
  907. if ((taicpu(hp1).opcode = A_OR) or
  908. (taicpu(hp1).opcode = A_TEST)) and
  909. (taicpu(hp1).oper[1]^.typ = top_reg) and
  910. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  911. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  912. begin
  913. TmpUsedRegs := UsedRegs;
  914. { reg1 will be used after the first instruction, }
  915. { so update the allocation info }
  916. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  917. if GetNextInstruction(hp1, hp2) and
  918. (hp2.typ = ait_instruction) and
  919. taicpu(hp2).is_jmp and
  920. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  921. { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  922. "test %reg1, %reg1; jxx" }
  923. begin
  924. taicpu(hp1).Loadoper(0,taicpu(p).oper[0]^);
  925. taicpu(hp1).Loadoper(1,taicpu(p).oper[0]^);
  926. asml.remove(p);
  927. p.free;
  928. p := hp1;
  929. continue
  930. end
  931. else
  932. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  933. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  934. begin
  935. taicpu(hp1).Loadoper(0,taicpu(p).oper[0]^);
  936. taicpu(hp1).Loadoper(1,taicpu(p).oper[0]^);
  937. end;
  938. end
  939. { else
  940. if (taicpu(p.next)^.opcode
  941. in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  942. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  943. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  944. end
  945. else
  946. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  947. x >= RetOffset) as it doesn't do anything (it writes either to a
  948. parameter or to the temporary storage room for the function
  949. result)}
  950. if GetNextInstruction(p, hp1) and
  951. (tai(hp1).typ = ait_instruction) then
  952. if ((taicpu(hp1).opcode = A_LEAVE) or
  953. (taicpu(hp1).opcode = A_RET)) and
  954. (taicpu(p).oper[1]^.typ = top_ref) and
  955. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  956. not(assigned(current_procinfo.procdef.funcretsym) and
  957. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  958. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  959. (taicpu(p).oper[0]^.typ = top_reg) then
  960. begin
  961. asml.remove(p);
  962. p.free;
  963. p := hp1;
  964. RemoveLastDeallocForFuncRes(asmL,p);
  965. end
  966. else
  967. if (taicpu(p).oper[0]^.typ = top_reg) and
  968. (taicpu(p).oper[1]^.typ = top_ref) and
  969. (taicpu(p).opsize = taicpu(hp1).opsize) and
  970. (taicpu(hp1).opcode = A_CMP) and
  971. (taicpu(hp1).oper[1]^.typ = top_ref) and
  972. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  973. {change "mov reg1, mem1; cmp x, mem1" to "mov reg, mem1; cmp x, reg1"}
  974. begin
  975. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  976. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  977. end;
  978. { Next instruction is also a MOV ? }
  979. if GetNextInstruction(p, hp1) and
  980. (tai(hp1).typ = ait_instruction) and
  981. (taicpu(hp1).opcode = A_MOV) and
  982. (taicpu(hp1).opsize = taicpu(p).opsize) then
  983. begin
  984. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  985. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  986. {mov reg1, mem1 or mov mem1, reg1
  987. mov mem2, reg2 mov reg2, mem2}
  988. begin
  989. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  990. {mov reg1, mem1 or mov mem1, reg1
  991. mov mem2, reg1 mov reg2, mem1}
  992. begin
  993. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  994. { Removes the second statement from
  995. mov reg1, mem1/reg2
  996. mov mem1/reg2, reg1 }
  997. begin
  998. if (taicpu(p).oper[0]^.typ = top_reg) then
  999. AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1000. asml.remove(hp1);
  1001. hp1.free;
  1002. end
  1003. else
  1004. begin
  1005. TmpUsedRegs := UsedRegs;
  1006. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1007. if (taicpu(p).oper[1]^.typ = top_ref) and
  1008. { mov reg1, mem1
  1009. mov mem2, reg1 }
  1010. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1011. GetNextInstruction(hp1, hp2) and
  1012. (hp2.typ = ait_instruction) and
  1013. (taicpu(hp2).opcode = A_CMP) and
  1014. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1015. (taicpu(hp2).oper[0]^.typ = TOp_Ref) and
  1016. (taicpu(hp2).oper[1]^.typ = TOp_Reg) and
  1017. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(p).oper[1]^.ref^) and
  1018. (taicpu(hp2).oper[1]^.reg= taicpu(p).oper[0]^.reg) and
  1019. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1020. { change to
  1021. mov reg1, mem1 mov reg1, mem1
  1022. mov mem2, reg1 cmp reg1, mem2
  1023. cmp mem1, reg1 }
  1024. begin
  1025. asml.remove(hp2);
  1026. hp2.free;
  1027. taicpu(hp1).opcode := A_CMP;
  1028. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1029. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1030. end;
  1031. end;
  1032. end
  1033. else
  1034. begin
  1035. tmpUsedRegs := UsedRegs;
  1036. if GetNextInstruction(hp1, hp2) and
  1037. (taicpu(p).oper[0]^.typ = top_ref) and
  1038. (taicpu(p).oper[1]^.typ = top_reg) and
  1039. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1040. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1041. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1042. (tai(hp2).typ = ait_instruction) and
  1043. (taicpu(hp2).opcode = A_MOV) and
  1044. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1045. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1046. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1047. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1048. if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and
  1049. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1050. { mov mem1, %reg1
  1051. mov %reg1, mem2
  1052. mov mem2, reg2
  1053. to:
  1054. mov mem1, reg2
  1055. mov reg2, mem2}
  1056. begin
  1057. AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1058. taicpu(p).Loadoper(1,taicpu(hp2).oper[1]^);
  1059. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1060. asml.remove(hp2);
  1061. hp2.free;
  1062. end
  1063. else
  1064. if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1065. not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and
  1066. not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then
  1067. { mov mem1, reg1 mov mem1, reg1
  1068. mov reg1, mem2 mov reg1, mem2
  1069. mov mem2, reg2 mov mem2, reg1
  1070. to: to:
  1071. mov mem1, reg1 mov mem1, reg1
  1072. mov mem1, reg2 mov reg1, mem2
  1073. mov reg1, mem2
  1074. or (if mem1 depends on reg1
  1075. and/or if mem2 depends on reg2)
  1076. to:
  1077. mov mem1, reg1
  1078. mov reg1, mem2
  1079. mov reg1, reg2
  1080. }
  1081. begin
  1082. taicpu(hp1).LoadRef(0,taicpu(p).oper[0]^.ref^);
  1083. taicpu(hp1).LoadReg(1,taicpu(hp2).oper[1]^.reg);
  1084. taicpu(hp2).LoadRef(1,taicpu(hp2).oper[0]^.ref^);
  1085. taicpu(hp2).LoadReg(0,taicpu(p).oper[1]^.reg);
  1086. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1087. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1088. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1089. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1090. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1091. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1092. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1093. end
  1094. else
  1095. if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1096. begin
  1097. taicpu(hp2).LoadReg(0,taicpu(hp1).Oper[0]^.reg);
  1098. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1099. end
  1100. else
  1101. begin
  1102. asml.remove(hp2);
  1103. hp2.free;
  1104. end
  1105. end
  1106. end
  1107. else
  1108. (* {movl [mem1],reg1
  1109. movl [mem1],reg2
  1110. to:
  1111. movl [mem1],reg1
  1112. movl reg1,reg2 }
  1113. if (taicpu(p).oper[0]^.typ = top_ref) and
  1114. (taicpu(p).oper[1]^.typ = top_reg) and
  1115. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1116. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1117. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1118. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1119. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1120. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1121. taicpu(hp1).LoadReg(0,taicpu(p).oper[1]^.reg)
  1122. else*)
  1123. { movl const1,[mem1]
  1124. movl [mem1],reg1
  1125. to:
  1126. movl const1,reg1
  1127. movl reg1,[mem1] }
  1128. if (taicpu(p).oper[0]^.typ = top_const) and
  1129. (taicpu(p).oper[1]^.typ = top_ref) and
  1130. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1131. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1132. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1133. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) then
  1134. begin
  1135. allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1136. taicpu(hp1).LoadReg(0,taicpu(hp1).oper[1]^.reg);
  1137. taicpu(hp1).LoadRef(1,taicpu(p).oper[1]^.ref^);
  1138. taicpu(p).LoadReg(1,taicpu(hp1).oper[0]^.reg);
  1139. end
  1140. end;
  1141. end;
  1142. A_MOVZX:
  1143. begin
  1144. {removes superfluous And's after movzx's}
  1145. if (taicpu(p).oper[1]^.typ = top_reg) and
  1146. GetNextInstruction(p, hp1) and
  1147. (tai(hp1).typ = ait_instruction) and
  1148. (taicpu(hp1).opcode = A_AND) and
  1149. (taicpu(hp1).oper[0]^.typ = top_const) and
  1150. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1151. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1152. case taicpu(p).opsize Of
  1153. S_BL, S_BW:
  1154. if (taicpu(hp1).oper[0]^.val = $ff) then
  1155. begin
  1156. asml.remove(hp1);
  1157. hp1.free;
  1158. end;
  1159. S_WL:
  1160. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1161. begin
  1162. asml.remove(hp1);
  1163. hp1.free;
  1164. end;
  1165. end;
  1166. {changes some movzx constructs to faster synonims (all examples
  1167. are given with eax/ax, but are also valid for other registers)}
  1168. if (taicpu(p).oper[1]^.typ = top_reg) then
  1169. if (taicpu(p).oper[0]^.typ = top_reg) then
  1170. case taicpu(p).opsize of
  1171. S_BW:
  1172. begin
  1173. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1174. not(CS_LittleSize in aktglobalswitches) then
  1175. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1176. begin
  1177. taicpu(p).opcode := A_AND;
  1178. taicpu(p).changeopsize(S_W);
  1179. taicpu(p).LoadConst(0,$ff);
  1180. end
  1181. else if GetNextInstruction(p, hp1) and
  1182. (tai(hp1).typ = ait_instruction) and
  1183. (taicpu(hp1).opcode = A_AND) and
  1184. (taicpu(hp1).oper[0]^.typ = top_const) and
  1185. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1186. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1187. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1188. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1189. begin
  1190. taicpu(p).opcode := A_MOV;
  1191. taicpu(p).changeopsize(S_W);
  1192. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1193. taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1194. end;
  1195. end;
  1196. S_BL:
  1197. begin
  1198. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1199. not(CS_LittleSize in aktglobalswitches) then
  1200. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1201. begin
  1202. taicpu(p).opcode := A_AND;
  1203. taicpu(p).changeopsize(S_L);
  1204. taicpu(p).loadconst(0,$ff)
  1205. end
  1206. else if GetNextInstruction(p, hp1) and
  1207. (tai(hp1).typ = ait_instruction) and
  1208. (taicpu(hp1).opcode = A_AND) and
  1209. (taicpu(hp1).oper[0]^.typ = top_const) and
  1210. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1211. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1212. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1213. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1214. begin
  1215. taicpu(p).opcode := A_MOV;
  1216. taicpu(p).changeopsize(S_L);
  1217. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1218. taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1219. end
  1220. end;
  1221. S_WL:
  1222. begin
  1223. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1224. not(CS_LittleSize in aktglobalswitches) then
  1225. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1226. begin
  1227. taicpu(p).opcode := A_AND;
  1228. taicpu(p).changeopsize(S_L);
  1229. taicpu(p).LoadConst(0,$ffff);
  1230. end
  1231. else if GetNextInstruction(p, hp1) and
  1232. (tai(hp1).typ = ait_instruction) and
  1233. (taicpu(hp1).opcode = A_AND) and
  1234. (taicpu(hp1).oper[0]^.typ = top_const) and
  1235. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1236. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1237. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1238. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1239. begin
  1240. taicpu(p).opcode := A_MOV;
  1241. taicpu(p).changeopsize(S_L);
  1242. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1243. taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1244. end;
  1245. end;
  1246. end
  1247. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1248. begin
  1249. if GetNextInstruction(p, hp1) and
  1250. (tai(hp1).typ = ait_instruction) and
  1251. (taicpu(hp1).opcode = A_AND) and
  1252. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1253. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1254. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1255. begin
  1256. taicpu(p).opcode := A_MOV;
  1257. case taicpu(p).opsize Of
  1258. S_BL:
  1259. begin
  1260. taicpu(p).changeopsize(S_L);
  1261. taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1262. end;
  1263. S_WL:
  1264. begin
  1265. taicpu(p).changeopsize(S_L);
  1266. taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1267. end;
  1268. S_BW:
  1269. begin
  1270. taicpu(p).changeopsize(S_W);
  1271. taicpu(hp1).LoadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1272. end;
  1273. end;
  1274. end;
  1275. end;
  1276. end;
  1277. (* should not be generated anymore by the current code generator
  1278. A_POP:
  1279. begin
  1280. if target_info.system=system_i386_go32v2 then
  1281. begin
  1282. { Transform a series of pop/pop/pop/push/push/push to }
  1283. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1284. { because I'm not sure whether they can cope with }
  1285. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1286. { such a problem when using esp as frame pointer (JM) }
  1287. if (taicpu(p).oper[0]^.typ = top_reg) then
  1288. begin
  1289. hp1 := p;
  1290. hp2 := p;
  1291. l := 0;
  1292. while getNextInstruction(hp1,hp1) and
  1293. (hp1.typ = ait_instruction) and
  1294. (taicpu(hp1).opcode = A_POP) and
  1295. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1296. begin
  1297. hp2 := hp1;
  1298. inc(l,4);
  1299. end;
  1300. getLastInstruction(p,hp3);
  1301. l1 := 0;
  1302. while (hp2 <> hp3) and
  1303. assigned(hp1) and
  1304. (hp1.typ = ait_instruction) and
  1305. (taicpu(hp1).opcode = A_PUSH) and
  1306. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1307. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1308. begin
  1309. { change it to a two op operation }
  1310. taicpu(hp2).oper[1]^.typ:=top_none;
  1311. taicpu(hp2).ops:=2;
  1312. taicpu(hp2).opcode := A_MOV;
  1313. taicpu(hp2).Loadoper(1,taicpu(hp1).oper[0]^);
  1314. reference_reset(tmpref);
  1315. tmpRef.base.enum:=R_INTREGISTER;
  1316. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1317. convert_register_to_enum(tmpref.base);
  1318. tmpRef.offset := l;
  1319. taicpu(hp2).loadRef(0,tmpRef);
  1320. hp4 := hp1;
  1321. getNextInstruction(hp1,hp1);
  1322. asml.remove(hp4);
  1323. hp4.free;
  1324. getLastInstruction(hp2,hp2);
  1325. dec(l,4);
  1326. inc(l1);
  1327. end;
  1328. if l <> -4 then
  1329. begin
  1330. inc(l,4);
  1331. for l1 := l1 downto 1 do
  1332. begin
  1333. getNextInstruction(hp2,hp2);
  1334. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1335. end
  1336. end
  1337. end
  1338. end
  1339. else
  1340. begin
  1341. if (taicpu(p).oper[0]^.typ = top_reg) and
  1342. GetNextInstruction(p, hp1) and
  1343. (tai(hp1).typ=ait_instruction) and
  1344. (taicpu(hp1).opcode=A_PUSH) and
  1345. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1346. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1347. begin
  1348. { change it to a two op operation }
  1349. taicpu(p).oper[1]^.typ:=top_none;
  1350. taicpu(p).ops:=2;
  1351. taicpu(p).opcode := A_MOV;
  1352. taicpu(p).Loadoper(1,taicpu(p).oper[0]^);
  1353. reference_reset(tmpref);
  1354. TmpRef.base.enum := R_ESP;
  1355. taicpu(p).LoadRef(0,TmpRef);
  1356. asml.remove(hp1);
  1357. hp1.free;
  1358. end;
  1359. end;
  1360. end;
  1361. *)
  1362. A_PUSH:
  1363. begin
  1364. if (taicpu(p).opsize = S_W) and
  1365. (taicpu(p).oper[0]^.typ = Top_Const) and
  1366. GetNextInstruction(p, hp1) and
  1367. (tai(hp1).typ = ait_instruction) and
  1368. (taicpu(hp1).opcode = A_PUSH) and
  1369. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1370. (taicpu(hp1).opsize = S_W) then
  1371. begin
  1372. taicpu(p).changeopsize(S_L);
  1373. taicpu(p).LoadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1374. asml.remove(hp1);
  1375. hp1.free;
  1376. end;
  1377. end;
  1378. A_SHL, A_SAL:
  1379. begin
  1380. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1381. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1382. (taicpu(p).opsize = S_L) and
  1383. (taicpu(p).oper[0]^.val <= 3) then
  1384. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1385. begin
  1386. TmpBool1 := True; {should we check the next instruction?}
  1387. TmpBool2 := False; {have we found an add/sub which could be
  1388. integrated in the lea?}
  1389. reference_reset(tmpref);
  1390. TmpRef.index := taicpu(p).oper[1]^.reg;
  1391. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1392. while TmpBool1 and
  1393. GetNextInstruction(p, hp1) and
  1394. (tai(hp1).typ = ait_instruction) and
  1395. ((((taicpu(hp1).opcode = A_ADD) or
  1396. (taicpu(hp1).opcode = A_SUB)) and
  1397. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1398. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1399. (((taicpu(hp1).opcode = A_INC) or
  1400. (taicpu(hp1).opcode = A_DEC)) and
  1401. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1402. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) Do
  1403. begin
  1404. TmpBool1 := False;
  1405. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1406. begin
  1407. TmpBool1 := True;
  1408. TmpBool2 := True;
  1409. case taicpu(hp1).opcode of
  1410. A_ADD:
  1411. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1412. A_SUB:
  1413. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1414. end;
  1415. asml.remove(hp1);
  1416. hp1.free;
  1417. end
  1418. else
  1419. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1420. (((taicpu(hp1).opcode = A_ADD) and
  1421. (TmpRef.base = NR_NO)) or
  1422. (taicpu(hp1).opcode = A_INC) or
  1423. (taicpu(hp1).opcode = A_DEC)) then
  1424. begin
  1425. TmpBool1 := True;
  1426. TmpBool2 := True;
  1427. case taicpu(hp1).opcode of
  1428. A_ADD:
  1429. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1430. A_INC:
  1431. inc(TmpRef.offset);
  1432. A_DEC:
  1433. dec(TmpRef.offset);
  1434. end;
  1435. asml.remove(hp1);
  1436. hp1.free;
  1437. end;
  1438. end;
  1439. if TmpBool2 or
  1440. ((aktoptprocessor < ClassPentium2) and
  1441. (taicpu(p).oper[0]^.val <= 3) and
  1442. not(CS_LittleSize in aktglobalswitches)) then
  1443. begin
  1444. if not(TmpBool2) and
  1445. (taicpu(p).oper[0]^.val = 1) then
  1446. begin
  1447. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1448. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1449. end
  1450. else
  1451. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1452. taicpu(p).oper[1]^.reg);
  1453. InsertLLItem(asml,p.previous, p.next, hp1);
  1454. p.free;
  1455. p := hp1;
  1456. end;
  1457. end
  1458. else
  1459. if (aktoptprocessor < ClassPentium2) and
  1460. (taicpu(p).oper[0]^.typ = top_const) and
  1461. (taicpu(p).oper[1]^.typ = top_reg) then
  1462. if (taicpu(p).oper[0]^.val = 1) then
  1463. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1464. but faster on a 486, and Tairable in both U and V pipes on the Pentium
  1465. (unlike shl, which is only Tairable in the U pipe)}
  1466. begin
  1467. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1468. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1469. InsertLLItem(asml,p.previous, p.next, hp1);
  1470. p.free;
  1471. p := hp1;
  1472. end
  1473. else if (taicpu(p).opsize = S_L) and
  1474. (taicpu(p).oper[0]^.val<= 3) then
  1475. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1476. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1477. begin
  1478. reference_reset(tmpref);
  1479. TmpRef.index := taicpu(p).oper[1]^.reg;
  1480. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1481. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1482. InsertLLItem(asml,p.previous, p.next, hp1);
  1483. p.free;
  1484. p := hp1;
  1485. end
  1486. end;
  1487. A_SETcc :
  1488. { changes
  1489. setcc (funcres) setcc reg
  1490. movb (funcres), reg to leave/ret
  1491. leave/ret }
  1492. begin
  1493. if (taicpu(p).oper[0]^.typ = top_ref) and
  1494. GetNextInstruction(p, hp1) and
  1495. GetNextInstruction(hp1, hp2) and
  1496. (hp2.typ = ait_instruction) and
  1497. ((taicpu(hp2).opcode = A_LEAVE) or
  1498. (taicpu(hp2).opcode = A_RET)) and
  1499. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1500. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1501. not(assigned(current_procinfo.procdef.funcretsym) and
  1502. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1503. (hp1.typ = ait_instruction) and
  1504. (taicpu(hp1).opcode = A_MOV) and
  1505. (taicpu(hp1).opsize = S_B) and
  1506. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1507. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1508. begin
  1509. taicpu(p).LoadReg(0,taicpu(hp1).oper[1]^.reg);
  1510. asml.remove(hp1);
  1511. hp1.free;
  1512. end
  1513. end;
  1514. A_SUB:
  1515. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1516. { * change "sub/add const1, reg" or "dec reg" followed by
  1517. "sub const2, reg" to one "sub ..., reg" }
  1518. begin
  1519. if (taicpu(p).oper[0]^.typ = top_const) and
  1520. (taicpu(p).oper[1]^.typ = top_reg) then
  1521. if (taicpu(p).oper[0]^.val = 2) and
  1522. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1523. { Don't do the sub/push optimization if the sub }
  1524. { comes from setting up the stack frame (JM) }
  1525. (not getLastInstruction(p,hp1) or
  1526. (hp1.typ <> ait_instruction) or
  1527. (taicpu(hp1).opcode <> A_MOV) or
  1528. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1529. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1530. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1531. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1532. begin
  1533. hp1 := tai(p.next);
  1534. while Assigned(hp1) and
  1535. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1536. not regReadByInstruction(RS_ESP,hp1) and
  1537. not regModifiedByInstruction(RS_ESP,hp1) do
  1538. hp1 := tai(hp1.next);
  1539. if Assigned(hp1) and
  1540. (tai(hp1).typ = ait_instruction) and
  1541. (taicpu(hp1).opcode = A_PUSH) and
  1542. (taicpu(hp1).opsize = S_W) then
  1543. begin
  1544. taicpu(hp1).changeopsize(S_L);
  1545. if taicpu(hp1).oper[0]^.typ=top_reg then
  1546. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1547. hp1 := tai(p.next);
  1548. asml.remove(p);
  1549. p.free;
  1550. p := hp1;
  1551. continue
  1552. end;
  1553. if DoSubAddOpt(p) then
  1554. continue;
  1555. end
  1556. else if DoSubAddOpt(p) then
  1557. continue
  1558. end;
  1559. end;
  1560. end; { if is_jmp }
  1561. end;
  1562. end;
  1563. updateUsedRegs(UsedRegs,p);
  1564. p:=tai(p.next);
  1565. end;
  1566. end;
  { Tells whether instruction hp1 is an arithmetic/logical operation that
    works on register reg in one of the following two shapes:
      - add/sub/or/xor/and/shl/shr/sar: operand 0 is either a constant or a
        register other than reg, and operand 1 is the register reg;
      - inc/dec: operand 0 is the register reg.
    Every other opcode yields False. }
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    var
      srcIsUsable: boolean;
    begin
      case hp1.opcode of
        A_INC,A_DEC:
          isFoldableArithOp :=
            (hp1.oper[0]^.typ = top_reg) and
            (hp1.oper[0]^.reg = reg);
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          begin
            { operand 0 must be a constant, or a register different from reg }
            if hp1.oper[0]^.typ = top_reg then
              srcIsUsable := hp1.oper[0]^.reg <> reg
            else
              srcIsUsable := hp1.oper[0]^.typ = top_const;
            { operand 1 is only inspected once operand 0 has been accepted }
            isFoldableArithOp := srcIsUsable and
              (hp1.oper[1]^.typ = top_reg) and
              (hp1.oper[1]^.reg = reg);
          end;
        else
          isFoldableArithOp := False;
      end;
    end;
  1584. procedure PeepHoleOptPass2(asml: taasmoutput; BlockStart, BlockEnd: tai);
  1585. {$ifdef USECMOV}
  { Returns True when p is a "mov reg,reg" instruction of operand size
    S_L or S_W; returns False for nil, for non-instructions and for any
    other instruction.

    Memory sources are deliberately not accepted: per the original note (FK),
    "cmov ref,reg" cannot be used because ref could be nil and cmov still
    throws an exception in that case even though the move is not performed. }
  function CanBeCMOV(p : tai) : boolean;
    var
      insn: taicpu;
    begin
      CanBeCMOV:=false;
      { nothing to inspect unless p is an actual instruction }
      if not(assigned(p)) or (p.typ<>ait_instruction) then
        exit;
      insn:=taicpu(p);
      { only 16/32 bit plain moves qualify }
      if (insn.opcode<>A_MOV) or not(insn.opsize in [S_L,S_W]) then
        exit;
      { both source and destination have to be registers }
      CanBeCMOV:=(insn.oper[0]^.typ=top_reg) and
                 (insn.oper[1]^.typ in [top_reg]);
    end;
  1601. {$endif USECMOV}
  1602. var
  1603. p,hp1,hp2: tai;
  1604. {$ifdef USECMOV}
  1605. l : longint;
  1606. condition : tasmcond;
  1607. hp3: tai;
  1608. {$endif USECMOV}
  1609. UsedRegs, TmpUsedRegs: TRegSet;
  1610. begin
  1611. p := BlockStart;
  1612. UsedRegs := [];
  1613. while (p <> BlockEnd) Do
  1614. begin
  1615. UpdateUsedRegs(UsedRegs, tai(p.next));
  1616. case p.Typ Of
  1617. Ait_Instruction:
  1618. begin
  1619. case taicpu(p).opcode Of
  1620. {$ifdef USECMOV}
  1621. A_Jcc:
  1622. if (aktspecificoptprocessor>=ClassPentium2) then
  1623. begin
  1624. { check for
  1625. jCC xxx
  1626. <several movs>
  1627. xxx:
  1628. }
  1629. l:=0;
  1630. GetNextInstruction(p, hp1);
  1631. while assigned(hp1) and
  1632. CanBeCMOV(hp1) and
  1633. { stop on labels }
  1634. not(hp1.typ=ait_label) do
  1635. begin
  1636. inc(l);
  1637. GetNextInstruction(hp1,hp1);
  1638. end;
  1639. if assigned(hp1) then
  1640. begin
  1641. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1642. begin
  1643. if (l<=4) and (l>0) then
  1644. begin
  1645. condition:=inverse_cond(taicpu(p).condition);
  1646. hp2:=p;
  1647. GetNextInstruction(p,hp1);
  1648. p:=hp1;
  1649. repeat
  1650. taicpu(hp1).opcode:=A_CMOVcc;
  1651. taicpu(hp1).condition:=condition;
  1652. GetNextInstruction(hp1,hp1);
  1653. until not(assigned(hp1)) or
  1654. not(CanBeCMOV(hp1));
  1655. { wait with removing else GetNextInstruction could
  1656. ignore the label if it was the only usage in the
  1657. jump moved away }
  1658. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1659. asml.remove(hp2);
  1660. hp2.free;
  1661. continue;
  1662. end;
  1663. end
  1664. else
  1665. begin
  1666. { check further for
  1667. jCC xxx
  1668. <several movs 1>
  1669. jmp yyy
  1670. xxx:
  1671. <several movs 2>
  1672. yyy:
  1673. }
  1674. { hp2 points to jmp yyy }
  1675. hp2:=hp1;
  1676. { skip hp1 to xxx }
  1677. GetNextInstruction(hp1, hp1);
  1678. if assigned(hp2) and
  1679. assigned(hp1) and
  1680. (l<=3) and
  1681. (hp2.typ=ait_instruction) and
  1682. (taicpu(hp2).is_jmp) and
  1683. (taicpu(hp2).condition=C_None) and
  1684. { real label and jump, no further references to the
  1685. label are allowed }
  1686. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
  1687. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1688. begin
  1689. l:=0;
  1690. { skip hp1 to <several moves 2> }
  1691. GetNextInstruction(hp1, hp1);
  1692. while assigned(hp1) and
  1693. CanBeCMOV(hp1) do
  1694. begin
  1695. inc(l);
  1696. GetNextInstruction(hp1, hp1);
  1697. end;
  1698. { hp1 points to yyy: }
  1699. if assigned(hp1) and
  1700. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  1701. begin
  1702. condition:=inverse_cond(taicpu(p).condition);
  1703. GetNextInstruction(p,hp1);
  1704. hp3:=p;
  1705. p:=hp1;
  1706. repeat
  1707. taicpu(hp1).opcode:=A_CMOVcc;
  1708. taicpu(hp1).condition:=condition;
  1709. GetNextInstruction(hp1,hp1);
  1710. until not(assigned(hp1)) or
  1711. not(CanBeCMOV(hp1));
  1712. { hp2 is still at jmp yyy }
  1713. GetNextInstruction(hp2,hp1);
  1714. { hp2 is now at xxx: }
  1715. condition:=inverse_cond(condition);
  1716. GetNextInstruction(hp1,hp1);
  1717. { hp1 is now at <several movs 2> }
  1718. repeat
  1719. taicpu(hp1).opcode:=A_CMOVcc;
  1720. taicpu(hp1).condition:=condition;
  1721. GetNextInstruction(hp1,hp1);
  1722. until not(assigned(hp1)) or
  1723. not(CanBeCMOV(hp1));
  1724. {
  1725. asml.remove(hp1.next)
  1726. hp1.next.free;
  1727. asml.remove(hp1);
  1728. hp1.free;
  1729. }
  1730. { remove jCC }
  1731. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1732. asml.remove(hp3);
  1733. hp3.free;
  1734. { remove jmp }
  1735. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1736. asml.remove(hp2);
  1737. hp2.free;
  1738. continue;
  1739. end;
  1740. end;
  1741. end;
  1742. end;
  1743. end;
  1744. {$endif USECMOV}
  1745. A_FSTP,A_FISTP:
  1746. if doFpuLoadStoreOpt(asmL,p) then
  1747. continue;
  1748. A_IMUL:
  1749. begin
  1750. if (taicpu(p).ops >= 2) and
  1751. ((taicpu(p).oper[0]^.typ = top_const) or
  1752. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  1753. (taicpu(p).oper[1]^.typ = top_reg) and
  1754. ((taicpu(p).ops = 2) or
  1755. ((taicpu(p).oper[2]^.typ = top_reg) and
  1756. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  1757. getLastInstruction(p,hp1) and
  1758. (hp1.typ = ait_instruction) and
  1759. (taicpu(hp1).opcode = A_MOV) and
  1760. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1761. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1762. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1763. { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
  1764. begin
  1765. taicpu(p).ops := 3;
  1766. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  1767. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  1768. asml.remove(hp1);
  1769. hp1.free;
  1770. end;
  1771. end;
  1772. A_MOV:
  1773. begin
  1774. if (taicpu(p).oper[0]^.typ = top_reg) and
  1775. (taicpu(p).oper[1]^.typ = top_reg) and
  1776. GetNextInstruction(p, hp1) and
  1777. (hp1.typ = ait_Instruction) and
  1778. ((taicpu(hp1).opcode = A_MOV) or
  1779. (taicpu(hp1).opcode = A_MOVZX) or
  1780. (taicpu(hp1).opcode = A_MOVSX)) and
  1781. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1782. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1783. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
  1784. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
  1785. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  1786. {mov reg1, reg2
  1787. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  1788. begin
  1789. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  1790. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  1791. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  1792. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  1793. asml.remove(p);
  1794. p.free;
  1795. p := hp1;
  1796. continue;
  1797. end
  1798. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1799. GetNextInstruction(p,hp1) and
  1800. (hp1.typ = ait_instruction) and
  1801. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1802. GetNextInstruction(hp1,hp2) and
  1803. (hp2.typ = ait_instruction) and
  1804. (taicpu(hp2).opcode = A_MOV) and
  1805. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1806. (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1807. (taicpu(hp2).oper[1]^.typ = top_ref) then
  1808. begin
  1809. TmpUsedRegs := UsedRegs;
  1810. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1811. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  1812. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
  1813. hp2, TmpUsedRegs))) then
  1814. { change mov (ref), reg }
  1815. { add/sub/or/... reg2/$const, reg }
  1816. { mov reg, (ref) }
  1817. { # release reg }
  1818. { to add/sub/or/... reg2/$const, (ref) }
  1819. begin
  1820. case taicpu(hp1).opcode of
  1821. A_INC,A_DEC:
  1822. taicpu(hp1).LoadRef(0,taicpu(p).oper[0]^.ref^)
  1823. else
  1824. taicpu(hp1).LoadRef(1,taicpu(p).oper[0]^.ref^);
  1825. end;
  1826. asml.remove(p);
  1827. asml.remove(hp2);
  1828. p.free;
  1829. hp2.free;
  1830. p := hp1
  1831. end;
  1832. end
  1833. end;
  1834. end;
  1835. end;
  1836. end;
  1837. p := tai(p.next)
  1838. end;
  1839. end;
  { Final peephole pass. Walks the instruction list from BlockStart up to
    (but not including) BlockEnd and applies the following rewrites:

      - "call x; jmp y" becomes "push y; jmp x" on processors below the
        Pentium II class;
      - "cmp $0,%reg" becomes "test %reg,%reg";
      - on the Pentium (and only when neither register variables nor size
        optimization are enabled) "movzbl src,%reg" becomes
        "xorl %reg,%reg; movb src,%reg8";
      - a "test/or %y,%y" that directly follows an instruction which already
        set the flags according to %y is removed.

    asml       : the assembler list the instructions live in (modified in
                 place)
    BlockStart : first item to inspect
    BlockEnd   : item at which the walk stops }
  procedure PostPeepHoleOpts(asml: taasmoutput; BlockStart, BlockEnd: tai);

    { True when the instruction following p is a conditional jump whose
      condition is (not) zero / (not) equal. Only the instruction directly
      after p is inspected. }
    function NextIsZeroFlagJump(p: tai): boolean;
      var
        hpnext: tai;
      begin
        NextIsZeroFlagJump :=
          GetNextInstruction(p, hpnext) and
          (hpnext.typ = ait_instruction) and
          (taicpu(hpnext).opcode = A_Jcc) and
          (taicpu(hpnext).condition in [C_Z,C_NZ,C_E,C_NE]);
      end;

    var
      p,hp1,hp2: tai;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                case taicpu(p).opcode Of
                  A_CALL:
                    { change "call x; jmp y" to "push y; jmp x" }
                    if (AktOptProcessor < ClassPentium2) and
                       GetNextInstruction(p, hp1) and
                       (hp1.typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_JMP) and
                       ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                      begin
                        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                        InsertLLItem(asml, p.previous, p, hp2);
                        taicpu(p).opcode := A_JMP;
                        taicpu(p).is_jmp := true;
                        asml.remove(hp1);
                        hp1.free;
                      end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        {change "cmp $0, %reg" to "test %reg, %reg"}
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { re-run the loop body so the new test instruction is
                            considered by the A_TEST case as well }
                          continue;
                        end;
                    end;
  (*
  Optimization is not safe; xor clears the carry flag.
  See test/tgadint64 in the test suite.
                  A_MOV:
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[0]^.val = 0) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) then
                      { change "mov $0, %reg" into "xor %reg, %reg" }
                      begin
                        taicpu(p).opcode := A_XOR;
                        taicpu(p).LoadReg(0,taicpu(p).oper[1]^.reg);
                      end;
  *)
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_regvars in aktglobalswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg)
                            then
                              case taicpu(p).opsize of
                                S_BL:
                                  begin
                                    if IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                                       { the xor below clears the whole destination
                                         before the byte is copied, so the source
                                         must not live in the same register }
                                       (getsupreg(taicpu(p).oper[0]^.reg) <>
                                        getsupreg(taicpu(p).oper[1]^.reg)) and
                                       not(CS_LittleSize in aktglobalswitches) and
                                       (aktoptprocessor = ClassPentium) then
                                      {Change "movzbl %reg1, %reg2" to
                                       "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                       PentiumMMX}
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p.previous, p, hp1);
                                        taicpu(p).opcode := A_MOV;
                                        taicpu(p).changeopsize(S_B);
                                        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                      end;
                                  end;
                              end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                              (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                              (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                              not(CS_LittleSize in aktglobalswitches) and
                              IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                              (aktoptprocessor = ClassPentium) and
                              (taicpu(p).opsize = S_BL) then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                             Pentium and PentiumMMX}
                            begin
                              hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                          taicpu(p).oper[1]^.reg);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                              InsertLLItem(asml,p.previous, p, hp1);
                            end;
                      end;
                  A_TEST, A_OR:
                    {removes the line marked with (x) from the sequence
                       and/or/xor/add/sub/... $x, %y
                       test/or %y, %y   (x)
                       j(n)z _Label
                     as the first instruction already adjusts the ZF}
                    begin
                      if OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
                        if GetLastInstruction(p, hp1) and
                           (tai(hp1).typ = ait_instruction) then
                          case taicpu(hp1).opcode Of
                            A_OR, A_XOR, A_AND:
                              { these leave the flags exactly as the test/or
                                would, so the removal does not depend on what
                                consumes them }
                              begin
                                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
                                  begin
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end;
                            A_ADD, A_SUB{, A_SHL, A_SHR}:
                              begin
                                { add/sub set overflow and carry from the
                                  arithmetic while test/or clear them, so the
                                  removal is only valid when the consumer just
                                  looks at the zero flag }
                                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
                                   NextIsZeroFlagJump(p) then
                                  begin
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end;
                            A_DEC, A_INC, A_NEG:
                              begin
                                { same restriction as for add/sub: overflow and
                                  carry differ from what test/or would produce }
                                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                                   NextIsZeroFlagJump(p) then
                                  begin
                                    case taicpu(hp1).opcode Of
                                      A_DEC, A_INC:
                                        {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                        begin
                                          case taicpu(hp1).opcode Of
                                            A_DEC: taicpu(hp1).opcode := A_SUB;
                                            A_INC: taicpu(hp1).opcode := A_ADD;
                                          end;
                                          taicpu(hp1).Loadoper(1,taicpu(hp1).oper[0]^);
                                          taicpu(hp1).LoadConst(0,1);
                                          taicpu(hp1).ops:=2;
                                        end
                                    end;
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end
                          end
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  1991. end.