popt386.pas 126 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit popt386;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses Aasmbase,aasmtai,aasmdata,aasmcpu,verbose;
  22. procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  23. procedure PeepHoleOptPass1(asml: TAsmList; BlockStart, BlockEnd: tai);
  24. procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  25. procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  26. implementation
  27. uses
  28. cutils,globtype,systems,
  29. globals,cgbase,procinfo,
  30. symsym,
  31. {$ifdef finaldestdebug}
  32. cobjects,
  33. {$endif finaldestdebug}
  34. cpuinfo,cpubase,cgutils,daopt386,
  35. cgx86;
  { Tells whether hp1 is an arithmetic/logic instruction that has "reg" as
    its register destination operand:
      - for the two-operand opcodes listed below, operand 1 must be "reg" and
        operand 0 must be a constant or a register different from "reg";
      - for the one-operand opcodes listed below, operand 0 must be "reg".
    Every other opcode yields false. }
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      result := false;
      case hp1.opcode of
        A_INC, A_DEC, A_NEG, A_NOT:
          result := (hp1.oper[0]^.typ = top_reg) and
                    (hp1.oper[0]^.reg = reg);
        A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
          { the source operand is inspected first; the destination operand is
            only looked at once the source has been accepted }
          if (hp1.oper[0]^.typ = top_const) or
             ((hp1.oper[0]^.typ = top_reg) and
              (hp1.oper[0]^.reg <> reg)) then
            result := (hp1.oper[1]^.typ = top_reg) and
                      (hp1.oper[1]^.reg = reg);
      end;
    end;
  { Updates UsedRegs with the item following p and then reports whether the
    super register of "reg" is in UsedRegs while the next instruction after p
    (if any) does not load it with a new value, as judged by
    regLoadedWithNewValue. }
  function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean;
    var
      super: tsuperregister;
      following: tai;
    begin
      super := getsupreg(reg);
      UpdateUsedRegs(UsedRegs, tai(p.Next));
      result := false;
      if not (super in UsedRegs) then
        exit;
      if not getNextInstruction(p, following) then
        { nothing follows, so nothing can overwrite the register }
        result := true
      else
        result := not regLoadedWithNewValue(super, false, following);
    end;
  { Returns true when p is an instruction that starts one of these sequences:
      ret
      leave ; ret
      mov ebp,esp ; pop ebp ; ret
    (operands written in the order they are stored in the instruction). }
  function IsExitCode(p : tai) : boolean;
    var
      second, third : tai;
    begin
      result := false;
      if p.typ <> ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result := true;
        A_LEAVE:
          result := GetNextInstruction(p, second) and
                    (second.typ = ait_instruction) and
                    (taicpu(second).opcode = A_RET);
        A_MOV:
          begin
            { first instruction must be a register move from EBP to ESP }
            if (taicpu(p).oper[0]^.typ <> top_reg) or
               (taicpu(p).oper[0]^.reg <> NR_EBP) or
               (taicpu(p).oper[1]^.typ <> top_reg) or
               (taicpu(p).oper[1]^.reg <> NR_ESP) then
              exit;
            { followed by "pop ebp" }
            if not GetNextInstruction(p, second) or
               (second.typ <> ait_instruction) or
               (taicpu(second).opcode <> A_POP) or
               (taicpu(second).oper[0]^.typ <> top_reg) or
               (taicpu(second).oper[0]^.reg <> NR_EBP) then
              exit;
            { and finally "ret" }
            result := GetNextInstruction(second, third) and
                      (third.typ = ait_instruction) and
                      (taicpu(third).opcode = A_RET);
          end;
      end;
    end;
  { Looks for "fstp ref ; fld ref" or "fistp ref ; fild ref" (same operand
    size, equal references) starting at p.  When the pair is of size S_FX, is
    directly followed by exit code, and the reference is based on the frame
    pointer, has no index register and is not located below the function
    result (if there is one), both instructions are removed, p is set to the
    instruction following them and removeLastDeallocForFuncRes is called.

    Returns true if a "continue" should be done after this optimization. }
  function doFpuLoadStoreOpt(asmL: TAsmList; var p: tai): boolean;
    var
      reload, afterReload: tai;
      isStoreReloadPair: boolean;
    begin
      result := false;

      { p must store to memory and be followed by an instruction }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not getNextInstruction(p, reload) then
        exit;
      if reload.typ <> ait_instruction then
        exit;

      isStoreReloadPair :=
        ((taicpu(reload).opcode = A_FLD) and
         (taicpu(p).opcode = A_FSTP)) or
        ((taicpu(p).opcode = A_FISTP) and
         (taicpu(reload).opcode = A_FILD));
      if not isStoreReloadPair then
        exit;

      { the reload must read the very same memory location, with the same size }
      if taicpu(reload).oper[0]^.typ <> top_ref then
        exit;
      if taicpu(reload).opsize <> taicpu(p).opsize then
        exit;
      if not refsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
        exit;

      { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
      if (taicpu(p).opsize=S_FX) and
         getNextInstruction(reload, afterReload) and
         (afterReload.typ = ait_instruction) and
         IsExitCode(afterReload) and
         (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
         not(assigned(current_procinfo.procdef.funcretsym) and
             (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
         (taicpu(p).oper[0]^.ref^.index = NR_NO) then
        begin
          asml.remove(p);
          asml.remove(reload);
          p.free;
          reload.free;
          p := afterReload;
          removeLastDeallocForFuncRes(asmL, p);
          result := true;
        end;
      (* can't be done because the store operation rounds
      else
        { fst can't store an extended value! }
        if (taicpu(p).opsize <> S_FX) and
           (taicpu(p).opsize <> S_IQ) then
          begin
            if (taicpu(p).opcode = A_FSTP) then
              taicpu(p).opcode := A_FST
            else taicpu(p).opcode := A_FIST;
            asml.remove(hp1);
            hp1.free;
          end
      *)
    end;
  { Returns true if at least one operand of p is a memory reference whose
    segment register is set (i.e. differs from NR_NO). }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      idx: longint;
    begin
      result := false;
      idx := 0;
      { stop at the first operand that carries a segment }
      while (idx < p.opercnt) and not result do
        begin
          result := (p.oper[idx]^.typ = top_ref) and
                    (p.oper[idx]^.ref^.segment <> NR_NO);
          inc(idx);
        end;
    end;
  { Pre-pass of the peephole optimizer.  Walks the list items from BlockStart
    up to (not including) BlockEnd and rewrites, in place in asml:

      * "imul const, reg [, reg2]" of size S_L into mov/lea/add sequences for
        the constants 1, 3, 5, 6, 9, 10 and 12 (6, 10 and 12 only when
        optimizing for cpu_386 or lower; everything except the constant 1
        only when not optimizing for size and when the next instruction is
        not a jo/jno);
      * "shr/sar const1, x ; shl const2, x" into shift/and combinations;
      * "xor reg, reg" into "mov $0, reg".

    Instructions that contain a memory operand with a segment set are
    skipped. }
  procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
    var
      p,hp1: tai;
      l: aint;
      tmpRef: treference;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_IMUL:
                    {changes certain "imul const, %reg"'s to lea sequences}
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        if (taicpu(p).oper[0]^.val = 1) then
                          if (taicpu(p).ops = 2) then
                            {remove "imul $1, reg"}
                            begin
                              hp1 := tai(p.Next);
                              asml.remove(p);
                              p.free;
                              p := hp1;
                              continue;
                            end
                          else
                            {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                            begin
                              hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                              InsertLLItem(asml, p.previous, p.next, hp1);
                              p.free;
                              p := hp1;
                            end
                        else if
                          ((taicpu(p).ops <= 2) or
                           (taicpu(p).oper[2]^.typ = Top_Reg)) and
                          (taicpu(p).oper[0]^.val <= 12) and
                          not(cs_opt_size in current_settings.optimizerswitches) and
                          { the replacement is skipped when a jo/jno follows
                            the imul }
                          (not(GetNextInstruction(p, hp1)) or
                           {GetNextInstruction(p, hp1) and}
                           not((tai(hp1).typ = ait_instruction) and
                               ((taicpu(hp1).opcode=A_Jcc) and
                                (taicpu(hp1).condition in [C_O,C_NO])))) then
                          begin
                            reference_reset(tmpref,1);
                            case taicpu(p).oper[0]^.val Of
                              3: begin
                                   {imul 3, reg1, reg2 to
                                      lea (reg1,reg1,2), reg2
                                    imul 3, reg1 to
                                      lea (reg1,reg1,2), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 2;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              5: begin
                                   {imul 5, reg1, reg2 to
                                      lea (reg1,reg1,4), reg2
                                    imul 5, reg1 to
                                      lea (reg1,reg1,4), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 4;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              6: begin
                                   {imul 6, reg1, reg2 to
                                      lea (,reg1,2), reg2
                                      lea (reg2,reg1,4), reg2
                                    imul 6, reg1 to
                                      lea (reg1,reg1,2), reg1
                                      add reg1, reg1}
                                   if (current_settings.optimizecputype <= cpu_386) then
                                     begin
                                       { the instruction built first is inserted
                                         AFTER p and therefore executes second }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 4;
                                           { the result has to end up in reg2
                                             (operand 2), not in the source
                                             register }
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                             taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(asml,p, p.next, hp1);
                                       { the instruction built second takes the
                                         place of p and executes first }
                                       reference_reset(tmpref,2);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       TmpRef.ScaleFactor := 2;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := NR_NO;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                             taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(asml,p.previous, p.next, hp1);
                                       p.free;
                                       p := tai(hp1.next);
                                     end
                                 end;
                              9: begin
                                   {imul 9, reg1, reg2 to
                                      lea (reg1,reg1,8), reg2
                                    imul 9, reg1 to
                                      lea (reg1,reg1,8), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 8;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              10: begin
                                    {imul 10, reg1, reg2 to
                                       lea (reg1,reg1,4), reg2
                                       add reg2, reg2
                                     imul 10, reg1 to
                                       lea (reg1,reg1,4), reg1
                                       add reg1, reg1}
                                    if (current_settings.optimizecputype <= cpu_386) then
                                      begin
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p, p.next, hp1);
                                        TmpRef.base := taicpu(p).oper[1]^.reg;
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        TmpRef.ScaleFactor := 4;
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end;
                              12: begin
                                    {imul 12, reg1, reg2 to
                                       lea (,reg1,4), reg2
                                       lea (reg2,reg1,8), reg2
                                     imul 12, reg1 to
                                       lea (reg1,reg1,2), reg1
                                       lea (,reg1,4), reg1}
                                    if (current_settings.optimizecputype <= cpu_386)
                                      then
                                        begin
                                          TmpRef.index := taicpu(p).oper[1]^.reg;
                                          if (taicpu(p).ops = 3) then
                                            begin
                                              TmpRef.base := taicpu(p).oper[2]^.reg;
                                              TmpRef.ScaleFactor := 8;
                                              hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                            end
                                          else
                                            begin
                                              TmpRef.base := NR_NO;
                                              TmpRef.ScaleFactor := 4;
                                              hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                            end;
                                          InsertLLItem(asml,p, p.next, hp1);
                                          reference_reset(tmpref,2);
                                          TmpRef.index := taicpu(p).oper[1]^.reg;
                                          if (taicpu(p).ops = 3) then
                                            begin
                                              TmpRef.base := NR_NO;
                                              TmpRef.ScaleFactor := 4;
                                              hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                            end
                                          else
                                            begin
                                              TmpRef.base := taicpu(p).oper[1]^.reg;
                                              TmpRef.ScaleFactor := 2;
                                              hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                            end;
                                          InsertLLItem(asml,p.previous, p.next, hp1);
                                          p.free;
                                          p := tai(hp1.next);
                                        end
                                  end
                            end;
                          end;
                    end;
                  A_SAR, A_SHR:
                    {changes the code sequence
                       shr/sar const1, x
                       shl const2, x
                     to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                    begin
                      if GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_SHL) and
                         (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).opsize = taicpu(p).opsize) and
                         (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                         OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                           not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 > const2 }
                          begin
                            { keep the right shift by the difference and turn
                              the shl into an and that clears the low const2
                              bits }
                            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                            taicpu(hp1).opcode := A_AND;
                            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                                not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 < const2 }
                          begin
                            { keep the shl by the difference and turn the right
                              shift into an and that clears the low const1
                              bits }
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(p).loadConst(0,l Xor $ff);
                              S_W: taicpu(p).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else
                          { shr/sar const1, %reg
                            shl const2, %reg
                            with const1 = const2 }
                          if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                            begin
                              { a single and suffices; the shl is removed }
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val))-1;
                              case taicpu(p).opsize Of
                                S_B: taicpu(p).loadConst(0,l Xor $ff);
                                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              end;
                              asml.remove(hp1);
                              hp1.free;
                            end;
                    end;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier }
                      { for the CSE. Will be changed back in pass 2 }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { True when instr is an instruction with opcode "op" whose operand size is
    in "opsize"; an empty opsize set accepts every size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if taicpu(instr).opcode <> op then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { True when instr is an instruction whose opcode is op1 or op2 and whose
    operand size is in "opsize"; an empty opsize set accepts every size. }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { True when instr is an instruction whose opcode is op1, op2 or op3 and
    whose operand size is in "opsize"; an empty opsize set accepts every
    size. }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result := false;
      if instr.typ <> ait_instruction then
        exit;
      if (taicpu(instr).opcode <> op1) and
         (taicpu(instr).opcode <> op2) and
         (taicpu(instr).opcode <> op3) then
        exit;
      result := (opsize = []) or (taicpu(instr).opsize in opsize);
    end;
  { True when oper is a register operand holding exactly "reg". }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ = top_reg then
        result := oper.reg = reg
      else
        result := false;
    end;
  { True when oper is a constant operand with the value "a". }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ = top_const then
        result := oper.val = a
      else
        result := false;
    end;
  { True when both operands have the same type and the same content:
    equal value for constants, equal register for registers, RefsEqual for
    references.  Operands of the same, but any other, type raise
    internalerror 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
    begin
      result := false;
      if oper1.typ = oper2.typ then
        case oper1.typ of
          top_reg:
            result := oper1.reg = oper2.reg;
          top_ref:
            result := RefsEqual(oper1.ref^, oper2.ref^);
          top_const:
            result := oper1.val = oper2.val;
          else
            begin
              { the types did match, so "true" stays the nominal result }
              result := true;
              internalerror(2013102801);
            end;
        end
    end;
  { True when ref is a plain register reference (zero offset, scale factor 0
    or 1, no segment, no symbol, no relsymbol) whose base and index equal the
    given registers.  Passing NR_INVALID for base or index accepts any
    register in that position. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    var
      isPlain, baseFits, indexFits: Boolean;
    begin
      isPlain := (ref.offset=0) and
                 (ref.scalefactor in [0,1]) and
                 (ref.segment=NR_NO) and
                 (ref.symbol=nil) and
                 (ref.relsymbol=nil);
      baseFits := (base=NR_INVALID) or (ref.base=base);
      indexFits := (index=NR_INVALID) or (ref.index=index);
      Result := isPlain and baseFits and indexFits;
    end;
  508. { First pass of peephole optimizations }
  509. procedure PeepHoleOptPass1(Asml: TAsmList; BlockStart, BlockEnd: tai);
  {$ifdef DEBUG_AOPTCPU}
    { Debug variant: inserts the text s as an assembler comment in front of p. }
    procedure DebugMsg(const s: string;p : tai);
      var
        note: tai;
      begin
        note := tai_comment.Create(strpnew(s));
        asml.insertbefore(note, p);
      end;
  {$else DEBUG_AOPTCPU}
    { Non-debug variant: does nothing. }
    procedure DebugMsg(const s: string;p : tai);inline;
      begin
      end;
  {$endif DEBUG_AOPTCPU}
  { Writes the line 'Ok' to standard output and always returns true. }
  function WriteOk : Boolean;
    begin
      Result := True;
      writeln('Ok');
    end;
  525. var
  526. l : longint;
  527. p,hp1,hp2 : tai;
  528. hp3,hp4: tai;
  529. v:aint;
  530. TmpRef: TReference;
  531. UsedRegs, TmpUsedRegs: TRegSet;
  532. TmpBool1, TmpBool2: Boolean;
  { Starting at hp, steps over every following item whose type is in
    SkipInstr or is a label or an align.  If an item remains after those, it
    is returned in hp2 and the result is true; otherwise hp2 receives the
    last item reached and the result is false. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
    var
      candidate: tai;
    begin
      repeat
        candidate := tai(hp.next);
        if not assigned(candidate) or
           not (candidate.typ in (SkipInstr + [ait_label,ait_align])) then
          break;
        hp := candidate;
      until false;
      Result := assigned(candidate);
      if Result then
        hp2 := candidate
      else
        hp2 := hp;
    end;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
    {traces successive jumps to their final destination and sets it, e.g.
       je l1                je l3
       <code>               <code>
       l1:       becomes    l1:
       je l2                je l3
       <code>               <code>
       l2:                  l2:
       jmp l3               jmp l3

     the level parameter denotes how deep we have already followed the jump,
     to avoid endless loops with constructs such as "l5: ; jmp l5"

     Returns false when the trace was abandoned (level above 20, a jump that
     targets the label it sits behind, or a failed recursive trace) and true
     otherwise.  Whenever the target symbol of hp is replaced, the reference
     count of the old label is decreased and that of the new one increased. }
    var p1, p2: tai;
        l: tasmlabel;

      { Skips the items after hp whose type is in SkipInstr or is an align;
        if the item that follows is a label, returns true and its symbol
        in l. }
      function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
        begin
          FindAnyLabel := false;
          while assigned(hp.next) and
                (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
            hp := tai(hp.next);
          if assigned(hp.next) and
             (tai(hp.next).typ = ait_label) then
            begin
              FindAnyLabel := true;
              l := tai_label(hp.next).labsym;
            end
        end;

    begin
      GetfinalDestination := false;
      if level > 20 then
        exit;
      p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives}
               { is unconditional or of the same type as hp, so continue       }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives}
               { is the opposite of hp (so this one is never taken), but after }
               { that one there is a branch that will be taken, so perform a   }
               { little hack: set p1 equal to this instruction (that's what the}
               { last SkipLabels is for, only works with short bool evaluation)}
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5 }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                     tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                if not FindAnyLabel(p1,l) then
                  begin
{$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
{$endif finaldestdebug}
                    current_asmdata.getjumplabel(l);
                    insertllitem(asml,p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable }
                    { so it will fail the rangecheck. Labeltable should become a   }
                    { hashtable to support this:                                   }
                    { GetFinalDestination(asml, hp);                               }
                  end
                else
                  begin
{$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
{$endif finaldestdebug}
                    { take the new reference first, then release the one on }
                    { the label hp pointed to so far, so the reference      }
                    { counts stay balanced like in the other branches       }
                    l.increfs;
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  { Folds the instruction preceding p into p when it has the same operand
    size and modifies the register held in operand 1 of p:
      dec reg         -> constant of p is increased by 1
      sub const, reg  -> constant of p is increased by const
      add const, reg  -> constant of p is decreased by const
    The preceding instruction is removed.  If, after folding an add, the
    constant of p is 0, p itself is removed as well, p is moved to the
    previous instruction (or to the following item if there is none) and
    true is returned; in every other case the result is false.
    NOTE(review): p is presumably a "sub const, reg" - confirm with the
    caller.  Uses hp1 and asml of the enclosing procedure. }
  function DoSubAddOpt(var p: tai): Boolean;
    begin
      Result := False;
      if not GetLastInstruction(p, hp1) then
        exit;
      if hp1.typ <> ait_instruction then
        exit;
      if taicpu(hp1).opsize <> taicpu(p).opsize then
        exit;
      case taicpu(hp1).opcode Of
        A_ADD:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
              if (taicpu(p).oper[0]^.val = 0) then
                begin
                  { p no longer changes anything: drop it too }
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  if not GetLastInstruction(hp1, p) then
                    p := hp1;
                  Result := True;
                end
            end;
        A_SUB:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
            end;
        A_DEC:
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
              asml.remove(hp1);
              hp1.free;
            end;
      end;
    end;
  687. begin
  688. p := BlockStart;
  689. UsedRegs := [];
  690. while (p <> BlockEnd) Do
  691. begin
  692. UpDateUsedRegs(UsedRegs, tai(p.next));
  693. case p.Typ Of
  694. ait_instruction:
  695. begin
  696. current_filepos:=taicpu(p).fileinfo;
  697. if InsContainsSegRef(taicpu(p)) then
  698. begin
  699. p := tai(p.next);
  700. continue;
  701. end;
  702. { Handle Jmp Optimizations }
  703. if taicpu(p).is_jmp then
  704. begin
  705. {the following if-block removes all code between a jmp and the next label,
  706. because it can never be executed}
  707. if (taicpu(p).opcode = A_JMP) then
  708. begin
  709. hp2:=p;
  710. while GetNextInstruction(hp2, hp1) and
  711. (hp1.typ <> ait_label) do
  712. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  713. begin
  714. { don't kill start/end of assembler block,
  715. no-line-info-start/end etc }
  716. if hp1.typ<>ait_marker then
  717. begin
  718. asml.remove(hp1);
  719. hp1.free;
  720. end
  721. else
  722. hp2:=hp1;
  723. end
  724. else break;
  725. end;
  726. { remove jumps to a label coming right after them }
  727. if GetNextInstruction(p, hp1) then
  728. begin
  729. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  730. { TODO: FIXME removing the first instruction fails}
  731. (p<>blockstart) then
  732. begin
  733. hp2:=tai(hp1.next);
  734. asml.remove(p);
  735. p.free;
  736. p:=hp2;
  737. continue;
  738. end
  739. else
  740. begin
  741. if hp1.typ = ait_label then
  742. SkipLabels(hp1,hp1);
  743. if (tai(hp1).typ=ait_instruction) and
  744. (taicpu(hp1).opcode=A_JMP) and
  745. GetNextInstruction(hp1, hp2) and
  746. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  747. begin
  748. if taicpu(p).opcode=A_Jcc then
  749. begin
  750. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  751. tai_label(hp2).labsym.decrefs;
  752. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  753. { when freeing hp1, the ref. isn't decreased, so we don't
  754. increase it (FK)
  755. taicpu(p).oper[0]^.ref^.symbol.increfs;
  756. }
  757. asml.remove(hp1);
  758. hp1.free;
  759. GetFinalDestination(asml, taicpu(p),0);
  760. end
  761. else
  762. begin
  763. GetFinalDestination(asml, taicpu(p),0);
  764. p:=tai(p.next);
  765. continue;
  766. end;
  767. end
  768. else
  769. GetFinalDestination(asml, taicpu(p),0);
  770. end;
  771. end;
  772. end
  773. else
  774. { All other optimizes }
  775. begin
  776. for l := 0 to taicpu(p).ops-1 Do
  777. if (taicpu(p).oper[l]^.typ = top_ref) then
  778. With taicpu(p).oper[l]^.ref^ Do
  779. begin
  780. if (base = NR_NO) and
  781. (index <> NR_NO) and
  782. (scalefactor in [0,1]) then
  783. begin
  784. base := index;
  785. index := NR_NO
  786. end
  787. end;
  788. case taicpu(p).opcode Of
  789. A_AND:
  790. begin
  791. if (taicpu(p).oper[0]^.typ = top_const) and
  792. (taicpu(p).oper[1]^.typ = top_reg) and
  793. GetNextInstruction(p, hp1) and
  794. (tai(hp1).typ = ait_instruction) and
  795. (taicpu(hp1).opcode = A_AND) and
  796. (taicpu(hp1).oper[0]^.typ = top_const) and
  797. (taicpu(hp1).oper[1]^.typ = top_reg) and
  798. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  799. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) then
  800. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  801. begin
  802. taicpu(hp1).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  803. asml.remove(p);
  804. p.free;
  805. p:=hp1;
  806. end
  807. else
  808. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  809. jump, but only if it's a conditional jump (PFV) }
  810. if (taicpu(p).oper[1]^.typ = top_reg) and
  811. GetNextInstruction(p, hp1) and
  812. (hp1.typ = ait_instruction) and
  813. (taicpu(hp1).is_jmp) and
  814. (taicpu(hp1).opcode<>A_JMP) and
  815. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  816. taicpu(p).opcode := A_TEST;
  817. end;
  818. A_CMP:
  819. begin
  820. { cmp register,$8000 neg register
  821. je target --> jo target
  822. .... only if register is deallocated before jump.}
  823. case Taicpu(p).opsize of
  824. S_B: v:=$80;
  825. S_W: v:=$8000;
  826. S_L: v:=aint($80000000);
  827. else
  828. internalerror(2013112905);
  829. end;
  830. if (taicpu(p).oper[0]^.typ=Top_const) and
  831. (taicpu(p).oper[0]^.val=v) and
  832. (Taicpu(p).oper[1]^.typ=top_reg) and
  833. GetNextInstruction(p, hp1) and
  834. (hp1.typ=ait_instruction) and
  835. (taicpu(hp1).opcode=A_Jcc) and
  836. (Taicpu(hp1).condition in [C_E,C_NE]) and
  837. not(getsupreg(Taicpu(p).oper[1]^.reg) in usedregs) then
  838. begin
  839. Taicpu(p).opcode:=A_NEG;
  840. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  841. Taicpu(p).clearop(1);
  842. Taicpu(p).ops:=1;
  843. if Taicpu(hp1).condition=C_E then
  844. Taicpu(hp1).condition:=C_O
  845. else
  846. Taicpu(hp1).condition:=C_NO;
  847. continue;
  848. end;
  849. {
  850. @@2: @@2:
  851. .... ....
  852. cmp operand1,0
  853. jle/jbe @@1
  854. dec operand1 --> sub operand1,1
  855. jmp @@2 jge/jae @@2
  856. @@1: @@1:
  857. ... ....}
  858. if (taicpu(p).oper[0]^.typ = top_const) and
  859. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  860. (taicpu(p).oper[0]^.val = 0) and
  861. GetNextInstruction(p, hp1) and
  862. (hp1.typ = ait_instruction) and
  863. (taicpu(hp1).is_jmp) and
  864. (taicpu(hp1).opcode=A_Jcc) and
  865. (taicpu(hp1).condition in [C_LE,C_BE]) and
  866. GetNextInstruction(hp1,hp2) and
  867. (hp2.typ = ait_instruction) and
  868. (taicpu(hp2).opcode = A_DEC) and
  869. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  870. GetNextInstruction(hp2, hp3) and
  871. (hp3.typ = ait_instruction) and
  872. (taicpu(hp3).is_jmp) and
  873. (taicpu(hp3).opcode = A_JMP) and
  874. GetNextInstruction(hp3, hp4) and
  875. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  876. begin
  877. taicpu(hp2).Opcode := A_SUB;
  878. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  879. taicpu(hp2).loadConst(0,1);
  880. taicpu(hp2).ops:=2;
  881. taicpu(hp3).Opcode := A_Jcc;
  882. case taicpu(hp1).condition of
  883. C_LE: taicpu(hp3).condition := C_GE;
  884. C_BE: taicpu(hp3).condition := C_AE;
  885. end;
  886. asml.remove(p);
  887. asml.remove(hp1);
  888. p.free;
  889. hp1.free;
  890. p := hp2;
  891. continue;
  892. end
  893. end;
  894. A_FLD:
  895. begin
  896. if (taicpu(p).oper[0]^.typ = top_reg) and
  897. GetNextInstruction(p, hp1) and
  898. (hp1.typ = Ait_Instruction) and
  899. (taicpu(hp1).oper[0]^.typ = top_reg) and
  900. (taicpu(hp1).oper[1]^.typ = top_reg) and
  901. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  902. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  903. { change to
  904. fld reg fxxx reg,st
  905. fxxxp st, st1 (hp1)
  906. Remark: non commutative operations must be reversed!
  907. }
  908. begin
  909. case taicpu(hp1).opcode Of
  910. A_FMULP,A_FADDP,
  911. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  912. begin
  913. case taicpu(hp1).opcode Of
  914. A_FADDP: taicpu(hp1).opcode := A_FADD;
  915. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  916. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  917. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  918. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  919. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  920. end;
  921. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  922. taicpu(hp1).oper[1]^.reg := NR_ST;
  923. asml.remove(p);
  924. p.free;
  925. p := hp1;
  926. continue;
  927. end;
  928. end;
  929. end
  930. else
  931. if (taicpu(p).oper[0]^.typ = top_ref) and
  932. GetNextInstruction(p, hp2) and
  933. (hp2.typ = Ait_Instruction) and
  934. (taicpu(hp2).ops = 2) and
  935. (taicpu(hp2).oper[0]^.typ = top_reg) and
  936. (taicpu(hp2).oper[1]^.typ = top_reg) and
  937. (taicpu(p).opsize in [S_FS, S_FL]) and
  938. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  939. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  940. if GetLastInstruction(p, hp1) and
  941. (hp1.typ = Ait_Instruction) and
  942. ((taicpu(hp1).opcode = A_FLD) or
  943. (taicpu(hp1).opcode = A_FST)) and
  944. (taicpu(hp1).opsize = taicpu(p).opsize) and
  945. (taicpu(hp1).oper[0]^.typ = top_ref) and
  946. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  947. if ((taicpu(hp2).opcode = A_FMULP) or
  948. (taicpu(hp2).opcode = A_FADDP)) then
  949. { change to
  950. fld/fst mem1 (hp1) fld/fst mem1
  951. fld mem1 (p) fadd/
  952. faddp/ fmul st, st
  953. fmulp st, st1 (hp2) }
  954. begin
  955. asml.remove(p);
  956. p.free;
  957. p := hp1;
  958. if (taicpu(hp2).opcode = A_FADDP) then
  959. taicpu(hp2).opcode := A_FADD
  960. else
  961. taicpu(hp2).opcode := A_FMUL;
  962. taicpu(hp2).oper[1]^.reg := NR_ST;
  963. end
  964. else
  965. { change to
  966. fld/fst mem1 (hp1) fld/fst mem1
  967. fld mem1 (p) fld st}
  968. begin
  969. taicpu(p).changeopsize(S_FL);
  970. taicpu(p).loadreg(0,NR_ST);
  971. end
  972. else
  973. begin
  974. case taicpu(hp2).opcode Of
  975. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  976. { change to
  977. fld/fst mem1 (hp1) fld/fst mem1
  978. fld mem2 (p) fxxx mem2
  979. fxxxp st, st1 (hp2) }
  980. begin
  981. case taicpu(hp2).opcode Of
  982. A_FADDP: taicpu(p).opcode := A_FADD;
  983. A_FMULP: taicpu(p).opcode := A_FMUL;
  984. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  985. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  986. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  987. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  988. end;
  989. asml.remove(hp2);
  990. hp2.free;
  991. end
  992. end
  993. end
  994. end;
  995. A_FSTP,A_FISTP:
  996. if doFpuLoadStoreOpt(asmL,p) then
  997. continue;
  998. A_LEA:
  999. begin
  1000. {removes seg register prefixes from LEA operations, as they
  1001. don't do anything}
  1002. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  1003. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  1004. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1005. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  1006. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1007. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1008. begin
  1009. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1010. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1011. begin
  1012. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  1013. taicpu(p).oper[1]^.reg);
  1014. InsertLLItem(asml,p.previous,p.next, hp1);
  1015. p.free;
  1016. p := hp1;
  1017. continue;
  1018. end
  1019. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1020. begin
  1021. hp1 := tai(p.Next);
  1022. asml.remove(p);
  1023. p.free;
  1024. p := hp1;
  1025. continue;
  1026. end
  1027. { continue to use lea to adjust the stack pointer,
  1028. it is the recommended way, but only if not optimizing for size }
  1029. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1030. (cs_opt_size in current_settings.optimizerswitches) then
  1031. with taicpu(p).oper[0]^.ref^ do
  1032. if (base = taicpu(p).oper[1]^.reg) then
  1033. begin
  1034. l := offset;
  1035. if (l=1) and UseIncDec then
  1036. begin
  1037. taicpu(p).opcode := A_INC;
  1038. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1039. taicpu(p).ops := 1
  1040. end
  1041. else if (l=-1) and UseIncDec then
  1042. begin
  1043. taicpu(p).opcode := A_DEC;
  1044. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1045. taicpu(p).ops := 1;
  1046. end
  1047. else
  1048. begin
  1049. if (l<0) and (l<>-2147483648) then
  1050. begin
  1051. taicpu(p).opcode := A_SUB;
  1052. taicpu(p).loadConst(0,-l);
  1053. end
  1054. else
  1055. begin
  1056. taicpu(p).opcode := A_ADD;
  1057. taicpu(p).loadConst(0,l);
  1058. end;
  1059. end;
  1060. end;
  1061. end
  1062. (*
  1063. This is unsafe, lea doesn't modify the flags but "add"
  1064. does. This breaks webtbs/tw15694.pp. The above
  1065. transformations are also unsafe, but they don't seem to
  1066. be triggered by code that FPC generates (or that at
  1067. least does not occur in the tests...). This needs to be
  1068. fixed by checking for the liveness of the flags register.
  1069. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1070. begin
  1071. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1072. taicpu(p).oper[0]^.ref^.base);
  1073. InsertLLItem(asml,p.previous,p.next, hp1);
  1074. DebugMsg('Peephole Lea2AddBase done',hp1);
  1075. p.free;
  1076. p:=hp1;
  1077. continue;
  1078. end
  1079. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1080. begin
  1081. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1082. taicpu(p).oper[0]^.ref^.index);
  1083. InsertLLItem(asml,p.previous,p.next,hp1);
  1084. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1085. p.free;
  1086. p:=hp1;
  1087. continue;
  1088. end
  1089. *)
  1090. end;
  1091. A_MOV:
  1092. begin
  1093. TmpUsedRegs := UsedRegs;
  1094. if (taicpu(p).oper[1]^.typ = top_reg) and
  1095. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  1096. GetNextInstruction(p, hp1) and
  1097. (tai(hp1).typ = ait_instruction) and
  1098. (taicpu(hp1).opcode = A_MOV) and
  1099. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1100. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  1101. begin
  1102. {we have "mov x, %treg; mov %treg, y}
  1103. if not(RegInOp(getsupreg(taicpu(p).oper[1]^.reg),taicpu(hp1).oper[1]^)) and
  1104. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1105. {we've got "mov x, %treg; mov %treg, y; with %treg is not used after }
  1106. case taicpu(p).oper[0]^.typ Of
  1107. top_reg:
  1108. begin
  1109. { change "mov %reg, %treg; mov %treg, y"
  1110. to "mov %reg, y" }
  1111. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1112. asml.remove(hp1);
  1113. hp1.free;
  1114. continue;
  1115. end;
  1116. top_ref:
  1117. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1118. begin
  1119. { change "mov mem, %treg; mov %treg, %reg"
  1120. to "mov mem, %reg" }
  1121. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1122. asml.remove(hp1);
  1123. hp1.free;
  1124. continue;
  1125. end;
  1126. end
  1127. end
  1128. else
  1129. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  1130. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  1131. penalty}
  1132. if (taicpu(p).oper[0]^.typ = top_reg) and
  1133. (taicpu(p).oper[1]^.typ = top_reg) and
  1134. GetNextInstruction(p,hp1) and
  1135. (tai(hp1).typ = ait_instruction) and
  1136. (taicpu(hp1).ops >= 1) and
  1137. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1138. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  1139. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  1140. begin
  1141. if ((taicpu(hp1).opcode = A_OR) or
  1142. (taicpu(hp1).opcode = A_TEST)) and
  1143. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1144. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1145. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  1146. begin
  1147. TmpUsedRegs := UsedRegs;
  1148. { reg1 will be used after the first instruction, }
  1149. { so update the allocation info }
  1150. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1151. if GetNextInstruction(hp1, hp2) and
  1152. (hp2.typ = ait_instruction) and
  1153. taicpu(hp2).is_jmp and
  1154. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1155. { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  1156. "test %reg1, %reg1; jxx" }
  1157. begin
  1158. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1159. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1160. asml.remove(p);
  1161. p.free;
  1162. p := hp1;
  1163. continue
  1164. end
  1165. else
  1166. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  1167. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  1168. begin
  1169. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1170. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1171. end;
  1172. end
  1173. { else
  1174. if (taicpu(p.next)^.opcode
  1175. in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  1176. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  1177. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  1178. end
  1179. else
  1180. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1181. x >= RetOffset) as it doesn't do anything (it writes either to a
  1182. parameter or to the temporary storage room for the function
  1183. result)}
  1184. if GetNextInstruction(p, hp1) and
  1185. (tai(hp1).typ = ait_instruction) then
  1186. if IsExitCode(hp1) and
  1187. (taicpu(p).oper[1]^.typ = top_ref) and
  1188. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1189. not(assigned(current_procinfo.procdef.funcretsym) and
  1190. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1191. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  1192. (taicpu(p).oper[0]^.typ = top_reg) then
  1193. begin
  1194. asml.remove(p);
  1195. p.free;
  1196. p := hp1;
  1197. RemoveLastDeallocForFuncRes(asmL,p);
  1198. end
  1199. else
  1200. if (taicpu(p).oper[0]^.typ = top_reg) and
  1201. (taicpu(p).oper[1]^.typ = top_ref) and
  1202. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1203. (taicpu(hp1).opcode = A_CMP) and
  1204. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1205. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1206. {change "mov reg1, mem1; cmp x, mem1" to "mov reg1, mem1; cmp x, reg1"}
  1207. begin
  1208. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1209. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1210. end;
  1211. { Next instruction is also a MOV ? }
  1212. if GetNextInstruction(p, hp1) and
  1213. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1214. begin
  1215. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1216. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1217. {mov reg1, mem1 or mov mem1, reg1
  1218. mov mem2, reg2 mov reg2, mem2}
  1219. begin
  1220. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1221. {mov reg1, mem1 or mov mem1, reg1
  1222. mov mem2, reg1 mov reg2, mem1}
  1223. begin
  1224. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1225. { Removes the second statement from
  1226. mov reg1, mem1/reg2
  1227. mov mem1/reg2, reg1 }
  1228. begin
  1229. if (taicpu(p).oper[0]^.typ = top_reg) then
  1230. AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1231. asml.remove(hp1);
  1232. hp1.free;
  1233. end
  1234. else
  1235. begin
  1236. TmpUsedRegs := UsedRegs;
  1237. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1238. if (taicpu(p).oper[1]^.typ = top_ref) and
  1239. { mov reg1, mem1
  1240. mov mem2, reg1 }
  1241. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1242. GetNextInstruction(hp1, hp2) and
  1243. (hp2.typ = ait_instruction) and
  1244. (taicpu(hp2).opcode = A_CMP) and
  1245. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1246. (taicpu(hp2).oper[0]^.typ = TOp_Ref) and
  1247. (taicpu(hp2).oper[1]^.typ = TOp_Reg) and
  1248. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(p).oper[1]^.ref^) and
  1249. (taicpu(hp2).oper[1]^.reg= taicpu(p).oper[0]^.reg) and
  1250. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1251. { change to
  1252. mov reg1, mem1 mov reg1, mem1
  1253. mov mem2, reg1 cmp reg1, mem2
  1254. cmp mem1, reg1 }
  1255. begin
  1256. asml.remove(hp2);
  1257. hp2.free;
  1258. taicpu(hp1).opcode := A_CMP;
  1259. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1260. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1261. end;
  1262. end;
  1263. end
  1264. else
  1265. begin
  1266. tmpUsedRegs := UsedRegs;
  1267. if GetNextInstruction(hp1, hp2) and
  1268. (taicpu(p).oper[0]^.typ = top_ref) and
  1269. (taicpu(p).oper[1]^.typ = top_reg) and
  1270. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1271. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1272. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1273. (tai(hp2).typ = ait_instruction) and
  1274. (taicpu(hp2).opcode = A_MOV) and
  1275. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1276. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1277. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1278. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1279. if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and
  1280. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1281. { mov mem1, %reg1
  1282. mov %reg1, mem2
  1283. mov mem2, reg2
  1284. to:
  1285. mov mem1, reg2
  1286. mov reg2, mem2}
  1287. begin
  1288. AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1289. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1290. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1291. asml.remove(hp2);
  1292. hp2.free;
  1293. end
  1294. else
  1295. if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1296. not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and
  1297. not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then
  1298. { mov mem1, reg1 mov mem1, reg1
  1299. mov reg1, mem2 mov reg1, mem2
  1300. mov mem2, reg2 mov mem2, reg1
  1301. to: to:
  1302. mov mem1, reg1 mov mem1, reg1
  1303. mov mem1, reg2 mov reg1, mem2
  1304. mov reg1, mem2
  1305. or (if mem1 depends on reg1
  1306. and/or if mem2 depends on reg2)
  1307. to:
  1308. mov mem1, reg1
  1309. mov reg1, mem2
  1310. mov reg1, reg2
  1311. }
  1312. begin
  1313. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1314. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1315. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1316. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1317. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1318. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1319. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1320. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1321. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1322. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1323. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1324. end
  1325. else
  1326. if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1327. begin
  1328. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1329. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1330. end
  1331. else
  1332. begin
  1333. asml.remove(hp2);
  1334. hp2.free;
  1335. end
  1336. end
  1337. end
  1338. else
  1339. (* {movl [mem1],reg1
  1340. movl [mem1],reg2
  1341. to:
  1342. movl [mem1],reg1
  1343. movl reg1,reg2 }
  1344. if (taicpu(p).oper[0]^.typ = top_ref) and
  1345. (taicpu(p).oper[1]^.typ = top_reg) and
  1346. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1347. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1348. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1349. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1350. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1351. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1352. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1353. else*)
  1354. { movl const1,[mem1]
  1355. movl [mem1],reg1
  1356. to:
  1357. movl const1,reg1
  1358. movl reg1,[mem1] }
  1359. if (taicpu(p).oper[0]^.typ = top_const) and
  1360. (taicpu(p).oper[1]^.typ = top_ref) and
  1361. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1362. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1363. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1364. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1365. not(reginref(getsupreg(taicpu(hp1).oper[1]^.reg),taicpu(hp1).oper[0]^.ref^)) then
  1366. begin
  1367. allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1368. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1369. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1370. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1371. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1372. end
  1373. end;
  1374. if GetNextInstruction(p, hp1) and
  1375. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1376. GetNextInstruction(hp1, hp2) and
  1377. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1378. MatchOperand(Taicpu(p).oper[0]^,0) and
  1379. (Taicpu(p).oper[1]^.typ = top_reg) and
  1380. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1381. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1382. {mov reg1,0
  1383. bts reg1,operand1 --> mov reg1,operand2
  1384. or reg1,operand2 bts reg1,operand1}
  1385. begin
  1386. Taicpu(hp2).opcode:=A_MOV;
  1387. asml.remove(hp1);
  1388. insertllitem(asml,hp2,hp2.next,hp1);
  1389. asml.remove(p);
  1390. p.free;
  1391. p:=hp1;
  1392. end;
  1393. if GetNextInstruction(p, hp1) and
  1394. MatchInstruction(hp1,A_LEA,[S_L]) and
  1395. (Taicpu(p).oper[0]^.typ = top_ref) and
  1396. (Taicpu(p).oper[1]^.typ = top_reg) and
  1397. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1398. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1399. ) or
  1400. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1401. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1402. )
  1403. ) then
  1404. {mov reg1,ref
  1405. lea reg2,[reg1,reg2] --> add reg2,ref}
  1406. begin
  1407. TmpUsedRegs := UsedRegs;
  1408. { reg1 may not be used afterwards }
  1409. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1410. begin
  1411. Taicpu(hp1).opcode:=A_ADD;
  1412. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1413. DebugMsg('Peephole MovLea2Add done',hp1);
  1414. asml.remove(p);
  1415. p.free;
  1416. p:=hp1;
  1417. end;
  1418. end;
  1419. end;
  1420. A_MOVSX,
  1421. A_MOVZX :
  1422. begin
  1423. if (taicpu(p).oper[1]^.typ = top_reg) and
  1424. GetNextInstruction(p,hp1) and
  1425. (hp1.typ = ait_instruction) and
  1426. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1427. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1428. GetNextInstruction(hp1,hp2) and
  1429. MatchInstruction(hp2,A_MOV,[]) and
  1430. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1431. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1432. (((taicpu(hp1).ops=2) and
  1433. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1434. ((taicpu(hp1).ops=1) and
  1435. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1436. { reg2 must not be used after the sequence considered, so
  1437. it must be either deallocated or loaded with a new value }
  1438. (GetNextInstruction(hp2,hp3) and
  1439. (FindRegDealloc(getsupreg(taicpu(hp2).oper[0]^.reg),tai(hp3)) or
  1440. RegLoadedWithNewValue(getsupreg(taicpu(hp2).oper[0]^.reg), false, hp3))) then
  1441. { change movsX/movzX reg/ref, reg2 }
  1442. { add/sub/or/... reg3/$const, reg2 }
  1443. { mov reg2 reg/ref }
  1444. { to add/sub/or/... reg3/$const, reg/ref }
  1445. begin
  1446. { by example:
  1447. movswl %si,%eax movswl %si,%eax p
  1448. decl %eax addl %edx,%eax hp1
  1449. movw %ax,%si movw %ax,%si hp2
  1450. ->
  1451. movswl %si,%eax movswl %si,%eax p
  1452. decw %eax addw %edx,%eax hp1
  1453. movw %ax,%si movw %ax,%si hp2
  1454. }
  1455. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1456. {
  1457. ->
  1458. movswl %si,%eax movswl %si,%eax p
  1459. decw %si addw %dx,%si hp1
  1460. movw %ax,%si movw %ax,%si hp2
  1461. }
  1462. case taicpu(hp1).ops of
  1463. 1:
  1464. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1465. 2:
  1466. begin
  1467. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1468. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1469. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1470. end;
  1471. else
  1472. internalerror(2008042701);
  1473. end;
  1474. {
  1475. ->
  1476. decw %si addw %dx,%si p
  1477. }
  1478. asml.remove(p);
  1479. asml.remove(hp2);
  1480. p.free;
  1481. hp2.free;
  1482. p := hp1
  1483. end
  1484. { removes superfluous And's after movzx's }
  1485. else if taicpu(p).opcode=A_MOVZX then
  1486. begin
  1487. if (taicpu(p).oper[1]^.typ = top_reg) and
  1488. GetNextInstruction(p, hp1) and
  1489. (tai(hp1).typ = ait_instruction) and
  1490. (taicpu(hp1).opcode = A_AND) and
  1491. (taicpu(hp1).oper[0]^.typ = top_const) and
  1492. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1493. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1494. case taicpu(p).opsize Of
  1495. S_BL, S_BW:
  1496. if (taicpu(hp1).oper[0]^.val = $ff) then
  1497. begin
  1498. asml.remove(hp1);
  1499. hp1.free;
  1500. end;
  1501. S_WL:
  1502. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1503. begin
  1504. asml.remove(hp1);
  1505. hp1.free;
  1506. end;
  1507. end;
  1508. {changes some movzx constructs to faster synonyms (all examples
  1509. are given with eax/ax, but are also valid for other registers)}
  1510. if (taicpu(p).oper[1]^.typ = top_reg) then
  1511. if (taicpu(p).oper[0]^.typ = top_reg) then
  1512. case taicpu(p).opsize of
  1513. S_BW:
  1514. begin
  1515. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1516. not(cs_opt_size in current_settings.optimizerswitches) then
  1517. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1518. begin
  1519. taicpu(p).opcode := A_AND;
  1520. taicpu(p).changeopsize(S_W);
  1521. taicpu(p).loadConst(0,$ff);
  1522. end
  1523. else if GetNextInstruction(p, hp1) and
  1524. (tai(hp1).typ = ait_instruction) and
  1525. (taicpu(hp1).opcode = A_AND) and
  1526. (taicpu(hp1).oper[0]^.typ = top_const) and
  1527. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1528. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1529. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1530. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1531. begin
  1532. taicpu(p).opcode := A_MOV;
  1533. taicpu(p).changeopsize(S_W);
  1534. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1535. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1536. end;
  1537. end;
  1538. S_BL:
  1539. begin
  1540. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1541. not(cs_opt_size in current_settings.optimizerswitches) then
  1542. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1543. begin
  1544. taicpu(p).opcode := A_AND;
  1545. taicpu(p).changeopsize(S_L);
  1546. taicpu(p).loadConst(0,$ff)
  1547. end
  1548. else if GetNextInstruction(p, hp1) and
  1549. (tai(hp1).typ = ait_instruction) and
  1550. (taicpu(hp1).opcode = A_AND) and
  1551. (taicpu(hp1).oper[0]^.typ = top_const) and
  1552. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1553. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1554. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1555. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1556. begin
  1557. taicpu(p).opcode := A_MOV;
  1558. taicpu(p).changeopsize(S_L);
  1559. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1560. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1561. end
  1562. end;
  1563. S_WL:
  1564. begin
  1565. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1566. not(cs_opt_size in current_settings.optimizerswitches) then
  1567. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1568. begin
  1569. taicpu(p).opcode := A_AND;
  1570. taicpu(p).changeopsize(S_L);
  1571. taicpu(p).loadConst(0,$ffff);
  1572. end
  1573. else if GetNextInstruction(p, hp1) and
  1574. (tai(hp1).typ = ait_instruction) and
  1575. (taicpu(hp1).opcode = A_AND) and
  1576. (taicpu(hp1).oper[0]^.typ = top_const) and
  1577. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1578. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1579. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1580. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1581. begin
  1582. taicpu(p).opcode := A_MOV;
  1583. taicpu(p).changeopsize(S_L);
  1584. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1585. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1586. end;
  1587. end;
  1588. end
  1589. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1590. begin
  1591. if GetNextInstruction(p, hp1) and
  1592. (tai(hp1).typ = ait_instruction) and
  1593. (taicpu(hp1).opcode = A_AND) and
  1594. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1595. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1596. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1597. begin
  1598. taicpu(p).opcode := A_MOV;
  1599. case taicpu(p).opsize Of
  1600. S_BL:
  1601. begin
  1602. taicpu(p).changeopsize(S_L);
  1603. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1604. end;
  1605. S_WL:
  1606. begin
  1607. taicpu(p).changeopsize(S_L);
  1608. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1609. end;
  1610. S_BW:
  1611. begin
  1612. taicpu(p).changeopsize(S_W);
  1613. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1614. end;
  1615. end;
  1616. end;
  1617. end;
  1618. end;
  1619. end;
  1620. (* should not be generated anymore by the current code generator
  1621. A_POP:
  1622. begin
  1623. if target_info.system=system_i386_go32v2 then
  1624. begin
  1625. { Transform a series of pop/pop/pop/push/push/push to }
  1626. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1627. { because I'm not sure whether they can cope with }
  1628. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1629. { such a problem when using esp as frame pointer (JM) }
  1630. if (taicpu(p).oper[0]^.typ = top_reg) then
  1631. begin
  1632. hp1 := p;
  1633. hp2 := p;
  1634. l := 0;
  1635. while getNextInstruction(hp1,hp1) and
  1636. (hp1.typ = ait_instruction) and
  1637. (taicpu(hp1).opcode = A_POP) and
  1638. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1639. begin
  1640. hp2 := hp1;
  1641. inc(l,4);
  1642. end;
  1643. getLastInstruction(p,hp3);
  1644. l1 := 0;
  1645. while (hp2 <> hp3) and
  1646. assigned(hp1) and
  1647. (hp1.typ = ait_instruction) and
  1648. (taicpu(hp1).opcode = A_PUSH) and
  1649. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1650. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1651. begin
  1652. { change it to a two op operation }
  1653. taicpu(hp2).oper[1]^.typ:=top_none;
  1654. taicpu(hp2).ops:=2;
  1655. taicpu(hp2).opcode := A_MOV;
  1656. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1657. reference_reset(tmpref);
  1658. tmpRef.base.enum:=R_INTREGISTER;
  1659. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1660. convert_register_to_enum(tmpref.base);
  1661. tmpRef.offset := l;
  1662. taicpu(hp2).loadRef(0,tmpRef);
  1663. hp4 := hp1;
  1664. getNextInstruction(hp1,hp1);
  1665. asml.remove(hp4);
  1666. hp4.free;
  1667. getLastInstruction(hp2,hp2);
  1668. dec(l,4);
  1669. inc(l1);
  1670. end;
  1671. if l <> -4 then
  1672. begin
  1673. inc(l,4);
  1674. for l1 := l1 downto 1 do
  1675. begin
  1676. getNextInstruction(hp2,hp2);
  1677. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1678. end
  1679. end
  1680. end
  1681. end
  1682. else
  1683. begin
  1684. if (taicpu(p).oper[0]^.typ = top_reg) and
  1685. GetNextInstruction(p, hp1) and
  1686. (tai(hp1).typ=ait_instruction) and
  1687. (taicpu(hp1).opcode=A_PUSH) and
  1688. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1689. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1690. begin
  1691. { change it to a two op operation }
  1692. taicpu(p).oper[1]^.typ:=top_none;
  1693. taicpu(p).ops:=2;
  1694. taicpu(p).opcode := A_MOV;
  1695. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1696. reference_reset(tmpref);
  1697. TmpRef.base.enum := R_ESP;
  1698. taicpu(p).loadRef(0,TmpRef);
  1699. asml.remove(hp1);
  1700. hp1.free;
  1701. end;
  1702. end;
  1703. end;
  1704. *)
  1705. A_PUSH:
  1706. begin
  1707. if (taicpu(p).opsize = S_W) and
  1708. (taicpu(p).oper[0]^.typ = Top_Const) and
  1709. GetNextInstruction(p, hp1) and
  1710. (tai(hp1).typ = ait_instruction) and
  1711. (taicpu(hp1).opcode = A_PUSH) and
  1712. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1713. (taicpu(hp1).opsize = S_W) then
  1714. begin
  1715. taicpu(p).changeopsize(S_L);
  1716. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1717. asml.remove(hp1);
  1718. hp1.free;
  1719. end;
  1720. end;
  1721. A_SHL, A_SAL:
  1722. begin
  1723. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1724. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1725. (taicpu(p).opsize = S_L) and
  1726. (taicpu(p).oper[0]^.val <= 3) then
  1727. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1728. begin
  1729. TmpBool1 := True; {should we check the next instruction?}
  1730. TmpBool2 := False; {have we found an add/sub which could be
  1731. integrated in the lea?}
  1732. reference_reset(tmpref,2);
  1733. TmpRef.index := taicpu(p).oper[1]^.reg;
  1734. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1735. while TmpBool1 and
  1736. GetNextInstruction(p, hp1) and
  1737. (tai(hp1).typ = ait_instruction) and
  1738. ((((taicpu(hp1).opcode = A_ADD) or
  1739. (taicpu(hp1).opcode = A_SUB)) and
  1740. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1741. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1742. (((taicpu(hp1).opcode = A_INC) or
  1743. (taicpu(hp1).opcode = A_DEC)) and
  1744. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1745. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1746. (not GetNextInstruction(hp1,hp2) or
  1747. not instrReadsFlags(hp2)) Do
  1748. begin
  1749. TmpBool1 := False;
  1750. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1751. begin
  1752. TmpBool1 := True;
  1753. TmpBool2 := True;
  1754. case taicpu(hp1).opcode of
  1755. A_ADD:
  1756. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1757. A_SUB:
  1758. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1759. end;
  1760. asml.remove(hp1);
  1761. hp1.free;
  1762. end
  1763. else
  1764. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1765. (((taicpu(hp1).opcode = A_ADD) and
  1766. (TmpRef.base = NR_NO)) or
  1767. (taicpu(hp1).opcode = A_INC) or
  1768. (taicpu(hp1).opcode = A_DEC)) then
  1769. begin
  1770. TmpBool1 := True;
  1771. TmpBool2 := True;
  1772. case taicpu(hp1).opcode of
  1773. A_ADD:
  1774. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1775. A_INC:
  1776. inc(TmpRef.offset);
  1777. A_DEC:
  1778. dec(TmpRef.offset);
  1779. end;
  1780. asml.remove(hp1);
  1781. hp1.free;
  1782. end;
  1783. end;
  1784. if TmpBool2 or
  1785. ((current_settings.optimizecputype < cpu_Pentium2) and
  1786. (taicpu(p).oper[0]^.val <= 3) and
  1787. not(cs_opt_size in current_settings.optimizerswitches)) then
  1788. begin
  1789. if not(TmpBool2) and
  1790. (taicpu(p).oper[0]^.val = 1) then
  1791. begin
  1792. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1793. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1794. end
  1795. else
  1796. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1797. taicpu(p).oper[1]^.reg);
  1798. InsertLLItem(asml,p.previous, p.next, hp1);
  1799. p.free;
  1800. p := hp1;
  1801. end;
  1802. end
  1803. else
  1804. if (current_settings.optimizecputype < cpu_Pentium2) and
  1805. (taicpu(p).oper[0]^.typ = top_const) and
  1806. (taicpu(p).oper[1]^.typ = top_reg) then
  1807. if (taicpu(p).oper[0]^.val = 1) then
  1808. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1809. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1810. (unlike shl, which is only pairable in the U pipe)}
  1811. begin
  1812. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1813. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1814. InsertLLItem(asml,p.previous, p.next, hp1);
  1815. p.free;
  1816. p := hp1;
  1817. end
  1818. else if (taicpu(p).opsize = S_L) and
  1819. (taicpu(p).oper[0]^.val<= 3) then
  1820. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1821. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1822. begin
  1823. reference_reset(tmpref,2);
  1824. TmpRef.index := taicpu(p).oper[1]^.reg;
  1825. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1826. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1827. InsertLLItem(asml,p.previous, p.next, hp1);
  1828. p.free;
  1829. p := hp1;
  1830. end
  1831. end;
  1832. A_SETcc :
  1833. { changes
  1834. setcc (funcres) setcc reg
  1835. movb (funcres), reg to leave/ret
  1836. leave/ret }
  1837. begin
  1838. if (taicpu(p).oper[0]^.typ = top_ref) and
  1839. GetNextInstruction(p, hp1) and
  1840. GetNextInstruction(hp1, hp2) and
  1841. IsExitCode(hp2) and
  1842. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1843. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1844. not(assigned(current_procinfo.procdef.funcretsym) and
  1845. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1846. (hp1.typ = ait_instruction) and
  1847. (taicpu(hp1).opcode = A_MOV) and
  1848. (taicpu(hp1).opsize = S_B) and
  1849. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1850. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1851. begin
  1852. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1853. asml.remove(hp1);
  1854. hp1.free;
  1855. end
  1856. end;
  1857. A_SUB:
  1858. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1859. { * change "sub/add const1, reg" or "dec reg" followed by
  1860. "sub const2, reg" to one "sub ..., reg" }
  1861. begin
  1862. if (taicpu(p).oper[0]^.typ = top_const) and
  1863. (taicpu(p).oper[1]^.typ = top_reg) then
  1864. if (taicpu(p).oper[0]^.val = 2) and
  1865. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1866. { Don't do the sub/push optimization if the sub }
  1867. { comes from setting up the stack frame (JM) }
  1868. (not getLastInstruction(p,hp1) or
  1869. (hp1.typ <> ait_instruction) or
  1870. (taicpu(hp1).opcode <> A_MOV) or
  1871. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1872. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1873. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1874. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1875. begin
  1876. hp1 := tai(p.next);
  1877. while Assigned(hp1) and
  1878. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1879. not regReadByInstruction(RS_ESP,hp1) and
  1880. not regModifiedByInstruction(RS_ESP,hp1) do
  1881. hp1 := tai(hp1.next);
  1882. if Assigned(hp1) and
  1883. (tai(hp1).typ = ait_instruction) and
  1884. (taicpu(hp1).opcode = A_PUSH) and
  1885. (taicpu(hp1).opsize = S_W) then
  1886. begin
  1887. taicpu(hp1).changeopsize(S_L);
  1888. if taicpu(hp1).oper[0]^.typ=top_reg then
  1889. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1890. hp1 := tai(p.next);
  1891. asml.remove(p);
  1892. p.free;
  1893. p := hp1;
  1894. continue
  1895. end;
  1896. if DoSubAddOpt(p) then
  1897. continue;
  1898. end
  1899. else if DoSubAddOpt(p) then
  1900. continue
  1901. end;
  1902. end;
  1903. end; { if is_jmp }
  1904. end;
  1905. end;
  1906. updateUsedRegs(UsedRegs,p);
  1907. p:=tai(p.next);
  1908. end;
  1909. end;
  { Second peephole pass.

    Walks the instruction list from BlockStart up to (but not including)
    BlockEnd and applies the following rewrites in place on asml:

      * jb/jnae or jae/jnb over a single inc/dec  ->  (cmc +) adc/sbb $0
      * jCC over a short run of reg->reg movs      ->  CMOVcc (only when the
        selected CPU type advertises CPUX86_HAS_CMOV), including the
        two-armed "if/else" shape with an intermediate unconditional jmp
      * fstp/fistp                                 ->  delegated to
        doFpuLoadStoreOpt
      * mov reg1,reg2; imul y,reg2                 ->  imul y,reg1,reg2
      * mov reg1,reg2; mov/zx/sx (reg2,..),reg2    ->  mov/zx/sx (reg1,..),reg2
      * mov (ref),reg; <arith> x,reg; mov reg,(ref) -> <arith> x,(ref)
        when reg is not used afterwards

    Instructions for which InsContainsSegRef returns true are skipped.

    Parameters:
      asml       - assembler list that owns the instructions; removed
                   instructions are unlinked from it and freed here
      BlockStart - first item to examine
      BlockEnd   - item at which the walk stops (not examined) }
  procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);

  {$ifdef DEBUG_AOPTCPU}
    { Debug build: inserts the message as an assembler comment before p. }
    procedure DebugMsg(const s: string;p : tai);
      begin
        asml.insertbefore(tai_comment.Create(strpnew(s)), p);
      end;
  {$else DEBUG_AOPTCPU}
    { Release build: no-op, so calls can stay in the code unconditionally. }
    procedure DebugMsg(const s: string;p : tai);inline;
      begin
      end;
  {$endif DEBUG_AOPTCPU}

    { Returns true when p is a 16 or 32 bit "mov reg,reg" instruction, i.e.
      one that may be turned into a CMOVcc by only changing opcode and
      condition. }
    function CanBeCMOV(p : tai) : boolean;
      begin
         CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
           (taicpu(p).opcode=A_MOV) and
           (taicpu(p).opsize in [S_L,S_W]) and
           ((taicpu(p).oper[0]^.typ = top_reg)
             { we can't use cmov ref,reg because
               ref could be nil and cmov still throws an exception
               if ref=nil but the mov isn't done (FK)
              or ((taicpu(p).oper[0]^.typ = top_ref) and
               (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
             }
           ) and
           (taicpu(p).oper[1]^.typ in [top_reg]);
      end;

    var
      p,hp1,hp2,hp3: tai;
      { number of movs counted in the group currently being examined }
      l : longint;
      condition : tasmcond;
      UsedRegs, TmpUsedRegs: TRegSet;
      carryadd_opcode: Tasmop;

    begin
      p := BlockStart;
      UsedRegs := [];
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_Jcc:
                    begin
                      { jb @@1                            cmc
                        inc/dec operand        -->        adc/sbb operand,0
                        @@1:

                        ... and ...

                        jnb @@1
                        inc/dec operand        -->        adc/sbb operand,0
                        @@1: }
                      if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
                         GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
                         (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
                        begin
                          carryadd_opcode:=A_NONE;
                          if Taicpu(p).condition in [C_NAE,C_B] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { the jump itself is recycled as the cmc }
                                  Taicpu(p).clearop(0);
                                  Taicpu(p).ops:=0;
                                  Taicpu(p).is_jmp:=false;
                                  Taicpu(p).opcode:=A_CMC;
                                  Taicpu(p).condition:=C_NONE;
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  continue;
                                end;
                            end;
                          if Taicpu(p).condition in [C_AE,C_NB] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  asml.remove(p);
                                  p.free;
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  p:=hp1;
                                  continue;
                                end;
                            end;
                        end;
                      if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
                        begin
                          { check for
                                 jCC   xxx
                                 <several movs>
                              xxx:
                          }
                          l:=0;
                          GetNextInstruction(p, hp1);
                          while assigned(hp1) and
                            CanBeCMOV(hp1) and
                            { stop on labels }
                            not(hp1.typ=ait_label) do
                            begin
                              inc(l);
                              GetNextInstruction(hp1,hp1);
                            end;
                          if assigned(hp1) then
                            begin
                              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      condition:=inverse_cond(taicpu(p).condition);
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      repeat
                                        taicpu(hp1).opcode:=A_CMOVcc;
                                        taicpu(hp1).condition:=condition;
                                        GetNextInstruction(hp1,hp1);
                                      until not(assigned(hp1)) or
                                        not(CanBeCMOV(hp1));
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                begin
                                  { check further for
                                         jCC   xxx
                                         <several movs 1>
                                         jmp   yyy
                                     xxx:
                                         <several movs 2>
                                     yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    { the first mov group must not be empty: the
                                      repeat..until loop below always converts at
                                      least one instruction, which would otherwise
                                      be the "jmp yyy" itself }
                                    (l>0) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                     begin
                                       l:=0;
                                       { skip hp1 to <several moves 2> }
                                       GetNextInstruction(hp1, hp1);
                                       while assigned(hp1) and
                                         CanBeCMOV(hp1) do
                                         begin
                                           inc(l);
                                           GetNextInstruction(hp1, hp1);
                                         end;
                                       { hp1 points to yyy: }
                                       if assigned(hp1) and
                                         { the second mov group must not be empty
                                           either, otherwise the second
                                           repeat..until loop would treat the
                                           item following xxx: as an instruction }
                                         (l>0) and
                                         FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                         begin
                                            condition:=inverse_cond(taicpu(p).condition);
                                            GetNextInstruction(p,hp1);
                                            hp3:=p;
                                            p:=hp1;
                                            repeat
                                              taicpu(hp1).opcode:=A_CMOVcc;
                                              taicpu(hp1).condition:=condition;
                                              GetNextInstruction(hp1,hp1);
                                            until not(assigned(hp1)) or
                                              not(CanBeCMOV(hp1));
                                            { hp2 is still at jmp yyy }
                                            GetNextInstruction(hp2,hp1);
                                            { hp1 is now at xxx: }
                                            condition:=inverse_cond(condition);
                                            GetNextInstruction(hp1,hp1);
                                            { hp1 is now at <several movs 2> }
                                            repeat
                                              taicpu(hp1).opcode:=A_CMOVcc;
                                              taicpu(hp1).condition:=condition;
                                              GetNextInstruction(hp1,hp1);
                                            until not(assigned(hp1)) or
                                              not(CanBeCMOV(hp1));
                                            {
                                            asml.remove(hp1.next)
                                            hp1.next.free;
                                            asml.remove(hp1);
                                            hp1.free;
                                            }
                                            { remove jCC }
                                            tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                            asml.remove(hp3);
                                            hp3.free;
                                            { remove jmp }
                                            tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                            asml.remove(hp2);
                                            hp2.free;
                                            continue;
                                         end;
                                     end;
                                end;
                            end;
                        end;
                    end;
                  A_FSTP,A_FISTP:
                    if doFpuLoadStoreOpt(asmL,p) then
                      continue;
                  A_IMUL:
                    begin
                      if (taicpu(p).ops >= 2) and
                         ((taicpu(p).oper[0]^.typ = top_const) or
                          ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         ((taicpu(p).ops = 2) or
                          ((taicpu(p).oper[2]^.typ = top_reg) and
                           (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
                         getLastInstruction(p,hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_MOV) and
                         (taicpu(hp1).oper[0]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
                        begin
                          taicpu(p).ops := 3;
                          taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_MOV:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_reg) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_Instruction) and
                         ((taicpu(hp1).opcode = A_MOV) or
                          (taicpu(hp1).opcode = A_MOVZX) or
                          (taicpu(hp1).opcode = A_MOVSX)) and
                         (taicpu(hp1).oper[0]^.typ = top_ref) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
                          (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
                         (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
                        {mov reg1, reg2
                         mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
                        begin
                          if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
                          if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
                          asml.remove(p);
                          p.free;
                          p := hp1;
                          continue;
                        end
                      else if (taicpu(p).oper[0]^.typ = top_ref) and
                        GetNextInstruction(p,hp1) and
                        (hp1.typ = ait_instruction) and
                        (IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) or
                         ((taicpu(hp1).opcode=A_LEA) and
                          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
                          ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
                           (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)) or
                           (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
                           (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg))
                          )
                         )
                        ) and
                        GetNextInstruction(hp1,hp2) and
                        MatchInstruction(hp2,A_MOV,[]) and
                        MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                        (taicpu(hp2).oper[1]^.typ = top_ref) then
                        begin
                          { work on a copy so the liveness query below does not
                            disturb the register state tracked by the main loop }
                          TmpUsedRegs := UsedRegs;
                          UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                          if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
                            not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
                              hp2, TmpUsedRegs))) then
                            { change   mov            (ref), reg            }
                            {          add/sub/or/... reg2/$const, reg      }
                            {          mov            reg, (ref)            }
                            {          # release reg                        }
                            { to       add/sub/or/... reg2/$const, (ref)    }
                            begin
                              case taicpu(hp1).opcode of
                                A_INC,A_DEC,A_NOT,A_NEG:
                                  taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                                A_LEA:
                                  begin
                                    { the lea becomes an add of whichever reference
                                      register is not the loaded one }
                                    taicpu(hp1).opcode:=A_ADD;
                                    if taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg then
                                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                                    else
                                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base);
                                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                                    DebugMsg('Peephole FoldLea done',hp1);
                                  end
                                else
                                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                              end;
                              asml.remove(p);
                              asml.remove(hp2);
                              p.free;
                              hp2.free;
                              p := hp1
                            end;
                        end
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Final peephole pass, run after the other passes.

    Walks the instruction list from BlockStart up to (but not including)
    BlockEnd and applies the following rewrites in place on asml:

      * call x; jmp y        ->  push y; jmp x   (only when optimizing for a
                                 CPU older than Pentium2 and not creating PIC)
      * call x; ret          ->  jmp x           (optimization level 4, no PIC)
      * cmp $0,%reg          ->  test %reg,%reg
      * movzbl src,%reg32    ->  xorl %reg32,%reg32; movb src,%reg8
                                 (Pentium only, not when optimizing for size,
                                 not when register variables are enabled)
      * removes a "test/or %y,%y" or "test $-1,%y" that directly follows an
        arithmetic instruction on %y and precedes a setcc/jcc/cmovcc, when
        the preceding instruction already produced the needed flags
      * test $-1,%reg        ->  test %reg,%reg

    Instructions for which InsContainsSegRef returns true are skipped.

    Parameters:
      asml       - assembler list that owns the instructions; removed
                   instructions are unlinked from it and freed here
      BlockStart - first item to examine
      BlockEnd   - item at which the walk stops (not examined) }
  procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
    var
      p,hp1,hp2: tai;
      { true when the current instruction is "test $-1, x" }
      IsTestConstX: boolean;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_CALL:
                    begin
                      { don't do this on modern CPUs, this really hurts them due to
                        broken call/ret pairing }
                      if (current_settings.optimizecputype < cpu_Pentium2) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_JMP) and
                         ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                        begin
                          { push the jump target as return address and turn
                            the call into a jump }
                          hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                          InsertLLItem(asml, p.previous, p, hp2);
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end
                      { replace
                          call   procname
                          ret
                        by
                          jmp    procname

                        this should never hurt except when pic is used, not sure
                        how to handle it then

                        but do it only on level 4 because it destroys stack back traces
                      }
                      else if (cs_opt_level4 in current_settings.optimizerswitches) and
                        not(cs_create_pic in current_settings.moduleswitches) and
                        GetNextInstruction(p, hp1) and
                        (hp1.typ = ait_instruction) and
                        (taicpu(hp1).opcode = A_RET) and
                        (taicpu(hp1).ops=0) then
                        begin
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        {change "cmp $0, %reg" to "test %reg, %reg"}
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { p is not advanced, so the new test is examined
                            again by the A_TEST branch on the next iteration }
                          continue;
                        end;
                    end;
(*
Optimization is not safe; xor clears the carry flag.
See test/tgadint64 in the test suite.
                  A_MOV:
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[0]^.val = 0) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) then
                      { change "mov $0, %reg" into "xor %reg, %reg" }
                      begin
                        taicpu(p).opcode := A_XOR;
                        taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                      end;
*)
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg)
                            then
                              case taicpu(p).opsize of
                                S_BL:
                                  begin
                                    if IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                                       { source and destination must be different
                                         registers: the xor below clears the
                                         destination before the source is read,
                                         so "movzbl %al,%eax" would lose its value }
                                       (getsupreg(taicpu(p).oper[0]^.reg) <>
                                        getsupreg(taicpu(p).oper[1]^.reg)) and
                                       not(cs_opt_size in current_settings.optimizerswitches) and
                                       (current_settings.optimizecputype = cpu_Pentium) then
                                      {Change "movzbl %reg1, %reg2" to
                                       "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                       PentiumMMX}
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                                    taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p.previous, p, hp1);
                                        taicpu(p).opcode := A_MOV;
                                        taicpu(p).changeopsize(S_B);
                                        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                      end;
                                  end;
                              end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                              (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                              (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                              not(cs_opt_size in current_settings.optimizerswitches) and
                              IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                              (current_settings.optimizecputype = cpu_Pentium) and
                              (taicpu(p).opsize = S_BL) then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                             Pentium and PentiumMMX}
                            begin
                              hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                          taicpu(p).oper[1]^.reg);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                              InsertLLItem(asml,p.previous, p, hp1);
                            end;
                      end;
                  A_TEST, A_OR:
                    {removes the line marked with (x) from the sequence
                     and/or/xor/add/sub/... $x, %y
                     test/or %y, %y | test $-1, %y    (x)
                     j(n)z _Label
                        as the first instruction already adjusts the ZF
                        %y operand may also be a reference }
                    begin
                      IsTestConstX:=(taicpu(p).opcode=A_TEST) and
                        MatchOperand(taicpu(p).oper[0]^,-1);
                      if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
                         GetLastInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         GetNextInstruction(p,hp2) and
                         MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
                        case taicpu(hp1).opcode Of
                          A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                { and in case of carry for A(E)/B(E)/C/NC                  }
                                 ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                  ((taicpu(hp1).opcode <> A_ADD) and
                                   (taicpu(hp1).opcode <> A_SUB))) then
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_SHL, A_SAL, A_SHR, A_SAR:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                                { therefore, it's only safe to do this optimization for     }
                                { shifts by a (nonzero) constant                            }
                                 (taicpu(hp1).oper[0]^.typ = top_const) and
                                 (taicpu(hp1).oper[0]^.val <> 0) and
                                { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_DEC, A_INC, A_NEG:
                            begin
                              if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                                { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                { and in case of carry for A(E)/B(E)/C/NC                  }
                                (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  case taicpu(hp1).opcode Of
                                    A_DEC, A_INC:
                                      {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                      begin
                                        case taicpu(hp1).opcode Of
                                          A_DEC: taicpu(hp1).opcode := A_SUB;
                                          A_INC: taicpu(hp1).opcode := A_ADD;
                                        end;
                                        taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                        taicpu(hp1).loadConst(0,1);
                                        taicpu(hp1).ops:=2;
                                      end
                                  end;
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end
                          else
                            { change "test  $-1,%reg" into "test %reg,%reg" }
                            if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                              taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                        end { case }
                      else
                        { change "test  $-1,%reg" into "test %reg,%reg" }
                        if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  2468. end.