aoptcpu.pas 137 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aoptobj, aoptcpub, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  type
    { Assembler optimizer class of this unit, derived from TX86AsmOptimizer. }
    TCpuAsmOptimizer = class(TX86AsmOptimizer)
      { overridden optimizer entry points }
      procedure Optimize; override;
      procedure PrePeepHoleOpts; override;
      procedure PeepHoleOptPass1; override;
      procedure PeepHoleOptPass2; override;
      procedure PostPeepHoleOpts; override;

      { true if p is a ret, a leave followed by ret, or
        "mov ebp,esp; pop ebp; ret" }
      function IsExitCode(p : tai) : boolean;
      { removes a matching fstp/fld (or fistp/fild) pair right before exit code;
        returns true if the caller should "continue" afterwards }
      function DoFpuLoadStoreOpt(var p : tai) : boolean;
      { drops the last deallocation marker of the function result register(s)
        located before p }
      procedure RemoveLastDeallocForFuncRes(p : tai);
      { marks reg as allocated between (and including) instructions p1 and p2 }
      procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
      { true if instruction hp reads reg }
      function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
      { override that forwards to RegReadByInstruction }
      function InstructionLoadsFromReg(const reg : TRegister;const hp : tai) : boolean;override;
    end;
  39. Var
  40. AsmOptimizer : TCpuAsmOptimizer;
  41. Implementation
  42. uses
  43. verbose,globtype,globals,
  44. cutils,
  45. aoptbase,
  46. cpuinfo,
  47. aasmcpu,
  48. procinfo,
  49. cgutils,cgx86,
  50. { units we should get rid of: }
  51. symsym,symconst;
  { Returns true if hp1 is an arithmetic/logic instruction whose destination
    operand is the register reg:
      - add/sub/or/xor/and/shl/shr/sar with a constant source, or a register
        source different from reg, and reg as second operand;
      - inc/dec/neg/not with reg as the only operand.
    Any other opcode yields false. }
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      result := false;
      case hp1.opcode of
        A_INC, A_DEC, A_NEG, A_NOT:
          result :=
            (hp1.oper[0]^.typ = top_reg) and
            (hp1.oper[0]^.reg = reg);
        A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
          { the source operand is tested first, the destination only afterwards }
          result :=
            ((hp1.oper[0]^.typ = top_const) or
             ((hp1.oper[0]^.typ = top_reg) and
              (hp1.oper[0]^.reg <> reg))) and
            (hp1.oper[1]^.typ = top_reg) and
            (hp1.oper[1]^.reg = reg);
      end;
    end;
  { Returns true if p starts one of the recognised procedure exit sequences:
      ret
      leave; ret
      mov %ebp,%esp; pop %ebp; ret
    The following instructions are located with GetNextInstruction. }
  function TCpuAsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      second, third : tai;
    begin
      result := false;
      if p.typ <> ait_instruction then
        exit;
      case taicpu(p).opcode of
        A_RET:
          result := true;
        A_LEAVE:
          result :=
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_RET);
        A_MOV:
          result :=
            (taicpu(p).oper[0]^.typ=top_reg) and
            (taicpu(p).oper[0]^.reg=NR_EBP) and
            (taicpu(p).oper[1]^.typ=top_reg) and
            (taicpu(p).oper[1]^.reg=NR_ESP) and
            GetNextInstruction(p,second) and
            (second.typ=ait_instruction) and
            (taicpu(second).opcode=A_POP) and
            (taicpu(second).oper[0]^.typ=top_reg) and
            (taicpu(second).oper[0]^.reg=NR_EBP) and
            GetNextInstruction(second,third) and
            (third.typ=ait_instruction) and
            (taicpu(third).opcode=A_RET);
      end;
    end;
  { Removes the last register deallocation marker before p for the register(s)
    selected by the return type of the current procedure: EAX for the listed
    non-ordinal types and for ordinals of non-zero size, plus EDX for ordinals
    of size 8. Other return types are left untouched. }
  procedure TCPUAsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Walks backwards from p. The first integer-register dealloc marker for
      supreg that is found gets removed and freed; the walk also ends at the
      start of the list or at an item in which the whole register occurs. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        scan: tai;
        wholereg: tregister;
      begin
        wholereg := newreg(R_INTREGISTER,supreg,R_SUBWHOLE);
        scan := tai(p.previous);
        while assigned(scan) do
          begin
            if (scan.typ = ait_regalloc) and
               (tai_regalloc(scan).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(scan).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(scan).reg) = supreg) then
              begin
                asml.remove(scan);
                scan.free;
                break;
              end;
            if regInInstruction(wholereg,scan) then
              break;
            scan := tai(scan.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        orddef:
          if current_procinfo.procdef.returndef.size <> 0 then
            begin
              DoRemoveLastDeallocForFuncRes(RS_EAX);
              { for int64/qword }
              if current_procinfo.procdef.returndef.size = 8 then
                DoRemoveLastDeallocForFuncRes(RS_EDX);
            end;
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
      end;
    end;
  { Looks for "fstp ref; fld ref" or "fistp ref; fild ref" (same reference,
    same operand size) starting at p. When the operand size is S_FX, the pair
    is directly followed by exit code, and the reference is frame-pointer
    based without index register and not below the offset of the function
    result symbol, both instructions are removed and p is moved to the exit
    code.
    Returns true if a "continue" should be done after this optimization.

    Turning the pair into a plain fst/fist for the other sizes is deliberately
    not done, because the store operation rounds. }
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
    var
      loadIns, exitIns: tai;
    begin
      result := false;

      { p must store to memory and be followed by an instruction }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not getNextInstruction(p, loadIns) then
        exit;
      if loadIns.typ <> ait_instruction then
        exit;

      { only the fstp/fld and fistp/fild combinations qualify }
      if not(((taicpu(loadIns).opcode = A_FLD) and
              (taicpu(p).opcode = A_FSTP)) or
             ((taicpu(p).opcode = A_FISTP) and
              (taicpu(loadIns).opcode = A_FILD))) then
        exit;

      { the reload must read exactly what was just stored }
      if (taicpu(loadIns).oper[0]^.typ <> top_ref) or
         (taicpu(loadIns).opsize <> taicpu(p).opsize) or
         not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(loadIns).oper[0]^.ref^) then
        exit;

      { dropping the pair is only valid for extended because of rounding }
      if taicpu(p).opsize <> S_FX then
        exit;
      if not getNextInstruction(loadIns, exitIns) then
        exit;
      if (exitIns.typ <> ait_instruction) or
         not IsExitCode(exitIns) then
        exit;
      if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      asml.remove(p);
      asml.remove(loadIns);
      p.free;
      loadIns.free;
      p := exitIns;
      removeLastDeallocForFuncRes(p);
      result := true;
    end;
  { Allocates register reg between (and including) instructions p1 and p2.
    The type of p1 and p2 must not be in SkipInstr.
    Note that this routine is called both from the peephole optimizer (where
    optinfo is not yet initialised) and from the cse (where it is).

    Nothing is done for ESP, for the current frame pointer, or when p1 is nil.
    If reg is not in initialusedregs, an allocation marker is inserted before
    p1 and the register is added to that set. All alloc/dealloc markers for
    reg inside the range are removed; if the first removed marker was an
    allocation, or the last one a deallocation, an equivalent marker is put
    back at the corresponding edge of the range. }
  procedure TCpuAsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      item, rangeStart: tai;
      sawRemoval,
      firstWasAlloc,
      lastWasDealloc: boolean;
    begin
      { an EXTDEBUG consistency check of p1.optinfo against initialusedregs
        (internalerror 2004101010) used to live here and is disabled }
      rangeStart := p1;
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
         not(assigned(p1)) then
        { this happens with registers which are loaded implicitly, outside the }
        { current block (e.g. esi with self)                                   }
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      sawRemoval := false;
      firstWasAlloc := false;
      lastWasDealloc := false;
{$ifdef allocregdebug}
      item := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' from here...'));
      insertllitem(asml,p1.previous,p1,item);
      item := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' till here...'));
      insertllitem(asml,p2,p2.next,item);
{$endif allocregdebug}
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          item := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,item);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1 <> p2) do
        begin
          { initialised optinfo is not supported here; the former handling was
            IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs) }
          if assigned(p1.optinfo) then
            internalerror(2014022301);
          p1 := tai(p1.next);
          repeat
            { step over skippable items, but stop at register markers }
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) do
              p1 := tai(p1.next);
            { remove all allocation/deallocation info about the register in between }
            if assigned(p1) and
               (p1.typ = ait_regalloc) then
              begin
                if tai_regalloc(p1).reg=reg then
                  begin
                    if not sawRemoval then
                      begin
                        firstWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                        sawRemoval := true;
                      end;
                    lastWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                    item := tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1 := item;
                  end
                else
                  p1 := tai(p1.next);
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      if assigned(p1) then
        begin
          { restore the markers that delimited the removed range }
          if firstWasAlloc then
            begin
              item := tai_regalloc.Alloc(reg,nil);
              insertLLItem(rangeStart.previous,rangeStart,item);
            end;
          if lastWasDealloc then
            begin
              item := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,item);
            end;
        end;
    end;
  { Converts a tinschange value that names a specific register into the
    corresponding super register.
    Values up to CH_REDI index the table directly. Values up to CH_WEDI,
    CH_RWEDI and CH_MEDI are first shifted down by the ordinal of the last
    member of the preceding group (CH_REDI, CH_WEDI, CH_RWEDI respectively).
    Anything above CH_MEDI raises internal error 2016041901.
    NOTE(review): relies on the enum layout putting each shifted value inside
    CH_REAX..CH_REDI - confirm against the tinschange declaration. }
  function tch2reg(ch: tinschange): tsuperregister;
    const
      ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
    var
      shift: longint;
    begin
      if (ch <= CH_REDI) then
        shift := 0
      else if (ch <= CH_WEDI) then
        shift := ord(CH_REDI)
      else if (ch <= CH_RWEDI) then
        shift := ord(CH_WEDI)
      else if (ch <= CH_MEDI) then
        shift := ord(CH_RWEDI)
      else
        begin
          InternalError(2016041901);
          exit;
        end;
      tch2reg := ch2reg[tinschange(ord(ch) - shift)];
    end;
  { Returns true if reg is an integer register whose super register lies in
    the range RS_EAX..RS_EBX. The sub register (size) is not inspected. }
  function isgp32reg(reg: TRegister): boolean;
    var
      sr: tsuperregister;
    begin
      result := false;
      if getregtype(reg) <> R_INTREGISTER then
        exit;
      sr := getsupreg(reg);
{$push}{$warnings off}
      result := (sr >= RS_EAX) and (sr <= RS_EBX);
{$pop}
    end;
  { Override that simply delegates to RegReadByInstruction. }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
    begin
      InstructionLoadsFromReg := RegReadByInstruction(reg,hp);
    end;
  { Returns true if the instruction hp reads register reg.
      - non-instructions never read anything;
      - a call is treated as reading every register;
      - imul: the one-operand form reads EAX and its operand, the two- and
        three-operand forms are checked on operands 0 and 1;
      - idiv/div/mul read their operand plus EAX and EDX;
      - every other opcode reads all registers used in its memory references,
        plus whatever the insprop change table of the opcode marks as read or
        modified. }
  function TCpuAsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
    var
      ins: taicpu;
      idx: longint;
      change: tinschange;
      hit: boolean;
    begin
      result := false;
      if hp.typ <> ait_instruction then
        exit;
      ins := taicpu(hp);
      case ins.opcode of
        A_CALL:
          result := true;
        A_IMUL:
          case ins.ops of
            1:
              result := (reg = NR_EAX) or RegInOp(reg,ins.oper[0]^);
            2,3:
              result :=
                reginop(reg,ins.oper[0]^) or
                reginop(reg,ins.oper[1]^);
          end;
        A_IDIV,A_DIV,A_MUL:
          result :=
            RegInOp(reg,ins.oper[0]^) or
            (getsupreg(reg) in [RS_EAX,RS_EDX]);
        else
          begin
            { registers used for address calculation are always read }
            for idx := 0 to ins.ops-1 do
              if (ins.oper[idx]^.typ = top_ref) and
                 RegInRef(reg,ins.oper[idx]^.ref^) then
                begin
                  result := true;
                  exit;
                end;
            { consult the instruction property table }
            for idx := 1 to maxinschanges do
              begin
                change := insprop[ins.opcode].ch[idx];
                case change of
                  CH_REAX..CH_REDI,CH_RWEAX..CH_MEDI:
                    hit := getsupreg(reg) = tch2reg(change);
                  CH_RWOP1,CH_ROP1,CH_MOP1:
                    hit := reginop(reg,ins.oper[0]^);
                  Ch_RWOP2,Ch_ROP2,Ch_MOP2:
                    hit := reginop(reg,ins.oper[1]^);
                  Ch_RWOP3,Ch_ROP3,Ch_MOP3:
                    hit := reginop(reg,ins.oper[2]^);
                  Ch_RFlags,Ch_RWFlags:
                    hit := reg=NR_DEFAULTFLAGS;
                  else
                    hit := false;
                end;
                if hit then
                  begin
                    result := true;
                    exit;
                  end;
              end;
          end;
      end;
    end;
  { Returns true if at least one operand of p is a memory reference whose
    segment register is set (differs from NR_NO). }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opidx: longint;
    begin
      result := false;
      for opidx := 0 to p.opercnt-1 do
        if (p.oper[opidx]^.typ=top_ref) and
           (p.oper[opidx]^.ref^.segment<>NR_NO) then
          begin
            result := true;
            exit;
          end;
    end;
  { Returns true if p has to be treated as reading the flags: either p is a
    label, or p is an instruction whose insprop change table contains
    Ch_RFlags, Ch_RWFlags or Ch_All. Everything else yields false. }
  function InstrReadsFlags(p: tai): boolean;
    var
      idx: longint;
    begin
      result := false;
      case p.typ of
        ait_label:
          result := true;
        ait_instruction:
          for idx := 1 to maxinschanges do
            if InsProp[taicpu(p).opcode].Ch[idx] in [Ch_RFlags,Ch_RWFlags,Ch_All] then
              begin
                result := true;
                break;
              end;
      end;
    end;
  { Pre-peephole pass over the items from BlockStart up to (excluding)
    BlockEnd. Instructions with a segment-qualified memory operand are
    skipped. For the remaining instructions:
      - 32 bit "imul $const, reg[, reg2]" with a small constant is removed
        (const = 1, two operands), turned into a mov (const = 1, three
        operands) or expanded into lea/add sequences (3, 5, 6, 9, 10, 12),
        unless optimizing for size or the next instruction is a jo/jno;
      - "shr/sar $c1, x; shl $c2, x" is rewritten using an "and" mask;
      - "xor reg, reg" is temporarily turned into "mov $0, reg". }
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
    var
      p,hp1: tai;
      l: aint;
      tmpRef: treference;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_IMUL:
                    {changes certain "imul const, %reg"'s to lea sequences}
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        begin
                          if (taicpu(p).oper[0]^.val = 1) then
                            begin
                              if (taicpu(p).ops = 2) then
                                {remove "imul $1, reg"}
                                begin
                                  hp1 := tai(p.Next);
                                  asml.remove(p);
                                  p.free;
                                  p := hp1;
                                  continue;
                                end
                              else
                                {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                                begin
                                  hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                                  InsertLLItem(p.previous, p.next, hp1);
                                  p.free;
                                  p := hp1;
                                end;
                            end
                          else if
                           ((taicpu(p).ops <= 2) or
                            (taicpu(p).oper[2]^.typ = Top_Reg)) and
                           (taicpu(p).oper[0]^.val <= 12) and
                           not(cs_opt_size in current_settings.optimizerswitches) and
                           { the replacement is skipped when a jo/jno follows
                             the imul }
                           (not(GetNextInstruction(p, hp1)) or
                            not((tai(hp1).typ = ait_instruction) and
                                ((taicpu(hp1).opcode=A_Jcc) and
                                 (taicpu(hp1).condition in [C_O,C_NO])))) then
                            begin
                              reference_reset(tmpref,1);
                              case taicpu(p).oper[0]^.val Of
                                3: begin
                                   {imul 3, reg1, reg2 to
                                      lea (reg1,reg1,2), reg2
                                    imul 3, reg1 to
                                      lea (reg1,reg1,2), reg1}
                                     TmpRef.base := taicpu(p).oper[1]^.reg;
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     TmpRef.ScaleFactor := 2;
                                     if (taicpu(p).ops = 2) then
                                       hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                     else
                                       hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                     InsertLLItem(p.previous, p.next, hp1);
                                     p.free;
                                     p := hp1;
                                   end;
                                5: begin
                                   {imul 5, reg1, reg2 to
                                      lea (reg1,reg1,4), reg2
                                    imul 5, reg1 to
                                      lea (reg1,reg1,4), reg1}
                                     TmpRef.base := taicpu(p).oper[1]^.reg;
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     TmpRef.ScaleFactor := 4;
                                     if (taicpu(p).ops = 2) then
                                       hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                     else
                                       hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                     InsertLLItem(p.previous, p.next, hp1);
                                     p.free;
                                     p := hp1;
                                   end;
                                6: begin
                                   {imul 6, reg1, reg2 to
                                      lea (,reg1,2), reg2
                                      lea (reg2,reg1,4), reg2
                                    imul 6, reg1 to
                                      lea (reg1,reg1,2), reg1
                                      add reg1, reg1}
                                     if (current_settings.optimizecputype <= cpu_386) then
                                       begin
                                         { build the SECOND instruction first and
                                           insert it after p }
                                         TmpRef.index := taicpu(p).oper[1]^.reg;
                                         if (taicpu(p).ops = 3) then
                                           begin
                                             TmpRef.base := taicpu(p).oper[2]^.reg;
                                             TmpRef.ScaleFactor := 4;
                                             { the result must end up in reg2
                                               (operand 2); writing it to reg1
                                               would clobber the source and leave
                                               reg2 = 2*reg1 }
                                             hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                           end
                                         else
                                           begin
                                             hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                               taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                           end;
                                         InsertLLItem(p, p.next, hp1);
                                         { now the FIRST instruction, which
                                           replaces p itself }
                                         reference_reset(tmpref,2);
                                         TmpRef.index := taicpu(p).oper[1]^.reg;
                                         TmpRef.ScaleFactor := 2;
                                         if (taicpu(p).ops = 3) then
                                           begin
                                             TmpRef.base := NR_NO;
                                             hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                               taicpu(p).oper[2]^.reg);
                                           end
                                         else
                                           begin
                                             TmpRef.base := taicpu(p).oper[1]^.reg;
                                             hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                           end;
                                         InsertLLItem(p.previous, p.next, hp1);
                                         p.free;
                                         { continue after the second new instruction }
                                         p := tai(hp1.next);
                                       end
                                   end;
                                9: begin
                                   {imul 9, reg1, reg2 to
                                      lea (reg1,reg1,8), reg2
                                    imul 9, reg1 to
                                      lea (reg1,reg1,8), reg1}
                                     TmpRef.base := taicpu(p).oper[1]^.reg;
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     TmpRef.ScaleFactor := 8;
                                     if (taicpu(p).ops = 2) then
                                       hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                     else
                                       hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                     InsertLLItem(p.previous, p.next, hp1);
                                     p.free;
                                     p := hp1;
                                   end;
                                10: begin
                                    {imul 10, reg1, reg2 to
                                       lea (reg1,reg1,4), reg2
                                       add reg2, reg2
                                     imul 10, reg1 to
                                       lea (reg1,reg1,4), reg1
                                       add reg1, reg1}
                                      if (current_settings.optimizecputype <= cpu_386) then
                                        begin
                                          { second instruction (add), inserted after p }
                                          if (taicpu(p).ops = 3) then
                                            hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                              taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                          else
                                            hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                              taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                          InsertLLItem(p, p.next, hp1);
                                          { first instruction (lea), replaces p }
                                          TmpRef.base := taicpu(p).oper[1]^.reg;
                                          TmpRef.index := taicpu(p).oper[1]^.reg;
                                          TmpRef.ScaleFactor := 4;
                                          if (taicpu(p).ops = 3) then
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                          else
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                          InsertLLItem(p.previous, p.next, hp1);
                                          p.free;
                                          p := tai(hp1.next);
                                        end
                                    end;
                                12: begin
                                    {imul 12, reg1, reg2 to
                                       lea (,reg1,4), reg2
                                       lea (reg2,reg1,8), reg2
                                     imul 12, reg1 to
                                       lea (reg1,reg1,2), reg1
                                       lea (,reg1,4), reg1}
                                      if (current_settings.optimizecputype <= cpu_386) then
                                        begin
                                          { second instruction, inserted after p }
                                          TmpRef.index := taicpu(p).oper[1]^.reg;
                                          if (taicpu(p).ops = 3) then
                                            begin
                                              TmpRef.base := taicpu(p).oper[2]^.reg;
                                              TmpRef.ScaleFactor := 8;
                                              hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                            end
                                          else
                                            begin
                                              TmpRef.base := NR_NO;
                                              TmpRef.ScaleFactor := 4;
                                              hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                            end;
                                          InsertLLItem(p, p.next, hp1);
                                          { first instruction, replaces p }
                                          reference_reset(tmpref,2);
                                          TmpRef.index := taicpu(p).oper[1]^.reg;
                                          if (taicpu(p).ops = 3) then
                                            begin
                                              TmpRef.base := NR_NO;
                                              TmpRef.ScaleFactor := 4;
                                              hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                            end
                                          else
                                            begin
                                              TmpRef.base := taicpu(p).oper[1]^.reg;
                                              TmpRef.ScaleFactor := 2;
                                              hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                            end;
                                          InsertLLItem(p.previous, p.next, hp1);
                                          p.free;
                                          p := tai(hp1.next);
                                        end
                                    end
                              end;
                            end;
                        end;
                    end;
                  A_SAR, A_SHR:
                    {changes the code sequence
                       shr/sar const1, x
                       shl     const2, x
                     to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                    begin
                      if GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_SHL) and
                         (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).opsize = taicpu(p).opsize) and
                         (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                         OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                        begin
                          if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                             not(cs_opt_size in current_settings.optimizerswitches) then
                            { shr/sar const1, %reg
                              shl     const2, %reg
                              with const1 > const2 }
                            begin
                              { keep the right shift by the difference; the shl
                                becomes an "and" clearing the low const2 bits }
                              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                              taicpu(hp1).opcode := A_AND;
                              l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                              case taicpu(p).opsize Of
                                S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                                S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                                S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                              end;
                            end
                          else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                                  not(cs_opt_size in current_settings.optimizerswitches) then
                            { shr/sar const1, %reg
                              shl     const2, %reg
                              with const1 < const2 }
                            begin
                              { keep the left shift by the difference; the
                                shr/sar becomes an "and" clearing the low
                                const1 bits }
                              taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val))-1;
                              case taicpu(p).opsize Of
                                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                                S_B: taicpu(p).loadConst(0,l Xor $ff);
                                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                              end;
                            end
                          else
                            { shr/sar const1, %reg
                              shl     const2, %reg
                              with const1 = const2 }
                            if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                              begin
                                { both shifts collapse into a single "and" }
                                taicpu(p).opcode := A_AND;
                                l := (1 shl (taicpu(p).oper[0]^.val))-1;
                                case taicpu(p).opsize Of
                                  S_B: taicpu(p).loadConst(0,l Xor $ff);
                                  S_W: taicpu(p).loadConst(0,l Xor $ffff);
                                  S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                                end;
                                asml.remove(hp1);
                                hp1.free;
                              end;
                        end;
                    end;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier }
                      { for the CSE. Will be changed back in pass 2              }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Starting at hp, steps over every following item that is a label, an
    alignment or a member of SkipInstr.
    On success hp2 receives the first item that is none of those and the
    result is True. If the list ends first, hp2 receives the last item that
    was reached and the result is False. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
    var
      following: tai;
    begin
      following := tai(hp.next);
      while assigned(following) and
            (following.typ in (SkipInstr + [ait_label, ait_align])) do
        begin
          hp := following;
          following := tai(hp.next);
        end;
      SkipLabels := assigned(following);
      if assigned(following) then
        hp2 := following
      else
        { ran off the end of the list: hand back where we stopped }
        hp2 := hp;
    end;
  708. { First pass of peephole optimizations }
  709. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  {$ifdef DEBUG_AOPTCPU}
  { Debug variant: puts the text s into the assembler list as a comment
    entry placed directly before p. }
  procedure DebugMsg(const s: string;p : tai);
    var
      note: tai_comment;
    begin
      note := tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  { Non-debug variant: intentionally empty so that calls cost nothing. }
  procedure DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Small tracing aid: prints the line 'Ok' on standard output and always
    returns True. }
  function WriteOk : Boolean;
    begin
      WriteOk := True;
      writeln('Ok');
    end;
  725. var
  726. l : longint;
  727. p,hp1,hp2 : tai;
  728. hp3,hp4: tai;
  729. v:aint;
  730. TmpRef: TReference;
  731. TmpUsedRegs: TAllUsedRegs;
  732. TmpBool1, TmpBool2: Boolean;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  { Traces successive jumps to their final destination and retargets the
    jump hp accordingly, e.g.

         je l1                je l3
         <code>               <code>
       l1:       becomes    l1:
         je l2                je l3
         <code>               <code>
       l2:                  l2:
         jmp l3               jmp l3

    The level parameter denotes how deep we have already followed the jump
    chain, to avoid endless loops with constructs such as "l5: ; jmp l5".

    Returns False when the chain is deeper than 20 levels, when a jump that
    targets its own label is found, or when a recursive trace failed;
    returns True otherwise (also when nothing could be changed).

    Whenever the target symbol of hp is replaced, the reference count of the
    old label is decreased and the one of the new label is increased. }
    var
      p1, p2: tai;
      l: tasmlabel;

    { Steps over the SkipInstr/alignment items that follow hp. If the item
      after those is a label, its symbol is returned in l and the result is
      True; otherwise l is left untouched and the result is False. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
      begin
        FindAnyLabel := false;
        while assigned(hp.next) and
              (tai(hp.next).typ in (SkipInstr+[ait_align])) do
          hp := tai(hp.next);
        if assigned(hp.next) and
           (tai(hp.next).typ = ait_label) then
          begin
            FindAnyLabel := true;
            l := tai_label(hp.next).labsym;
          end
      end;

    begin
      GetFinalDestination := false;
      if level > 20 then
        exit;
      p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          { move p1 from the label entry to the first real item after it }
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives }
               { is unconditional or of the same type as hp, so continue        }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives }
               { is the opposite of hp (so this one is never taken), but after  }
               { that one there is a branch that will be taken, so perform a    }
               { little hack: set p1 equal to this instruction (that's what the }
               { last SkipLabels is for, only works with short bool evaluation) }
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5" }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                    tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                { first resolve the jump we landed on, then adopt its target }
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              { the jump at the destination has the inverse condition and is }
              { therefore never taken when arriving from hp: jump behind it  }
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                if not FindAnyLabel(p1,l) then
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
  {$endif finaldestdebug}
                    { no label behind p1 yet: create one and jump to it }
                    current_asmdata.getjumplabel(l);
                    insertllitem(p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable }
                    { so it will fail the rangecheck. Labeltable should become a   }
                    { hashtable to support this:                                   }
                    {   GetFinalDestination(asml, hp);                             }
                  end
                else
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
  {$endif finaldestdebug}
                    { take the new reference first, then release the label hp }
                    { pointed to so far, as the other retargeting paths do    }
                    l.increfs;
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  { Tries to fold the instruction preceding p into p's constant operand.
    NOTE(review): presumably p is a "sub const,reg" instruction; the caller
    is not visible here - confirm.

    The preceding instruction (stored in the enclosing routine's hp1) must
    have the same operand size as p and work on the register in p's second
    operand:
      dec reg        -> p's constant is increased by one
      sub const2,reg -> const2 is added to p's constant
      add const2,reg -> const2 is subtracted from p's constant
    In each case the preceding instruction is removed and freed.

    Only when the add case leaves a constant of zero is p itself removed as
    well; p is then set to the instruction before its old successor (or to
    that successor if there is none) and the result is True. In every other
    case the result is False. }
  function DoSubAddOpt(var p: tai): Boolean;
    var
      precededByAdd: Boolean;
    begin
      DoSubAddOpt := False;
      if not GetLastInstruction(p, hp1) then
        exit;
      if (hp1.typ <> ait_instruction) or
         (taicpu(hp1).opsize <> taicpu(p).opsize) then
        exit;

      case taicpu(hp1).opcode of
        A_DEC:
          if (taicpu(hp1).oper[0]^.typ = top_reg) and
             (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              taicpu(p).loadConst(0, taicpu(p).oper[0]^.val + 1);
              asml.remove(hp1);
              hp1.free;
            end;
        A_SUB, A_ADD:
          if (taicpu(hp1).oper[0]^.typ = top_const) and
             (taicpu(hp1).oper[1]^.typ = top_reg) and
             (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
            begin
              { remember the opcode now, hp1 is freed further down }
              precededByAdd := (taicpu(hp1).opcode = A_ADD);
              if precededByAdd then
                taicpu(p).loadConst(0, taicpu(p).oper[0]^.val - taicpu(hp1).oper[0]^.val)
              else
                taicpu(p).loadConst(0, taicpu(p).oper[0]^.val + taicpu(hp1).oper[0]^.val);
              asml.remove(hp1);
              hp1.free;
              if precededByAdd and
                 (taicpu(p).oper[0]^.val = 0) then
                begin
                  { both instructions cancelled out: drop p as well }
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  if not GetLastInstruction(hp1, p) then
                    p := hp1;
                  DoSubAddOpt := True;
                end;
            end;
      end;
    end;
  870. begin
  871. p := BlockStart;
  872. ClearUsedRegs;
  873. while (p <> BlockEnd) Do
  874. begin
  875. UpDateUsedRegs(UsedRegs, tai(p.next));
  876. case p.Typ Of
  877. ait_instruction:
  878. begin
  879. current_filepos:=taicpu(p).fileinfo;
  880. if InsContainsSegRef(taicpu(p)) then
  881. begin
  882. p := tai(p.next);
  883. continue;
  884. end;
  885. { Handle Jmp Optimizations }
  886. if taicpu(p).is_jmp then
  887. begin
  888. {the following if-block removes all code between a jmp and the next label,
  889. because it can never be executed}
  890. if (taicpu(p).opcode = A_JMP) then
  891. begin
  892. hp2:=p;
  893. while GetNextInstruction(hp2, hp1) and
  894. (hp1.typ <> ait_label) do
  895. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  896. begin
  897. { don't kill start/end of assembler block,
  898. no-line-info-start/end etc }
  899. if hp1.typ<>ait_marker then
  900. begin
  901. asml.remove(hp1);
  902. hp1.free;
  903. end
  904. else
  905. hp2:=hp1;
  906. end
  907. else break;
  908. end;
  909. { remove jumps to a label coming right after them }
  910. if GetNextInstruction(p, hp1) then
  911. begin
  912. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  913. { TODO: FIXME removing the first instruction fails}
  914. (p<>blockstart) then
  915. begin
  916. hp2:=tai(hp1.next);
  917. asml.remove(p);
  918. p.free;
  919. p:=hp2;
  920. continue;
  921. end
  922. else
  923. begin
  924. if hp1.typ = ait_label then
  925. SkipLabels(hp1,hp1);
  926. if (tai(hp1).typ=ait_instruction) and
  927. (taicpu(hp1).opcode=A_JMP) and
  928. GetNextInstruction(hp1, hp2) and
  929. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  930. begin
  931. if taicpu(p).opcode=A_Jcc then
  932. begin
  933. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  934. tai_label(hp2).labsym.decrefs;
  935. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  936. { when free'ing hp1, the ref. isn't decreased, so we don't
  937. increase it (FK)
  938. taicpu(p).oper[0]^.ref^.symbol.increfs;
  939. }
  940. asml.remove(hp1);
  941. hp1.free;
  942. GetFinalDestination(asml, taicpu(p),0);
  943. end
  944. else
  945. begin
  946. GetFinalDestination(asml, taicpu(p),0);
  947. p:=tai(p.next);
  948. continue;
  949. end;
  950. end
  951. else
  952. GetFinalDestination(asml, taicpu(p),0);
  953. end;
  954. end;
  955. end
  956. else
  957. { All other optimizes }
  958. begin
  959. for l := 0 to taicpu(p).ops-1 Do
  960. if (taicpu(p).oper[l]^.typ = top_ref) then
  961. With taicpu(p).oper[l]^.ref^ Do
  962. begin
  963. if (base = NR_NO) and
  964. (index <> NR_NO) and
  965. (scalefactor in [0,1]) then
  966. begin
  967. base := index;
  968. index := NR_NO
  969. end
  970. end;
  971. case taicpu(p).opcode Of
  972. A_AND:
  973. begin
  974. if (taicpu(p).oper[0]^.typ = top_const) and
  975. (taicpu(p).oper[1]^.typ = top_reg) and
  976. GetNextInstruction(p, hp1) and
  977. (tai(hp1).typ = ait_instruction) and
  978. (taicpu(hp1).opcode = A_AND) and
  979. (taicpu(hp1).oper[0]^.typ = top_const) and
  980. (taicpu(hp1).oper[1]^.typ = top_reg) and
  981. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  982. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) then
  983. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  984. begin
  985. taicpu(hp1).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  986. asml.remove(p);
  987. p.free;
  988. p:=hp1;
  989. end
  990. else
  991. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  992. jump, but only if it's a conditional jump (PFV) }
  993. if (taicpu(p).oper[1]^.typ = top_reg) and
  994. GetNextInstruction(p, hp1) and
  995. (hp1.typ = ait_instruction) and
  996. (taicpu(hp1).is_jmp) and
  997. (taicpu(hp1).opcode<>A_JMP) and
  998. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  999. taicpu(p).opcode := A_TEST;
  1000. end;
  1001. A_CMP:
  1002. begin
  1003. { cmp register,$8000 neg register
  1004. je target --> jo target
  1005. .... only if register is deallocated before jump.}
  1006. case Taicpu(p).opsize of
  1007. S_B: v:=$80;
  1008. S_W: v:=$8000;
  1009. S_L: v:=aint($80000000);
  1010. else
  1011. internalerror(2013112905);
  1012. end;
  1013. if (taicpu(p).oper[0]^.typ=Top_const) and
  1014. (taicpu(p).oper[0]^.val=v) and
  1015. (Taicpu(p).oper[1]^.typ=top_reg) and
  1016. GetNextInstruction(p, hp1) and
  1017. (hp1.typ=ait_instruction) and
  1018. (taicpu(hp1).opcode=A_Jcc) and
  1019. (Taicpu(hp1).condition in [C_E,C_NE]) and
  1020. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  1021. begin
  1022. Taicpu(p).opcode:=A_NEG;
  1023. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  1024. Taicpu(p).clearop(1);
  1025. Taicpu(p).ops:=1;
  1026. if Taicpu(hp1).condition=C_E then
  1027. Taicpu(hp1).condition:=C_O
  1028. else
  1029. Taicpu(hp1).condition:=C_NO;
  1030. continue;
  1031. end;
  1032. {
  1033. @@2: @@2:
  1034. .... ....
  1035. cmp operand1,0
  1036. jle/jbe @@1
  1037. dec operand1 --> sub operand1,1
  1038. jmp @@2 jge/jae @@2
  1039. @@1: @@1:
  1040. ... ....}
  1041. if (taicpu(p).oper[0]^.typ = top_const) and
  1042. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  1043. (taicpu(p).oper[0]^.val = 0) and
  1044. GetNextInstruction(p, hp1) and
  1045. (hp1.typ = ait_instruction) and
  1046. (taicpu(hp1).is_jmp) and
  1047. (taicpu(hp1).opcode=A_Jcc) and
  1048. (taicpu(hp1).condition in [C_LE,C_BE]) and
  1049. GetNextInstruction(hp1,hp2) and
  1050. (hp2.typ = ait_instruction) and
  1051. (taicpu(hp2).opcode = A_DEC) and
  1052. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  1053. GetNextInstruction(hp2, hp3) and
  1054. (hp3.typ = ait_instruction) and
  1055. (taicpu(hp3).is_jmp) and
  1056. (taicpu(hp3).opcode = A_JMP) and
  1057. GetNextInstruction(hp3, hp4) and
  1058. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  1059. begin
  1060. taicpu(hp2).Opcode := A_SUB;
  1061. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  1062. taicpu(hp2).loadConst(0,1);
  1063. taicpu(hp2).ops:=2;
  1064. taicpu(hp3).Opcode := A_Jcc;
  1065. case taicpu(hp1).condition of
  1066. C_LE: taicpu(hp3).condition := C_GE;
  1067. C_BE: taicpu(hp3).condition := C_AE;
  1068. end;
  1069. asml.remove(p);
  1070. asml.remove(hp1);
  1071. p.free;
  1072. hp1.free;
  1073. p := hp2;
  1074. continue;
  1075. end
  1076. end;
  1077. A_FLD:
  1078. begin
  1079. if (taicpu(p).oper[0]^.typ = top_reg) and
  1080. GetNextInstruction(p, hp1) and
  1081. (hp1.typ = Ait_Instruction) and
  1082. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1083. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1084. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  1085. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  1086. { change to
  1087. fld reg fxxx reg,st
  1088. fxxxp st, st1 (hp1)
  1089. Remark: non commutative operations must be reversed!
  1090. }
  1091. begin
  1092. case taicpu(hp1).opcode Of
  1093. A_FMULP,A_FADDP,
  1094. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  1095. begin
  1096. case taicpu(hp1).opcode Of
  1097. A_FADDP: taicpu(hp1).opcode := A_FADD;
  1098. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  1099. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  1100. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  1101. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  1102. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  1103. end;
  1104. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  1105. taicpu(hp1).oper[1]^.reg := NR_ST;
  1106. asml.remove(p);
  1107. p.free;
  1108. p := hp1;
  1109. continue;
  1110. end;
  1111. end;
  1112. end
  1113. else
  1114. if (taicpu(p).oper[0]^.typ = top_ref) and
  1115. GetNextInstruction(p, hp2) and
  1116. (hp2.typ = Ait_Instruction) and
  1117. (taicpu(hp2).ops = 2) and
  1118. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1119. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1120. (taicpu(p).opsize in [S_FS, S_FL]) and
  1121. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  1122. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  1123. if GetLastInstruction(p, hp1) and
  1124. (hp1.typ = Ait_Instruction) and
  1125. ((taicpu(hp1).opcode = A_FLD) or
  1126. (taicpu(hp1).opcode = A_FST)) and
  1127. (taicpu(hp1).opsize = taicpu(p).opsize) and
  1128. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1129. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  1130. if ((taicpu(hp2).opcode = A_FMULP) or
  1131. (taicpu(hp2).opcode = A_FADDP)) then
  1132. { change to
  1133. fld/fst mem1 (hp1) fld/fst mem1
  1134. fld mem1 (p) fadd/
  1135. faddp/ fmul st, st
  1136. fmulp st, st1 (hp2) }
  1137. begin
  1138. asml.remove(p);
  1139. p.free;
  1140. p := hp1;
  1141. if (taicpu(hp2).opcode = A_FADDP) then
  1142. taicpu(hp2).opcode := A_FADD
  1143. else
  1144. taicpu(hp2).opcode := A_FMUL;
  1145. taicpu(hp2).oper[1]^.reg := NR_ST;
  1146. end
  1147. else
  1148. { change to
  1149. fld/fst mem1 (hp1) fld/fst mem1
  1150. fld mem1 (p) fld st}
  1151. begin
  1152. taicpu(p).changeopsize(S_FL);
  1153. taicpu(p).loadreg(0,NR_ST);
  1154. end
  1155. else
  1156. begin
  1157. case taicpu(hp2).opcode Of
  1158. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  1159. { change to
  1160. fld/fst mem1 (hp1) fld/fst mem1
  1161. fld mem2 (p) fxxx mem2
  1162. fxxxp st, st1 (hp2) }
  1163. begin
  1164. case taicpu(hp2).opcode Of
  1165. A_FADDP: taicpu(p).opcode := A_FADD;
  1166. A_FMULP: taicpu(p).opcode := A_FMUL;
  1167. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  1168. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  1169. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  1170. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  1171. end;
  1172. asml.remove(hp2);
  1173. hp2.free;
  1174. end
  1175. end
  1176. end
  1177. end;
  1178. A_FSTP,A_FISTP:
  1179. if doFpuLoadStoreOpt(p) then
  1180. continue;
  1181. A_LEA:
  1182. begin
  1183. {removes seg register prefixes from LEA operations, as they
  1184. don't do anything}
  1185. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  1186. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  1187. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1188. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  1189. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1190. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1191. begin
  1192. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1193. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1194. begin
  1195. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  1196. taicpu(p).oper[1]^.reg);
  1197. InsertLLItem(p.previous,p.next, hp1);
  1198. p.free;
  1199. p := hp1;
  1200. continue;
  1201. end
  1202. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1203. begin
  1204. hp1 := tai(p.Next);
  1205. asml.remove(p);
  1206. p.free;
  1207. p := hp1;
  1208. continue;
  1209. end
  1210. { continue to use lea to adjust the stack pointer,
  1211. it is the recommended way, but only if not optimizing for size }
  1212. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1213. (cs_opt_size in current_settings.optimizerswitches) then
  1214. with taicpu(p).oper[0]^.ref^ do
  1215. if (base = taicpu(p).oper[1]^.reg) then
  1216. begin
  1217. l := offset;
  1218. if (l=1) and UseIncDec then
  1219. begin
  1220. taicpu(p).opcode := A_INC;
  1221. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1222. taicpu(p).ops := 1
  1223. end
  1224. else if (l=-1) and UseIncDec then
  1225. begin
  1226. taicpu(p).opcode := A_DEC;
  1227. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1228. taicpu(p).ops := 1;
  1229. end
  1230. else
  1231. begin
  1232. if (l<0) and (l<>-2147483648) then
  1233. begin
  1234. taicpu(p).opcode := A_SUB;
  1235. taicpu(p).loadConst(0,-l);
  1236. end
  1237. else
  1238. begin
  1239. taicpu(p).opcode := A_ADD;
  1240. taicpu(p).loadConst(0,l);
  1241. end;
  1242. end;
  1243. end;
  1244. end
  1245. (*
  1246. This is unsafe, lea doesn't modify the flags but "add"
  1247. does. This breaks webtbs/tw15694.pp. The above
  1248. transformations are also unsafe, but they don't seem to
  1249. be triggered by code that FPC generates (or that at
  1250. least does not occur in the tests...). This needs to be
  1251. fixed by checking for the liveness of the flags register.
  1252. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1253. begin
  1254. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1255. taicpu(p).oper[0]^.ref^.base);
  1256. InsertLLItem(asml,p.previous,p.next, hp1);
  1257. DebugMsg('Peephole Lea2AddBase done',hp1);
  1258. p.free;
  1259. p:=hp1;
  1260. continue;
  1261. end
  1262. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1263. begin
  1264. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1265. taicpu(p).oper[0]^.ref^.index);
  1266. InsertLLItem(asml,p.previous,p.next,hp1);
  1267. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1268. p.free;
  1269. p:=hp1;
  1270. continue;
  1271. end
  1272. *)
  1273. end;
  1274. A_MOV:
  1275. begin
  1276. if (taicpu(p).oper[1]^.typ = top_reg) and
  1277. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  1278. GetNextInstruction(p, hp1) and
  1279. (tai(hp1).typ = ait_instruction) and
  1280. (taicpu(hp1).opcode = A_MOV) and
  1281. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1282. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  1283. begin
  1284. CopyUsedRegs(TmpUsedRegs);
  1285. {we have "mov x, %treg; mov %treg, y}
  1286. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  1287. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1288. {we've got "mov x, %treg; mov %treg, y; with %treg is not used after }
  1289. case taicpu(p).oper[0]^.typ Of
  1290. top_reg:
  1291. begin
  1292. { change "mov %reg, %treg; mov %treg, y"
  1293. to "mov %reg, y" }
  1294. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1295. asml.remove(hp1);
  1296. hp1.free;
  1297. ReleaseUsedRegs(TmpUsedRegs);
  1298. continue;
  1299. end;
  1300. top_ref:
  1301. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1302. begin
  1303. { change "mov mem, %treg; mov %treg, %reg"
  1304. to "mov mem, %reg" }
  1305. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1306. asml.remove(hp1);
  1307. hp1.free;
  1308. ReleaseUsedRegs(TmpUsedRegs);
  1309. continue;
  1310. end;
  1311. end;
  1312. ReleaseUsedRegs(TmpUsedRegs);
  1313. end
  1314. else
  1315. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  1316. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  1317. penalty}
  1318. if (taicpu(p).oper[0]^.typ = top_reg) and
  1319. (taicpu(p).oper[1]^.typ = top_reg) and
  1320. GetNextInstruction(p,hp1) and
  1321. (tai(hp1).typ = ait_instruction) and
  1322. (taicpu(hp1).ops >= 1) and
  1323. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1324. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  1325. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  1326. begin
  1327. if ((taicpu(hp1).opcode = A_OR) or
  1328. (taicpu(hp1).opcode = A_TEST)) and
  1329. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1330. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1331. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  1332. begin
  1333. CopyUsedRegs(TmpUsedRegs);
  1334. { reg1 will be used after the first instruction, }
  1335. { so update the allocation info }
  1336. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1337. if GetNextInstruction(hp1, hp2) and
  1338. (hp2.typ = ait_instruction) and
  1339. taicpu(hp2).is_jmp and
  1340. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1341. { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  1342. "test %reg1, %reg1; jxx" }
  1343. begin
  1344. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1345. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1346. asml.remove(p);
  1347. p.free;
  1348. p := hp1;
  1349. ReleaseUsedRegs(TmpUsedRegs);
  1350. continue
  1351. end
  1352. else
  1353. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  1354. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  1355. begin
  1356. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1357. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1358. end;
  1359. ReleaseUsedRegs(TmpUsedRegs);
  1360. end
  1361. { else
  1362. if (taicpu(p.next)^.opcode
  1363. in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  1364. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  1365. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  1366. end
  1367. else
  1368. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1369. x >= RetOffset) as it doesn't do anything (it writes either to a
  1370. parameter or to the temporary storage room for the function
  1371. result)}
  1372. if GetNextInstruction(p, hp1) and
  1373. (tai(hp1).typ = ait_instruction) then
  1374. if IsExitCode(hp1) and
  1375. (taicpu(p).oper[1]^.typ = top_ref) and
  1376. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1377. not(assigned(current_procinfo.procdef.funcretsym) and
  1378. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1379. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  1380. (taicpu(p).oper[0]^.typ = top_reg) then
  1381. begin
  1382. asml.remove(p);
  1383. p.free;
  1384. p := hp1;
  1385. RemoveLastDeallocForFuncRes(p);
  1386. end
  1387. else
  1388. if (taicpu(p).oper[0]^.typ = top_reg) and
  1389. (taicpu(p).oper[1]^.typ = top_ref) and
  1390. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1391. (taicpu(hp1).opcode = A_CMP) and
  1392. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1393. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1394. {change "mov reg1, mem1; cmp x, mem1" to "mov reg, mem1; cmp x, reg1"}
  1395. begin
  1396. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1397. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1398. end;
  1399. { Next instruction is also a MOV ? }
  1400. if GetNextInstruction(p, hp1) and
  1401. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1402. begin
  1403. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1404. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1405. {mov reg1, mem1 or mov mem1, reg1
  1406. mov mem2, reg2 mov reg2, mem2}
  1407. begin
  1408. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1409. {mov reg1, mem1 or mov mem1, reg1
  1410. mov mem2, reg1 mov reg2, mem1}
  1411. begin
  1412. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1413. { Removes the second statement from
  1414. mov reg1, mem1/reg2
  1415. mov mem1/reg2, reg1 }
  1416. begin
  1417. if (taicpu(p).oper[0]^.typ = top_reg) then
  1418. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1419. asml.remove(hp1);
  1420. hp1.free;
  1421. end
  1422. else
  1423. begin
  1424. CopyUsedRegs(TmpUsedRegs);
  1425. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1426. if (taicpu(p).oper[1]^.typ = top_ref) and
  1427. { mov reg1, mem1
  1428. mov mem2, reg1 }
  1429. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1430. GetNextInstruction(hp1, hp2) and
  1431. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1432. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1433. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1434. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1435. { change to
  1436. mov reg1, mem1 mov reg1, mem1
  1437. mov mem2, reg1 cmp reg1, mem2
  1438. cmp mem1, reg1 }
  1439. begin
  1440. asml.remove(hp2);
  1441. hp2.free;
  1442. taicpu(hp1).opcode := A_CMP;
  1443. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1444. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1445. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1446. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  1447. end;
  1448. ReleaseUsedRegs(TmpUsedRegs);
  1449. end;
  1450. end
  1451. else
  1452. begin
  1453. CopyUsedRegs(TmpUsedRegs);
  1454. if GetNextInstruction(hp1, hp2) and
  1455. (taicpu(p).oper[0]^.typ = top_ref) and
  1456. (taicpu(p).oper[1]^.typ = top_reg) and
  1457. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1458. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1459. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1460. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1461. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1462. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1463. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1464. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1465. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1466. { mov mem1, %reg1
  1467. mov %reg1, mem2
  1468. mov mem2, reg2
  1469. to:
  1470. mov mem1, reg2
  1471. mov reg2, mem2}
  1472. begin
  1473. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1474. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1475. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1476. asml.remove(hp2);
  1477. hp2.free;
  1478. end
  1479. else
  1480. if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1481. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1482. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1483. { mov mem1, reg1 mov mem1, reg1
  1484. mov reg1, mem2 mov reg1, mem2
  1485. mov mem2, reg2 mov mem2, reg1
  1486. to: to:
  1487. mov mem1, reg1 mov mem1, reg1
  1488. mov mem1, reg2 mov reg1, mem2
  1489. mov reg1, mem2
  1490. or (if mem1 depends on reg1
  1491. and/or if mem2 depends on reg2)
  1492. to:
  1493. mov mem1, reg1
  1494. mov reg1, mem2
  1495. mov reg1, reg2
  1496. }
  1497. begin
  1498. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1499. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1500. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1501. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1502. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1503. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1504. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1505. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1506. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1507. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1508. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1509. end
  1510. else
  1511. if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1512. begin
  1513. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1514. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1515. end
  1516. else
  1517. begin
  1518. asml.remove(hp2);
  1519. hp2.free;
  1520. end;
  1521. ReleaseUsedRegs(TmpUsedRegs);
  1522. end;
  1523. end
  1524. else
  1525. (* {movl [mem1],reg1
  1526. movl [mem1],reg2
  1527. to:
  1528. movl [mem1],reg1
  1529. movl reg1,reg2 }
  1530. if (taicpu(p).oper[0]^.typ = top_ref) and
  1531. (taicpu(p).oper[1]^.typ = top_reg) and
  1532. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1533. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1534. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1535. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1536. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1537. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1538. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1539. else*)
  1540. { movl const1,[mem1]
  1541. movl [mem1],reg1
  1542. to:
  1543. movl const1,reg1
  1544. movl reg1,[mem1] }
  1545. if (taicpu(p).oper[0]^.typ = top_const) and
  1546. (taicpu(p).oper[1]^.typ = top_ref) and
  1547. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1548. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1549. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1550. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1551. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1552. begin
  1553. allocregbetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1554. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1555. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1556. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1557. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1558. end
  1559. end;
  1560. if GetNextInstruction(p, hp1) and
  1561. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1562. GetNextInstruction(hp1, hp2) and
  1563. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1564. MatchOperand(Taicpu(p).oper[0]^,0) and
  1565. (Taicpu(p).oper[1]^.typ = top_reg) and
  1566. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1567. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1568. {mov reg1,0
  1569. bts reg1,operand1 --> mov reg1,operand2
  1570. or reg1,operand2 bts reg1,operand1}
  1571. begin
  1572. Taicpu(hp2).opcode:=A_MOV;
  1573. asml.remove(hp1);
  1574. insertllitem(hp2,hp2.next,hp1);
  1575. asml.remove(p);
  1576. p.free;
  1577. p:=hp1;
  1578. end;
  1579. if GetNextInstruction(p, hp1) and
  1580. MatchInstruction(hp1,A_LEA,[S_L]) and
  1581. (Taicpu(p).oper[0]^.typ = top_ref) and
  1582. (Taicpu(p).oper[1]^.typ = top_reg) and
  1583. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1584. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1585. ) or
  1586. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1587. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1588. )
  1589. ) then
  1590. {mov reg1,ref
  1591. lea reg2,[reg1,reg2] --> add reg2,ref}
  1592. begin
  1593. CopyUsedRegs(TmpUsedRegs);
  1594. { reg1 may not be used afterwards }
  1595. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1596. begin
  1597. Taicpu(hp1).opcode:=A_ADD;
  1598. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1599. DebugMsg('Peephole MovLea2Add done',hp1);
  1600. asml.remove(p);
  1601. p.free;
  1602. p:=hp1;
  1603. end;
  1604. ReleaseUsedRegs(TmpUsedRegs);
  1605. end;
  1606. end;
  1607. A_MOVSX,
  1608. A_MOVZX :
  1609. begin
  1610. if (taicpu(p).oper[1]^.typ = top_reg) and
  1611. GetNextInstruction(p,hp1) and
  1612. (hp1.typ = ait_instruction) and
  1613. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1614. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1615. GetNextInstruction(hp1,hp2) and
  1616. MatchInstruction(hp2,A_MOV,[]) and
  1617. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1618. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1619. (((taicpu(hp1).ops=2) and
  1620. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1621. ((taicpu(hp1).ops=1) and
  1622. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1623. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1624. { change movsX/movzX reg/ref, reg2 }
  1625. { add/sub/or/... reg3/$const, reg2 }
  1626. { mov reg2 reg/ref }
  1627. { to add/sub/or/... reg3/$const, reg/ref }
  1628. begin
  1629. { by example:
  1630. movswl %si,%eax movswl %si,%eax p
  1631. decl %eax addl %edx,%eax hp1
  1632. movw %ax,%si movw %ax,%si hp2
  1633. ->
  1634. movswl %si,%eax movswl %si,%eax p
  1635. decw %eax addw %edx,%eax hp1
  1636. movw %ax,%si movw %ax,%si hp2
  1637. }
  1638. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1639. {
  1640. ->
  1641. movswl %si,%eax movswl %si,%eax p
  1642. decw %si addw %dx,%si hp1
  1643. movw %ax,%si movw %ax,%si hp2
  1644. }
  1645. case taicpu(hp1).ops of
  1646. 1:
  1647. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1648. 2:
  1649. begin
  1650. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1651. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1652. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1653. end;
  1654. else
  1655. internalerror(2008042701);
  1656. end;
  1657. {
  1658. ->
  1659. decw %si addw %dx,%si p
  1660. }
  1661. asml.remove(p);
  1662. asml.remove(hp2);
  1663. p.free;
  1664. hp2.free;
  1665. p := hp1
  1666. end
  1667. { removes superfluous And's after movzx's }
  1668. else if taicpu(p).opcode=A_MOVZX then
  1669. begin
  1670. if (taicpu(p).oper[1]^.typ = top_reg) and
  1671. GetNextInstruction(p, hp1) and
  1672. (tai(hp1).typ = ait_instruction) and
  1673. (taicpu(hp1).opcode = A_AND) and
  1674. (taicpu(hp1).oper[0]^.typ = top_const) and
  1675. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1676. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1677. case taicpu(p).opsize Of
  1678. S_BL, S_BW:
  1679. if (taicpu(hp1).oper[0]^.val = $ff) then
  1680. begin
  1681. asml.remove(hp1);
  1682. hp1.free;
  1683. end;
  1684. S_WL:
  1685. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1686. begin
  1687. asml.remove(hp1);
  1688. hp1.free;
  1689. end;
  1690. end;
  1691. {changes some movzx constructs to faster synonims (all examples
  1692. are given with eax/ax, but are also valid for other registers)}
  1693. if (taicpu(p).oper[1]^.typ = top_reg) then
  1694. if (taicpu(p).oper[0]^.typ = top_reg) then
  1695. case taicpu(p).opsize of
  1696. S_BW:
  1697. begin
  1698. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1699. not(cs_opt_size in current_settings.optimizerswitches) then
  1700. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1701. begin
  1702. taicpu(p).opcode := A_AND;
  1703. taicpu(p).changeopsize(S_W);
  1704. taicpu(p).loadConst(0,$ff);
  1705. end
  1706. else if GetNextInstruction(p, hp1) and
  1707. (tai(hp1).typ = ait_instruction) and
  1708. (taicpu(hp1).opcode = A_AND) and
  1709. (taicpu(hp1).oper[0]^.typ = top_const) and
  1710. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1711. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1712. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1713. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1714. begin
  1715. taicpu(p).opcode := A_MOV;
  1716. taicpu(p).changeopsize(S_W);
  1717. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1718. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1719. end;
  1720. end;
  1721. S_BL:
  1722. begin
  1723. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1724. not(cs_opt_size in current_settings.optimizerswitches) then
  1725. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1726. begin
  1727. taicpu(p).opcode := A_AND;
  1728. taicpu(p).changeopsize(S_L);
  1729. taicpu(p).loadConst(0,$ff)
  1730. end
  1731. else if GetNextInstruction(p, hp1) and
  1732. (tai(hp1).typ = ait_instruction) and
  1733. (taicpu(hp1).opcode = A_AND) and
  1734. (taicpu(hp1).oper[0]^.typ = top_const) and
  1735. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1736. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1737. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1738. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1739. begin
  1740. taicpu(p).opcode := A_MOV;
  1741. taicpu(p).changeopsize(S_L);
  1742. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1743. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1744. end
  1745. end;
  1746. S_WL:
  1747. begin
  1748. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1749. not(cs_opt_size in current_settings.optimizerswitches) then
  1750. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1751. begin
  1752. taicpu(p).opcode := A_AND;
  1753. taicpu(p).changeopsize(S_L);
  1754. taicpu(p).loadConst(0,$ffff);
  1755. end
  1756. else if GetNextInstruction(p, hp1) and
  1757. (tai(hp1).typ = ait_instruction) and
  1758. (taicpu(hp1).opcode = A_AND) and
  1759. (taicpu(hp1).oper[0]^.typ = top_const) and
  1760. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1761. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1762. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1763. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1764. begin
  1765. taicpu(p).opcode := A_MOV;
  1766. taicpu(p).changeopsize(S_L);
  1767. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1768. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1769. end;
  1770. end;
  1771. end
  1772. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1773. begin
  1774. if GetNextInstruction(p, hp1) and
  1775. (tai(hp1).typ = ait_instruction) and
  1776. (taicpu(hp1).opcode = A_AND) and
  1777. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1778. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1779. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1780. begin
  1781. taicpu(p).opcode := A_MOV;
  1782. case taicpu(p).opsize Of
  1783. S_BL:
  1784. begin
  1785. taicpu(p).changeopsize(S_L);
  1786. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1787. end;
  1788. S_WL:
  1789. begin
  1790. taicpu(p).changeopsize(S_L);
  1791. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1792. end;
  1793. S_BW:
  1794. begin
  1795. taicpu(p).changeopsize(S_W);
  1796. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1797. end;
  1798. end;
  1799. end;
  1800. end;
  1801. end;
  1802. end;
  1803. (* should not be generated anymore by the current code generator
  1804. A_POP:
  1805. begin
  1806. if target_info.system=system_i386_go32v2 then
  1807. begin
  1808. { Transform a series of pop/pop/pop/push/push/push to }
  1809. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1810. { because I'm not sure whether they can cope with }
  1811. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1812. { such a problem when using esp as frame pointer (JM) }
  1813. if (taicpu(p).oper[0]^.typ = top_reg) then
  1814. begin
  1815. hp1 := p;
  1816. hp2 := p;
  1817. l := 0;
  1818. while getNextInstruction(hp1,hp1) and
  1819. (hp1.typ = ait_instruction) and
  1820. (taicpu(hp1).opcode = A_POP) and
  1821. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1822. begin
  1823. hp2 := hp1;
  1824. inc(l,4);
  1825. end;
  1826. getLastInstruction(p,hp3);
  1827. l1 := 0;
  1828. while (hp2 <> hp3) and
  1829. assigned(hp1) and
  1830. (hp1.typ = ait_instruction) and
  1831. (taicpu(hp1).opcode = A_PUSH) and
  1832. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1833. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1834. begin
  1835. { change it to a two op operation }
  1836. taicpu(hp2).oper[1]^.typ:=top_none;
  1837. taicpu(hp2).ops:=2;
  1838. taicpu(hp2).opcode := A_MOV;
  1839. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1840. reference_reset(tmpref);
  1841. tmpRef.base.enum:=R_INTREGISTER;
  1842. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1843. convert_register_to_enum(tmpref.base);
  1844. tmpRef.offset := l;
  1845. taicpu(hp2).loadRef(0,tmpRef);
  1846. hp4 := hp1;
  1847. getNextInstruction(hp1,hp1);
  1848. asml.remove(hp4);
  1849. hp4.free;
  1850. getLastInstruction(hp2,hp2);
  1851. dec(l,4);
  1852. inc(l1);
  1853. end;
  1854. if l <> -4 then
  1855. begin
  1856. inc(l,4);
  1857. for l1 := l1 downto 1 do
  1858. begin
  1859. getNextInstruction(hp2,hp2);
  1860. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1861. end
  1862. end
  1863. end
  1864. end
  1865. else
  1866. begin
  1867. if (taicpu(p).oper[0]^.typ = top_reg) and
  1868. GetNextInstruction(p, hp1) and
  1869. (tai(hp1).typ=ait_instruction) and
  1870. (taicpu(hp1).opcode=A_PUSH) and
  1871. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1872. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1873. begin
  1874. { change it to a two op operation }
  1875. taicpu(p).oper[1]^.typ:=top_none;
  1876. taicpu(p).ops:=2;
  1877. taicpu(p).opcode := A_MOV;
  1878. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1879. reference_reset(tmpref);
  1880. TmpRef.base.enum := R_ESP;
  1881. taicpu(p).loadRef(0,TmpRef);
  1882. asml.remove(hp1);
  1883. hp1.free;
  1884. end;
  1885. end;
  1886. end;
  1887. *)
  1888. A_PUSH:
  1889. begin
  1890. if (taicpu(p).opsize = S_W) and
  1891. (taicpu(p).oper[0]^.typ = Top_Const) and
  1892. GetNextInstruction(p, hp1) and
  1893. (tai(hp1).typ = ait_instruction) and
  1894. (taicpu(hp1).opcode = A_PUSH) and
  1895. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1896. (taicpu(hp1).opsize = S_W) then
  1897. begin
  1898. taicpu(p).changeopsize(S_L);
  1899. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1900. asml.remove(hp1);
  1901. hp1.free;
  1902. end;
  1903. end;
  1904. A_SHL, A_SAL:
  1905. begin
  1906. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1907. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1908. (taicpu(p).opsize = S_L) and
  1909. (taicpu(p).oper[0]^.val <= 3) then
  1910. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1911. begin
  1912. TmpBool1 := True; {should we check the next instruction?}
  1913. TmpBool2 := False; {have we found an add/sub which could be
  1914. integrated in the lea?}
  1915. reference_reset(tmpref,2);
  1916. TmpRef.index := taicpu(p).oper[1]^.reg;
  1917. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1918. while TmpBool1 and
  1919. GetNextInstruction(p, hp1) and
  1920. (tai(hp1).typ = ait_instruction) and
  1921. ((((taicpu(hp1).opcode = A_ADD) or
  1922. (taicpu(hp1).opcode = A_SUB)) and
  1923. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1924. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1925. (((taicpu(hp1).opcode = A_INC) or
  1926. (taicpu(hp1).opcode = A_DEC)) and
  1927. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1928. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1929. (not GetNextInstruction(hp1,hp2) or
  1930. not instrReadsFlags(hp2)) Do
  1931. begin
  1932. TmpBool1 := False;
  1933. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1934. begin
  1935. TmpBool1 := True;
  1936. TmpBool2 := True;
  1937. case taicpu(hp1).opcode of
  1938. A_ADD:
  1939. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1940. A_SUB:
  1941. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1942. end;
  1943. asml.remove(hp1);
  1944. hp1.free;
  1945. end
  1946. else
  1947. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1948. (((taicpu(hp1).opcode = A_ADD) and
  1949. (TmpRef.base = NR_NO)) or
  1950. (taicpu(hp1).opcode = A_INC) or
  1951. (taicpu(hp1).opcode = A_DEC)) then
  1952. begin
  1953. TmpBool1 := True;
  1954. TmpBool2 := True;
  1955. case taicpu(hp1).opcode of
  1956. A_ADD:
  1957. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1958. A_INC:
  1959. inc(TmpRef.offset);
  1960. A_DEC:
  1961. dec(TmpRef.offset);
  1962. end;
  1963. asml.remove(hp1);
  1964. hp1.free;
  1965. end;
  1966. end;
  1967. if TmpBool2 or
  1968. ((current_settings.optimizecputype < cpu_Pentium2) and
  1969. (taicpu(p).oper[0]^.val <= 3) and
  1970. not(cs_opt_size in current_settings.optimizerswitches)) then
  1971. begin
  1972. if not(TmpBool2) and
  1973. (taicpu(p).oper[0]^.val = 1) then
  1974. begin
  1975. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1976. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1977. end
  1978. else
  1979. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1980. taicpu(p).oper[1]^.reg);
  1981. InsertLLItem(p.previous, p.next, hp1);
  1982. p.free;
  1983. p := hp1;
  1984. end;
  1985. end
  1986. else
  1987. if (current_settings.optimizecputype < cpu_Pentium2) and
  1988. (taicpu(p).oper[0]^.typ = top_const) and
  1989. (taicpu(p).oper[1]^.typ = top_reg) then
  1990. if (taicpu(p).oper[0]^.val = 1) then
  1991. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1992. but faster on a 486, and Tairable in both U and V pipes on the Pentium
  1993. (unlike shl, which is only Tairable in the U pipe)}
  1994. begin
  1995. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1996. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1997. InsertLLItem(p.previous, p.next, hp1);
  1998. p.free;
  1999. p := hp1;
  2000. end
  2001. else if (taicpu(p).opsize = S_L) and
  2002. (taicpu(p).oper[0]^.val<= 3) then
  2003. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  2004. "shl $3, %reg" to "lea (,%reg,8), %reg}
  2005. begin
  2006. reference_reset(tmpref,2);
  2007. TmpRef.index := taicpu(p).oper[1]^.reg;
  2008. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  2009. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  2010. InsertLLItem(p.previous, p.next, hp1);
  2011. p.free;
  2012. p := hp1;
  2013. end
  2014. end;
  2015. A_SETcc :
  2016. { changes
  2017. setcc (funcres) setcc reg
  2018. movb (funcres), reg to leave/ret
  2019. leave/ret }
  2020. begin
  2021. if (taicpu(p).oper[0]^.typ = top_ref) and
  2022. GetNextInstruction(p, hp1) and
  2023. GetNextInstruction(hp1, hp2) and
  2024. IsExitCode(hp2) and
  2025. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  2026. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  2027. not(assigned(current_procinfo.procdef.funcretsym) and
  2028. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  2029. (hp1.typ = ait_instruction) and
  2030. (taicpu(hp1).opcode = A_MOV) and
  2031. (taicpu(hp1).opsize = S_B) and
  2032. (taicpu(hp1).oper[0]^.typ = top_ref) and
  2033. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  2034. begin
  2035. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  2036. asml.remove(hp1);
  2037. hp1.free;
  2038. end
  2039. end;
  2040. A_SUB:
  2041. { * change "subl $2, %esp; pushw x" to "pushl x"}
  2042. { * change "sub/add const1, reg" or "dec reg" followed by
  2043. "sub const2, reg" to one "sub ..., reg" }
  2044. begin
  2045. if (taicpu(p).oper[0]^.typ = top_const) and
  2046. (taicpu(p).oper[1]^.typ = top_reg) then
  2047. if (taicpu(p).oper[0]^.val = 2) and
  2048. (taicpu(p).oper[1]^.reg = NR_ESP) and
  2049. { Don't do the sub/push optimization if the sub }
  2050. { comes from setting up the stack frame (JM) }
  2051. (not getLastInstruction(p,hp1) or
  2052. (hp1.typ <> ait_instruction) or
  2053. (taicpu(hp1).opcode <> A_MOV) or
  2054. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  2055. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  2056. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  2057. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  2058. begin
  2059. hp1 := tai(p.next);
  2060. while Assigned(hp1) and
  2061. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  2062. not RegReadByInstruction(NR_ESP,hp1) and
  2063. not RegModifiedByInstruction(NR_ESP,hp1) do
  2064. hp1 := tai(hp1.next);
  2065. if Assigned(hp1) and
  2066. (tai(hp1).typ = ait_instruction) and
  2067. (taicpu(hp1).opcode = A_PUSH) and
  2068. (taicpu(hp1).opsize = S_W) then
  2069. begin
  2070. taicpu(hp1).changeopsize(S_L);
  2071. if taicpu(hp1).oper[0]^.typ=top_reg then
  2072. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  2073. hp1 := tai(p.next);
  2074. asml.remove(p);
  2075. p.free;
  2076. p := hp1;
  2077. continue
  2078. end;
  2079. if DoSubAddOpt(p) then
  2080. continue;
  2081. end
  2082. else if DoSubAddOpt(p) then
  2083. continue
  2084. end;
  2085. A_VMOVAPS,
  2086. A_VMOVAPD:
  2087. if OptPass1VMOVAP(p) then
  2088. continue;
  2089. end;
  2090. end; { if is_jmp }
  2091. end;
  2092. end;
  2093. updateUsedRegs(UsedRegs,p);
  2094. p:=tai(p.next);
  2095. end;
  2096. end;
  { Second peephole pass over the items from BlockStart up to (not including)
    BlockEnd.

    For every instruction that does not contain a segment reference the
    following rewrites are attempted, keyed on the opcode:

      A_Jcc         - "jb/jnb over a single inc/dec" becomes cmc+adc/sbb or
                      adc/sbb, and (when the target CPU has CMOV) short runs
                      of register moves that are jumped over become CMOVcc
      A_FSTP/A_FISTP- delegated to DoFpuLoadStoreOpt
      A_IMUL        - "mov reg1,reg2; imul y,reg2" becomes "imul y,reg1,reg2"
      A_JMP         - a jump whose target is a RET becomes that RET
      A_MOV         - folds a preceding register copy into a following load,
                      and folds load/modify/store sequences into a single
                      read-modify-write on memory

    The statement order inside each rewrite matters: list items are relinked
    and freed while p/hp1/hp2/hp3 still point into the list. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;

  {$ifdef DEBUG_AOPTCPU}
    { Debug build: records the applied optimization as an assembler comment
      inserted in front of p. }
    procedure DebugMsg(const s: string;p : tai);
      begin
        asml.insertbefore(tai_comment.Create(strpnew(s)), p);
      end;
  {$else DEBUG_AOPTCPU}
    { Non-debug build: no-op. }
    procedure DebugMsg(const s: string;p : tai);inline;
      begin
      end;
  {$endif DEBUG_AOPTCPU}

    { Returns true if p is a 16 or 32 bit "mov reg,reg" instruction, the only
      form this pass converts into CMOVcc. }
    function CanBeCMOV(p : tai) : boolean;
      begin
        CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
          (taicpu(p).opcode=A_MOV) and
          (taicpu(p).opsize in [S_L,S_W]) and
          ((taicpu(p).oper[0]^.typ = top_reg)
           { we can't use cmov ref,reg because
             ref could be nil and cmov still throws an exception
             if ref=nil but the mov isn't done (FK)
            or ((taicpu(p).oper[0]^.typ = top_ref) and
             (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
           }
          ) and
          (taicpu(p).oper[1]^.typ in [top_reg]);
      end;

    var
      p,hp1,hp2,hp3: tai;
      { number of consecutive CMOV-able movs found by the Jcc handling }
      l : longint;
      condition : tasmcond;
      TmpUsedRegs: TAllUsedRegs;
      carryadd_opcode: Tasmop;

    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                { instructions with segment references are left untouched }
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_Jcc:
                    begin
                      { jb   @@1                            cmc
                        inc/dec operand       -->           adc/sbb operand,0
                        @@1:

                        ... and ...

                        jnb  @@1
                        inc/dec operand       -->           adc/sbb operand,0
                        @@1: }
                      if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
                         GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
                         (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
                        begin
                          { NOTE(review): unlike the CMOV rewrites below, neither
                            branch here calls decrefs on the jump target label
                            when the jump disappears - confirm whether that is
                            intended. }
                          carryadd_opcode:=A_NONE;
                          if Taicpu(p).condition in [C_NAE,C_B] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { the jump skipped the inc/dec when carry was
                                    set, so invert carry first: reuse the jump
                                    instruction itself as the cmc }
                                  Taicpu(p).clearop(0);
                                  Taicpu(p).ops:=0;
                                  Taicpu(p).is_jmp:=false;
                                  Taicpu(p).opcode:=A_CMC;
                                  Taicpu(p).condition:=C_NONE;
                                  { inc/dec operand -> adc/sbb $0,operand }
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  continue;
                                end;
                            end;
                          if Taicpu(p).condition in [C_AE,C_NB] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { the inc/dec was executed exactly when carry
                                    was set, so the jump can simply go }
                                  asml.remove(p);
                                  p.free;
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  p:=hp1;
                                  continue;
                                end;
                            end;
                        end;
                      if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
                        begin
                          { check for
                                 jCC   xxx
                                 <several movs>
                              xxx:
                          }
                          l:=0;
                          GetNextInstruction(p, hp1);
                          while assigned(hp1) and
                            CanBeCMOV(hp1) and
                            { stop on labels }
                            not(hp1.typ=ait_label) do
                            begin
                              inc(l);
                              GetNextInstruction(hp1,hp1);
                            end;
                          if assigned(hp1) then
                            begin
                              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      condition:=inverse_cond(taicpu(p).condition);
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      repeat
                                        taicpu(hp1).opcode:=A_CMOVcc;
                                        taicpu(hp1).condition:=condition;
                                        GetNextInstruction(hp1,hp1);
                                      until not(assigned(hp1)) or
                                        not(CanBeCMOV(hp1));
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                begin
                                  { check further for
                                         jCC   xxx
                                         <several movs 1>
                                         jmp   yyy
                                     xxx:
                                         <several movs 2>
                                     yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  { l>0 is required: with an empty first mov group
                                    the conversion loop below would start on the
                                    "jmp yyy" itself, p would be set to it, and it
                                    is freed (as hp2) before the loop continues
                                    with p }
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    (l<=3) and
                                    (l>0) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCMOV(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      { l>0 is required here as well: the second
                                        conversion loop below always rewrites its
                                        first item, so it must be a CMOV-able mov }
                                      if assigned(hp1) and
                                        (l>0) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          hp3:=p;
                                          p:=hp1;
                                          repeat
                                            taicpu(hp1).opcode:=A_CMOVcc;
                                            taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCMOV(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2> }
                                          repeat
                                            taicpu(hp1).opcode:=A_CMOVcc;
                                            taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCMOV(hp1));
                                          {
                                          asml.remove(hp1.next)
                                          hp1.next.free;
                                          asml.remove(hp1);
                                          hp1.free;
                                          }
                                          { remove jCC }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                            end;
                        end;
                    end;
                  A_FSTP,A_FISTP:
                    if DoFpuLoadStoreOpt(p) then
                      continue;
                  A_IMUL:
                    begin
                      if (taicpu(p).ops >= 2) and
                         ((taicpu(p).oper[0]^.typ = top_const) or
                          ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         ((taicpu(p).ops = 2) or
                          ((taicpu(p).oper[2]^.typ = top_reg) and
                           (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
                         getLastInstruction(p,hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_MOV) and
                         (taicpu(hp1).oper[0]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
                        begin
                          taicpu(p).ops := 3;
                          taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_JMP:
                    {
                      change
                             jmp .L1
                             ...
                         .L1:
                             ret
                      into
                             ret
                    }
                    if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) then
                      begin
                        hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
                        if assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_RET) and (taicpu(p).condition=C_None) then
                          begin
                            tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
                            taicpu(p).opcode:=A_RET;
                            taicpu(p).is_jmp:=false;
                            { copy the operand of the ret, if it has one }
                            taicpu(p).ops:=taicpu(hp1).ops;
                            case taicpu(hp1).ops of
                              0:
                                taicpu(p).clearop(0);
                              1:
                                taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
                              else
                                internalerror(2016041301);
                            end;
                            continue;
                          end;
                      end;
                  A_MOV:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_reg) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_Instruction) and
                         ((taicpu(hp1).opcode = A_MOV) or
                          (taicpu(hp1).opcode = A_MOVZX) or
                          (taicpu(hp1).opcode = A_MOVSX)) and
                         (taicpu(hp1).oper[0]^.typ = top_ref) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
                          (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
                         (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
                        { mov reg1, reg2
                          mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2 }
                        begin
                          if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
                          if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
                          asml.remove(p);
                          p.free;
                          p := hp1;
                          continue;
                        end
                      else if (taicpu(p).oper[0]^.typ = top_ref) and
                        GetNextInstruction(p,hp1) and
                        (hp1.typ = ait_instruction) and
                        (IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) or
                         ((taicpu(hp1).opcode=A_LEA) and
                          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) and
                          ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
                            (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)) or
                           (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
                            (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg))
                          )
                         )
                        ) and
                        GetNextInstruction(hp1,hp2) and
                        MatchInstruction(hp2,A_MOV,[]) and
                        MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                        (taicpu(hp2).oper[1]^.typ = top_ref) then
                        begin
                          CopyUsedRegs(TmpUsedRegs);
                          UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                          if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
                              not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
                                    hp2, TmpUsedRegs))) then
                            { change   mov            (ref), reg            }
                            {          add/sub/or/... reg2/$const, reg      }
                            {          mov            reg, (ref)            }
                            {          # release reg                        }
                            { to       add/sub/or/... reg2/$const, (ref)    }
                            begin
                              case taicpu(hp1).opcode of
                                A_INC,A_DEC,A_NOT,A_NEG:
                                  { single operand instruction: operate on
                                    the memory location directly }
                                  taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                                A_LEA:
                                  begin
                                    { lea (reg,other),reg becomes
                                      add other,(ref): pick whichever of
                                      base/index is not the loaded register }
                                    taicpu(hp1).opcode:=A_ADD;
                                    if taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg then
                                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                                    else
                                      taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base);
                                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                                    DebugMsg('Peephole FoldLea done',hp1);
                                  end
                                else
                                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                              end;
                              { the load and the store are no longer needed }
                              asml.remove(p);
                              asml.remove(hp2);
                              p.free;
                              hp2.free;
                              p := hp1
                            end;
                          ReleaseUsedRegs(TmpUsedRegs);
                        end
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Final peephole pass over the instructions in [BlockStart, BlockEnd).

    Walks the block once and, per instruction opcode, applies the rewrites
    below.  Instructions for which InsContainsSegRef returns true are skipped
    untouched.

      CALL   call+jmp  -> push+jmp   (pre-Pentium2 targets, no PIC)
             call+ret  -> jmp        (optimisation level 4, no PIC)
      CMP    cmp $0,%reg -> test %reg,%reg
      MOV    delegated to PostPeepholeOptMov
      MOVZX  movzbl -> xorl+movb     (Pentium target, not optimising for
                                      size, register variables disabled)
      TEST/OR  drops a test/or whose only purpose is to set flags that the
               preceding arithmetic instruction has already set; otherwise
               "test $-1,%reg" may be shortened to "test %reg,%reg". }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      cur,          { instruction currently being examined }
      follower,     { instruction after a CALL }
      inserted,     { freshly created instruction that is linked in before cur }
      flagSetter,   { instruction preceding a TEST/OR }
      flagUser: tai;{ SETcc/Jcc/CMOVcc following a TEST/OR }
      IsTestConstX,
      zeroFlagOnly: boolean;

    { Unlinks and frees the current instruction and makes its successor the
      current one.  The caller is expected to "continue" afterwards so that
      the successor is not skipped by the loop's own advance step. }
    procedure DropCurrent;
      var
        successor: tai;
      begin
        successor := tai(cur.next);
        asml.remove(cur);
        cur.free;
        cur := successor;
      end;

    { Turns the current CALL into a JMP and deletes the instruction that
      followed it (held in follower). }
    procedure CallBecomesJump;
      begin
        taicpu(cur).opcode := A_JMP;
        taicpu(cur).is_jmp := true;
        asml.remove(follower);
        follower.free;
      end;

    { change "test $-1,%reg" into "test %reg,%reg"; does nothing unless
      IsTestConstX is set and the second operand is a register. }
    procedure TestAllOnesToTestSelf;
      begin
        if IsTestConstX and (taicpu(cur).oper[1]^.typ = top_reg) then
          taicpu(cur).loadoper(0, taicpu(cur).oper[1]^);
      end;

    begin
      cur := BlockStart;
      ClearUsedRegs;
      while cur <> BlockEnd do
        begin
          UpdateUsedRegs(UsedRegs, tai(cur.next));
          if cur.typ = ait_instruction then
            begin
              { leave anything with a segment reference alone }
              if InsContainsSegRef(taicpu(cur)) then
                begin
                  cur := tai(cur.next);
                  continue;
                end;

              case taicpu(cur).opcode of
                A_CALL:
                  begin
                    { call procname; jmp target
                        ->  push target; jmp procname
                      don't do this on modern CPUs, this really hurts them
                      due to broken call/ret pairing }
                    if (current_settings.optimizecputype < cpu_Pentium2) and
                       not(cs_create_pic in current_settings.moduleswitches) and
                       GetNextInstruction(cur, follower) and
                       (follower.typ = ait_instruction) and
                       (taicpu(follower).opcode = A_JMP) and
                       (taicpu(follower).oper[0]^.typ = top_ref) and
                       (taicpu(follower).oper[0]^.ref^.refaddr = addr_full) then
                      begin
                        inserted := taicpu.Op_sym(A_PUSH, S_L,
                          taicpu(follower).oper[0]^.ref^.symbol);
                        InsertLLItem(cur.previous, cur, inserted);
                        CallBecomesJump;
                      end
                    { call procname; ret
                        ->  jmp procname
                      this should never hurt except when pic is used, not
                      sure how to handle it then; only done on level 4
                      because it destroys stack back traces }
                    else if (cs_opt_level4 in current_settings.optimizerswitches) and
                       not(cs_create_pic in current_settings.moduleswitches) and
                       GetNextInstruction(cur, follower) and
                       (follower.typ = ait_instruction) and
                       (taicpu(follower).opcode = A_RET) and
                       (taicpu(follower).ops = 0) then
                      CallBecomesJump;
                  end;

                A_CMP:
                  begin
                    { change "cmp $0, %reg" to "test %reg, %reg" }
                    if (taicpu(cur).oper[0]^.typ = top_const) and
                       (taicpu(cur).oper[0]^.val = 0) and
                       (taicpu(cur).oper[1]^.typ = top_reg) then
                      begin
                        taicpu(cur).opcode := A_TEST;
                        taicpu(cur).loadreg(0, taicpu(cur).oper[1]^.reg);
                        { cur is deliberately not advanced: the next loop
                          iteration sees the same instruction again, now as
                          a TEST, and runs the A_TEST arm on it }
                        continue;
                      end;
                  end;

                A_MOV:
                  PostPeepholeOptMov(cur);

                A_MOVZX:
                  begin
                    { if register vars are on, it's possible there is code like
                        "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"
                      so we can't safely replace the movzx then with xor/mov,
                      since that would change the flags (JM) }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) and
                       (taicpu(cur).oper[1]^.typ = top_reg) then
                      begin
                        if taicpu(cur).oper[0]^.typ = top_reg then
                          begin
                            { Change "movzbl %reg1, %reg2" to
                              "xorl %reg2, %reg2; movb %reg1, %reg2" for
                              Pentium and PentiumMMX }
                            if (taicpu(cur).opsize = S_BL) and
                               IsGP32Reg(taicpu(cur).oper[1]^.reg) and
                               not(cs_opt_size in current_settings.optimizerswitches) and
                               (current_settings.optimizecputype = cpu_Pentium) then
                              begin
                                { the xor must be built before the destination
                                  is narrowed to its byte sub-register below }
                                inserted := taicpu.op_reg_reg(A_XOR, S_L,
                                  taicpu(cur).oper[1]^.reg, taicpu(cur).oper[1]^.reg);
                                InsertLLItem(cur.previous, cur, inserted);
                                taicpu(cur).opcode := A_MOV;
                                taicpu(cur).changeopsize(S_B);
                                setsubreg(taicpu(cur).oper[1]^.reg, R_SUBL);
                              end;
                          end
                        { changes "movzbl mem, %reg" to
                          "xorl %reg, %reg; movb mem, %reg8" for Pentium and
                          PentiumMMX; not done when the destination register
                          is the base or index of the memory operand }
                        else if (taicpu(cur).oper[0]^.typ = top_ref) and
                           (taicpu(cur).oper[0]^.ref^.base <> taicpu(cur).oper[1]^.reg) and
                           (taicpu(cur).oper[0]^.ref^.index <> taicpu(cur).oper[1]^.reg) and
                           not(cs_opt_size in current_settings.optimizerswitches) and
                           IsGP32Reg(taicpu(cur).oper[1]^.reg) and
                           (current_settings.optimizecputype = cpu_Pentium) and
                           (taicpu(cur).opsize = S_BL) then
                          begin
                            inserted := taicpu.Op_reg_reg(A_XOR, S_L,
                              taicpu(cur).oper[1]^.reg, taicpu(cur).oper[1]^.reg);
                            taicpu(cur).opcode := A_MOV;
                            taicpu(cur).changeopsize(S_B);
                            setsubreg(taicpu(cur).oper[1]^.reg, R_SUBL);
                            InsertLLItem(cur.previous, cur, inserted);
                          end;
                      end;
                  end;

                A_TEST, A_OR:
                  { removes the line marked with (x) from the sequence
                      and/or/xor/add/sub/... $x, %y
                      test/or %y, %y  |  test $-1, %y    (x)
                      j(n)z _Label
                    as the first instruction already adjusts the ZF
                    %y operand may also be a reference }
                  begin
                    IsTestConstX := (taicpu(cur).opcode = A_TEST) and
                      MatchOperand(taicpu(cur).oper[0]^, -1);
                    if (OpsEqual(taicpu(cur).oper[0]^, taicpu(cur).oper[1]^) or IsTestConstX) and
                       GetLastInstruction(cur, flagSetter) and
                       (flagSetter.typ = ait_instruction) and
                       GetNextInstruction(cur, flagUser) and
                       MatchInstruction(flagUser, A_SETcc, A_Jcc, A_CMOVcc, []) then
                      begin
                        { true when the consumer only looks at the zero flag;
                          the removal does not work in case of overflow for
                          G(E)/L(E)/C_O/C_NO and in case of carry for
                          A(E)/B(E)/C/NC }
                        zeroFlagOnly :=
                          taicpu(flagUser).condition in [C_Z, C_NZ, C_E, C_NE];

                        case taicpu(flagSetter).opcode of
                          A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                            begin
                              { for and/or/xor any condition is accepted,
                                for add/sub only the zero-flag ones }
                              if OpsEqual(taicpu(flagSetter).oper[1]^, taicpu(cur).oper[1]^) and
                                 (zeroFlagOnly or
                                  ((taicpu(flagSetter).opcode <> A_ADD) and
                                   (taicpu(flagSetter).opcode <> A_SUB))) then
                                begin
                                  DropCurrent;
                                  continue;
                                end;
                            end;

                          A_SHL, A_SAL, A_SHR, A_SAR:
                            begin
                              { SHL/SAL/SHR/SAR with a value of 0 do not change
                                the flags, therefore it's only safe to do this
                                optimization for shifts by a (nonzero) constant }
                              if OpsEqual(taicpu(flagSetter).oper[1]^, taicpu(cur).oper[1]^) and
                                 (taicpu(flagSetter).oper[0]^.typ = top_const) and
                                 (taicpu(flagSetter).oper[0]^.val <> 0) and
                                 zeroFlagOnly then
                                begin
                                  DropCurrent;
                                  continue;
                                end;
                            end;

                          A_DEC, A_INC, A_NEG:
                            begin
                              if OpsEqual(taicpu(flagSetter).oper[0]^, taicpu(cur).oper[1]^) and
                                 zeroFlagOnly then
                                begin
                                  { replace inc/dec with add/sub 1, because
                                    inc/dec doesn't set the carry flag;
                                    neg is kept as it is }
                                  if taicpu(flagSetter).opcode <> A_NEG then
                                    begin
                                      if taicpu(flagSetter).opcode = A_DEC then
                                        taicpu(flagSetter).opcode := A_SUB
                                      else
                                        taicpu(flagSetter).opcode := A_ADD;
                                      { the former sole operand becomes the
                                        destination, the constant 1 the source }
                                      taicpu(flagSetter).loadoper(1, taicpu(flagSetter).oper[0]^);
                                      taicpu(flagSetter).loadConst(0, 1);
                                      taicpu(flagSetter).ops := 2;
                                    end;
                                  DropCurrent;
                                  continue;
                                end;
                            end
                          else
                            { predecessor is none of the above opcodes }
                            TestAllOnesToTestSelf;
                        end; { case }
                      end
                    else
                      { no usable predecessor/consumer pair }
                      TestAllOnesToTestSelf;
                  end;
              end; { case opcode }
            end;
          cur := tai(cur.next);
        end;
    end;
  { Driver of the assembler optimizer: repeatedly runs the peephole passes
    over every block of compiler-generated instructions in asml, skipping
    over inline assembler blocks, until the final pass has been done.

    Without cs_opt_level3 exactly one pass is made.  With it, passes continue
    until passNo exceeds 2 without a change being recorded, and never beyond
    pass 4.
    NOTE(review): within this procedure "changed" is only ever assigned
    false, so the "not changed" part of the test is always satisfied. }
  procedure TCpuAsmOptimizer.Optimize;
    var
      afterAsm: tai;
      passNo: longint;
      thorough, changed, finalPass: boolean;
    begin
      thorough := cs_opt_level3 in current_settings.optimizerswitches;
      passNo := 0;
      changed := false;
      repeat
        { decide up front whether this is the last trip through the list;
          the "passNo = 4" term is there to prevent endless loops }
        finalPass := not(thorough) or
                     (not changed and (passNo > 2)) or
                     (passNo = 4);
        changed := false;

        { Setup labeltable, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { BlockEnd now either contains an ait_marker with
          Kind = mark_AsmBlockStart, or nil }

        while Assigned(BlockStart) do
          begin
            if cs_opt_peephole in current_settings.optimizerswitches then
              begin
                if passNo = 0 then
                  begin
                    { the very first pass additionally runs the pre-peephole
                      step and does the first peephole pass twice }
                    PrePeepHoleOpts;
                    PeepHoleOptPass1;
                    PeepHoleOptPass1;
                  end
                else
                  PeepHoleOptPass1;

                { More peephole optimizations }
                PeepHoleOptPass2;
                if finalPass then
                  PostPeepHoleOpts;
              end;

            { Continue where we left off, BlockEnd is either the start of an
              assembler block or nil }
            BlockStart := BlockEnd;
            while Assigned(BlockStart) and
                  (BlockStart.typ = ait_marker) and
                  (tai_marker(BlockStart).Kind = mark_AsmBlockStart) do
              begin
                { We stopped at an assembler block, so skip it: advance until
                  BlockStart is the matching tai_marker(mark_AsmBlockEnd) }
                repeat
                  BlockStart := tai(BlockStart.next);
                until (BlockStart.typ = ait_marker) and
                      (tai_marker(BlockStart).Kind = mark_AsmBlockEnd);

                if not GetNextInstruction(BlockStart, afterAsm) or
                   ((afterAsm.typ = ait_marker) and
                    (tai_marker(afterAsm).Kind = mark_AsmBlockStart)) then
                  { nothing follows, or another assembler block follows
                    directly: move on to it (or to the end of the list) }
                  BlockStart := afterAsm
                else
                  { There is no assembler block anymore after the current
                    one, so optimize the next block of "normal" instructions }
                  pass_1;
              end;
          end;

        inc(passNo);
      until finalPass;
      dfa.free;
    end;
  begin
    { Unit initialization: publish this target's optimizer class through
      casmoptimizer (presumably the class reference the target-independent
      driver instantiates - confirm against its declaration). }
    casmoptimizer := TCpuAsmOptimizer;
  end.