{
Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
This unit contains the peephole optimizer.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
unit aoptx86;
{$i fpcdefs.inc}
{ $define DEBUG_AOPTCPU}
interface
uses
globtype,
cpubase,
aasmtai,aasmcpu,
cgbase,cgutils,
aopt,aoptobj;
type
TX86AsmOptimizer = class(TAsmOptimizer)
function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
protected
{ checks whether loading a new value in reg1 overwrites the entirety of reg2 }
function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
{ checks whether reading the value in reg1 depends on the value of reg2. This
is very similar to SuperRegistersEqual, except it takes into account that
R_SUBH and R_SUBL are independent (e.g. reading from AL does not
depend on the value in AH). }
function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
procedure DebugMsg(const s : string; p : tai);inline;
procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
class function IsExitCode(p : tai) : boolean;
class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
procedure RemoveLastDeallocForFuncRes(p : tai);
function DoSubAddOpt(var p : tai) : Boolean;
function PrePeepholeOptSxx(var p : tai) : boolean;
function OptPass1AND(var p : tai) : boolean;
function OptPass1VMOVAP(var p : tai) : boolean;
function OptPass1VOP(const p : tai) : boolean;
function OptPass1MOV(var p : tai) : boolean;
function OptPass1Movx(var p : tai) : boolean;
function OptPass1MOVAP(var p : tai) : boolean;
function OptPass1MOVXX(var p : tai) : boolean;
function OptPass1OP(const p : tai) : boolean;
function OptPass1LEA(var p : tai) : boolean;
function OptPass1Sub(var p : tai) : boolean;
function OptPass2MOV(var p : tai) : boolean;
function OptPass2Imul(var p : tai) : boolean;
function OptPass2Jmp(var p : tai) : boolean;
function OptPass2Jcc(var p : tai) : boolean;
function PostPeepholeOptMov(const p : tai) : Boolean;
function PostPeepholeOptCmp(var p : tai) : Boolean;
function PostPeepholeOptTestOr(var p : tai) : Boolean;
procedure OptReferences;
end;
function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
function RefsEqual(const r1, r2: treference): boolean;
function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
{ returns true, if ref is a reference using only the registers passed as base and index
and having an offset }
function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
implementation
uses
cutils,verbose,
globals,
cpuinfo,
procinfo,
aasmbase,
aoptutils,
symconst,symsym,
cgx86,
itcpugas;
function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
begin
result :=
(instr.typ = ait_instruction) and
(taicpu(instr).opcode = op) and
((opsize = []) or (taicpu(instr).opsize in opsize));
end;
function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
begin
result :=
(instr.typ = ait_instruction) and
((taicpu(instr).opcode = op1) or
(taicpu(instr).opcode = op2)
) and
((opsize = []) or (taicpu(instr).opsize in opsize));
end;
function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
begin
result :=
(instr.typ = ait_instruction) and
((taicpu(instr).opcode = op1) or
(taicpu(instr).opcode = op2) or
(taicpu(instr).opcode = op3)
) and
((opsize = []) or (taicpu(instr).opsize in opsize));
end;
function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
const opsize : topsizes) : boolean;
var
op : TAsmOp;
begin
result:=false;
for op in ops do
begin
if (instr.typ = ait_instruction) and
(taicpu(instr).opcode = op) and
((opsize = []) or (taicpu(instr).opsize in opsize)) then
begin
result:=true;
exit;
end;
end;
end;
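{ Usage note (added): an empty opsize set acts as a size wildcard, so
MatchInstruction(hp1,A_SHL,[]) matches a SHL of any operand size, while
MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) matches a CMP or TEST
whose operand size equals that of p; both forms are used further below. }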
function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
begin
result := (oper.typ = top_reg) and (oper.reg = reg);
end;
function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
begin
result := (oper.typ = top_const) and (oper.val = a);
end;
function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
begin
result := oper1.typ = oper2.typ;
if result then
case oper1.typ of
top_const:
Result:=oper1.val = oper2.val;
top_reg:
Result:=oper1.reg = oper2.reg;
top_ref:
Result:=RefsEqual(oper1.ref^, oper2.ref^);
else
internalerror(2013102801);
end
end;
function RefsEqual(const r1, r2: treference): boolean;
begin
RefsEqual :=
(r1.offset = r2.offset) and
(r1.segment = r2.segment) and (r1.base = r2.base) and
(r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
(r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
(r1.relsymbol = r2.relsymbol);
end;
function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
begin
Result:=(ref.offset=0) and
(ref.scalefactor in [0,1]) and
(ref.segment=NR_NO) and
(ref.symbol=nil) and
(ref.relsymbol=nil) and
((base=NR_INVALID) or
(ref.base=base)) and
((index=NR_INVALID) or
(ref.index=index));
end;
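{ Illustrative example (added): with NR_INVALID acting as a wildcard for base or
index, MatchReference(ref,NR_EBP,NR_INVALID) is true for "(%ebp)" or
"(%ebp,%eax)", but false for "4(%ebp)" since the offset must be zero;
MatchReferenceWithOffset below is identical except that it allows any offset. }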
function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
begin
Result:=(ref.scalefactor in [0,1]) and
(ref.segment=NR_NO) and
(ref.symbol=nil) and
(ref.relsymbol=nil) and
((base=NR_INVALID) or
(ref.base=base)) and
((index=NR_INVALID) or
(ref.index=index));
end;
function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
begin
Result:=RegReadByInstruction(reg,hp);
end;
function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
var
p: taicpu;
opcount: longint;
begin
RegReadByInstruction := false;
if hp.typ <> ait_instruction then
exit;
p := taicpu(hp);
case p.opcode of
A_CALL:
regreadbyinstruction := true;
A_IMUL:
case p.ops of
1:
regReadByInstruction := RegInOp(reg,p.oper[0]^) or
(
((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
);
2,3:
regReadByInstruction :=
reginop(reg,p.oper[0]^) or
reginop(reg,p.oper[1]^);
end;
A_MUL:
begin
regReadByInstruction := RegInOp(reg,p.oper[0]^) or
(
((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
);
end;
A_IDIV,A_DIV:
begin
regReadByInstruction := RegInOp(reg,p.oper[0]^) or
(
(getregtype(reg)=R_INTREGISTER) and
(
(getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
)
);
end;
else
begin
if (p.opcode=A_LEA) and is_segment_reg(reg) then
begin
RegReadByInstruction := false;
exit;
end;
for opcount := 0 to p.ops-1 do
if (p.oper[opCount]^.typ = top_ref) and
RegInRef(reg,p.oper[opcount]^.ref^) then
begin
RegReadByInstruction := true;
exit
end;
{ special handling for SSE MOVSD }
if (p.opcode=A_MOVSD) and (p.ops>0) then
begin
if p.ops<>2 then
internalerror(2017042702);
regReadByInstruction := reginop(reg,p.oper[0]^) or
(
(p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
);
exit;
end;
with insprop[p.opcode] do
begin
if getregtype(reg)=R_INTREGISTER then
begin
case getsupreg(reg) of
RS_EAX:
if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
begin
RegReadByInstruction := true;
exit
end;
RS_ECX:
if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
begin
RegReadByInstruction := true;
exit
end;
RS_EDX:
if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
begin
RegReadByInstruction := true;
exit
end;
RS_EBX:
if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
begin
RegReadByInstruction := true;
exit
end;
RS_ESP:
if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
begin
RegReadByInstruction := true;
exit
end;
RS_EBP:
if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
begin
RegReadByInstruction := true;
exit
end;
RS_ESI:
if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
begin
RegReadByInstruction := true;
exit
end;
RS_EDI:
if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
begin
RegReadByInstruction := true;
exit
end;
end;
end;
if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
begin
if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
begin
case p.condition of
C_A,C_NBE, { CF=0 and ZF=0 }
C_BE,C_NA: { CF=1 or ZF=1 }
RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
C_AE,C_NB,C_NC, { CF=0 }
C_B,C_NAE,C_C: { CF=1 }
RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
C_NE,C_NZ, { ZF=0 }
C_E,C_Z: { ZF=1 }
RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
C_G,C_NLE, { ZF=0 and SF=OF }
C_LE,C_NG: { ZF=1 or SF<>OF }
RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
C_GE,C_NL, { SF=OF }
C_L,C_NGE: { SF<>OF }
RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
C_NO, { OF=0 }
C_O: { OF=1 }
RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
C_NP,C_PO, { PF=0 }
C_P,C_PE: { PF=1 }
RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
C_NS, { SF=0 }
C_S: { SF=1 }
RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
else
internalerror(2017042701);
end;
if RegReadByInstruction then
exit;
end;
case getsubreg(reg) of
R_SUBW,R_SUBD,R_SUBQ:
RegReadByInstruction :=
[Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
R_SUBFLAGCARRY:
RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
R_SUBFLAGPARITY:
RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
R_SUBFLAGAUXILIARY:
RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
R_SUBFLAGZERO:
RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
R_SUBFLAGSIGN:
RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
R_SUBFLAGOVERFLOW:
RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
R_SUBFLAGINTERRUPT:
RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
R_SUBFLAGDIRECTION:
RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
else
internalerror(2017042601);
end;
exit;
end;
if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
(p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
(p.oper[0]^.reg=p.oper[1]^.reg) then
exit;
if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
begin
RegReadByInstruction := true;
exit
end;
if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
begin
RegReadByInstruction := true;
exit
end;
if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
begin
RegReadByInstruction := true;
exit
end;
if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
begin
RegReadByInstruction := true;
exit
end;
end;
end;
end;
end;
{$ifdef DEBUG_AOPTCPU}
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
begin
asml.insertbefore(tai_comment.Create(strpnew(s)), p);
end;
{$else DEBUG_AOPTCPU}
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
begin
end;
{$endif DEBUG_AOPTCPU}
function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
begin
if not SuperRegistersEqual(reg1,reg2) then
exit(false);
if getregtype(reg1)<>R_INTREGISTER then
exit(true); {because SuperRegistersEqual is true}
case getsubreg(reg1) of
{ A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
higher, it preserves the high bits, so the new value depends on
reg2's previous value. In other words, it is equivalent to doing:
reg2 := (reg2 and $ffffff00) or byte(reg1); }
R_SUBL:
exit(getsubreg(reg2)=R_SUBL);
{ A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
higher, it actually does a:
reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
R_SUBH:
exit(getsubreg(reg2)=R_SUBH);
{ If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
bits of reg2:
reg2 := (reg2 and $ffff0000) or word(reg1); }
R_SUBW:
exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
{ a write to R_SUBD always overwrites every other subregister,
because it clears the high 32 bits of R_SUBQ on x86_64 }
R_SUBD,
R_SUBQ:
exit(true);
else
internalerror(2017042801);
end;
end;
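{ Added examples: Reg1WriteOverwritesReg2Entirely(NR_EAX,NR_AX) is true, since a
32-bit write replaces all 16 bits of AX, while
Reg1WriteOverwritesReg2Entirely(NR_AL,NR_AX) is false, because a write to AL
leaves AH (and thus part of AX) unchanged. }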
function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
begin
if not SuperRegistersEqual(reg1,reg2) then
exit(false);
if getregtype(reg1)<>R_INTREGISTER then
exit(true); {because SuperRegistersEqual is true}
case getsubreg(reg1) of
R_SUBL:
exit(getsubreg(reg2)<>R_SUBH);
R_SUBH:
exit(getsubreg(reg2)<>R_SUBL);
R_SUBW,
R_SUBD,
R_SUBQ:
exit(true);
else
internalerror(2017042802);
end;
end;
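{ Added example: Reg1ReadDependsOnReg2(NR_AL,NR_AH) is false because AL and AH are
independent bytes, whereas Reg1ReadDependsOnReg2(NR_AX,NR_AH) is true since
reading AX also reads the AH byte. }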
function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
var
hp1 : tai;
l : TCGInt;
begin
result:=false;
{ changes the code sequence
shr/sar const1, x
shl const2, x
to
either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
if GetNextInstruction(p, hp1) and
MatchInstruction(hp1,A_SHL,[]) and
(taicpu(p).oper[0]^.typ = top_const) and
(taicpu(hp1).oper[0]^.typ = top_const) and
(taicpu(hp1).opsize = taicpu(p).opsize) and
(taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
begin
if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
not(cs_opt_size in current_settings.optimizerswitches) then
begin
{ shr/sar const1, %reg
shl const2, %reg
with const1 > const2 }
taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
taicpu(hp1).opcode := A_AND;
l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
case taicpu(p).opsize Of
S_B: taicpu(hp1).loadConst(0,l Xor $ff);
S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
S_Q: taicpu(hp1).loadConst(0,l Xor aint($ffffffffffffffff));
else
Internalerror(2017050703)
end;
end
else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
not(cs_opt_size in current_settings.optimizerswitches) then
begin
{ shr/sar const1, %reg
shl const2, %reg
with const1 < const2 }
taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
taicpu(p).opcode := A_AND;
l := (1 shl (taicpu(p).oper[0]^.val))-1;
case taicpu(p).opsize Of
S_B: taicpu(p).loadConst(0,l Xor $ff);
S_W: taicpu(p).loadConst(0,l Xor $ffff);
S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
else
Internalerror(2017050702)
end;
end
else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
begin
{ shr/sar const1, %reg
shl const2, %reg
with const1 = const2 }
taicpu(p).opcode := A_AND;
l := (1 shl (taicpu(p).oper[0]^.val))-1;
case taicpu(p).opsize Of
S_B: taicpu(p).loadConst(0,l Xor $ff);
S_W: taicpu(p).loadConst(0,l Xor $ffff);
S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
else
Internalerror(2017050701)
end;
asml.remove(hp1);
hp1.free;
end;
end;
end;
{ allocates register reg between (and including) instructions p1 and p2
the type of p1 and p2 must not be in SkipInstr
note that this routine is both called from the peephole optimizer
(where optinfo is not yet initialised) and from the cse (where it is) }
procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
var
hp, start: tai;
removedsomething,
firstRemovedWasAlloc,
lastRemovedWasDealloc: boolean;
begin
{$ifdef EXTDEBUG}
{ if assigned(p1.optinfo) and
(ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
internalerror(2004101010); }
{$endif EXTDEBUG}
start := p1;
if (reg = NR_ESP) or
(reg = current_procinfo.framepointer) or
not(assigned(p1)) then
{ this happens with registers which are loaded implicitly, outside the }
{ current block (e.g. esi with self) }
exit;
{ make sure we allocate it for this instruction }
getnextinstruction(p2,p2);
lastRemovedWasDealloc := false;
removedSomething := false;
firstRemovedWasAlloc := false;
{$ifdef allocregdebug}
hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
' from here...'));
insertllitem(asml,p1.previous,p1,hp);
hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
' till here...'));
insertllitem(asml,p2,p2.next,hp);
{$endif allocregdebug}
{ do it the safe way: always allocate the full super register,
as we do no register re-allocation in the peephole optimizer,
this does not hurt
}
case getregtype(reg) of
R_MMREGISTER:
reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
R_INTREGISTER:
reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
end;
if not(RegInUsedRegs(reg,initialusedregs)) then
begin
hp := tai_regalloc.alloc(reg,nil);
insertllItem(p1.previous,p1,hp);
IncludeRegInUsedRegs(reg,initialusedregs);
end;
while assigned(p1) and
(p1 <> p2) do
begin
if assigned(p1.optinfo) then
internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
p1 := tai(p1.next);
repeat
while assigned(p1) and
(p1.typ in (SkipInstr-[ait_regalloc])) Do
p1 := tai(p1.next);
{ remove all allocation/deallocation info about the register in between }
if assigned(p1) and
(p1.typ = ait_regalloc) then
begin
{ same super register, different sub register? }
if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
begin
if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
internalerror(2016101501);
tai_regalloc(p1).reg:=reg;
end;
if tai_regalloc(p1).reg=reg then
begin
if not removedSomething then
begin
firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
removedSomething := true;
end;
lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
hp := tai(p1.Next);
asml.Remove(p1);
p1.free;
p1 := hp;
end
else
p1 := tai(p1.next);
end;
until not(assigned(p1)) or
not(p1.typ in SkipInstr);
end;
if assigned(p1) then
begin
if firstRemovedWasAlloc then
begin
hp := tai_regalloc.Alloc(reg,nil);
insertLLItem(start.previous,start,hp);
end;
if lastRemovedWasDealloc then
begin
hp := tai_regalloc.DeAlloc(reg,nil);
insertLLItem(p1.previous,p1,hp);
end;
end;
end;
function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
var
p: taicpu;
begin
if not assigned(hp) or
(hp.typ <> ait_instruction) then
begin
Result := false;
exit;
end;
p := taicpu(hp);
if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
with insprop[p.opcode] do
begin
case getsubreg(reg) of
R_SUBW,R_SUBD,R_SUBQ:
Result:=
RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
R_SUBFLAGCARRY:
Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
R_SUBFLAGPARITY:
Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
R_SUBFLAGAUXILIARY:
Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
R_SUBFLAGZERO:
Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
R_SUBFLAGSIGN:
Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
R_SUBFLAGOVERFLOW:
Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
R_SUBFLAGINTERRUPT:
Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
R_SUBFLAGDIRECTION:
Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
else
internalerror(2017050501);
end;
exit;
end;
Result :=
(((p.opcode = A_MOV) or
(p.opcode = A_MOVZX) or
(p.opcode = A_MOVSX) or
(p.opcode = A_LEA) or
(p.opcode = A_VMOVSS) or
(p.opcode = A_VMOVSD) or
(p.opcode = A_VMOVAPD) or
(p.opcode = A_VMOVAPS) or
(p.opcode = A_VMOVQ) or
(p.opcode = A_MOVSS) or
(p.opcode = A_MOVSD) or
(p.opcode = A_MOVQ) or
(p.opcode = A_MOVAPD) or
(p.opcode = A_MOVAPS) or
{$ifndef x86_64}
(p.opcode = A_LDS) or
(p.opcode = A_LES) or
{$endif not x86_64}
(p.opcode = A_LFS) or
(p.opcode = A_LGS) or
(p.opcode = A_LSS)) and
(p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
(p.oper[1]^.typ = top_reg) and
(Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
((p.oper[0]^.typ = top_const) or
((p.oper[0]^.typ = top_reg) and
not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
((p.oper[0]^.typ = top_ref) and
not RegInRef(reg,p.oper[0]^.ref^)))) or
((p.opcode = A_POP) and
(Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
((p.opcode = A_IMUL) and
(p.ops=3) and
(Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
(((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
((((p.opcode = A_IMUL) or
(p.opcode = A_MUL)) and
(p.ops=1)) and
(((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
(((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
)) or
((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
{$ifndef x86_64}
((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$ifndef x86_64}
((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
(((p.opcode = A_FSTSW) or
(p.opcode = A_FNSTSW)) and
(p.oper[0]^.typ=top_reg) and
Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
(((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
(p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
(p.oper[0]^.reg=p.oper[1]^.reg) and
Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
end;
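{ Added note: for example "movl $0,%eax" and "xorl %eax,%eax" both load EAX with a
value independent of its previous contents (the xor/sub/sbb reg,reg case is the
last clause above), whereas "addl $1,%eax" does not qualify because its result
depends on the old value. }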
class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
var
hp2,hp3 : tai;
begin
{ some x86-64 targets issue a NOP before the real exit code }
if MatchInstruction(p,A_NOP,[]) then
GetNextInstruction(p,p);
result:=assigned(p) and (p.typ=ait_instruction) and
((taicpu(p).opcode = A_RET) or
((taicpu(p).opcode=A_LEAVE) and
GetNextInstruction(p,hp2) and
MatchInstruction(hp2,A_RET,[S_NO])
) or
((((taicpu(p).opcode=A_MOV) and
MatchOpType(taicpu(p),top_reg,top_reg) and
(taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
(taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
((taicpu(p).opcode=A_LEA) and
MatchOpType(taicpu(p),top_ref,top_reg) and
(taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
(taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
)
) and
GetNextInstruction(p,hp2) and
MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
MatchOpType(taicpu(hp2),top_reg) and
(taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
GetNextInstruction(hp2,hp3) and
MatchInstruction(hp3,A_RET,[S_NO])
)
);
end;
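{ Added summary: the epilogue shapes recognised above are a bare "ret",
"leave; ret", and "mov %framepointer,%esp" (or an equivalent lea) followed by
"pop %framepointer; ret", each optionally preceded by a nop. }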
class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
begin
isFoldableArithOp := False;
case hp1.opcode of
A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
isFoldableArithOp :=
((taicpu(hp1).oper[0]^.typ = top_const) or
((taicpu(hp1).oper[0]^.typ = top_reg) and
(taicpu(hp1).oper[0]^.reg <> reg))) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
(taicpu(hp1).oper[1]^.reg = reg);
A_INC,A_DEC,A_NEG,A_NOT:
isFoldableArithOp :=
(taicpu(hp1).oper[0]^.typ = top_reg) and
(taicpu(hp1).oper[0]^.reg = reg);
end;
end;
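{ Added examples: with reg = NR_EAX, "addl $4,%eax", "subl %ebx,%eax" and
"incl %eax" are foldable, but "addl %eax,%eax" is not, because the source
operand must not be the register itself. }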
procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
procedure DoRemoveLastDeallocForFuncRes(supreg: tsuperregister);
var
hp2: tai;
begin
hp2 := p;
repeat
hp2 := tai(hp2.previous);
if assigned(hp2) and
(hp2.typ = ait_regalloc) and
(tai_regalloc(hp2).ratype=ra_dealloc) and
(getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
(getsupreg(tai_regalloc(hp2).reg) = supreg) then
begin
asml.remove(hp2);
hp2.free;
break;
end;
until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
end;
begin
case current_procinfo.procdef.returndef.typ of
arraydef,recorddef,pointerdef,
stringdef,enumdef,procdef,objectdef,errordef,
filedef,setdef,procvardef,
classrefdef,forwarddef:
DoRemoveLastDeallocForFuncRes(RS_EAX);
orddef:
if current_procinfo.procdef.returndef.size <> 0 then
begin
DoRemoveLastDeallocForFuncRes(RS_EAX);
{ for int64/qword }
if current_procinfo.procdef.returndef.size = 8 then
DoRemoveLastDeallocForFuncRes(RS_EDX);
end;
end;
end;
function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
var
TmpUsedRegs : TAllUsedRegs;
hp1,hp2 : tai;
alloc,dealloc: tai_regalloc;
begin
result:=false;
if MatchOpType(taicpu(p),top_reg,top_reg) and
GetNextInstruction(p, hp1) and
(hp1.typ = ait_instruction) and
GetNextInstruction(hp1, hp2) and
MatchInstruction(hp2,taicpu(p).opcode,[]) and
OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
MatchOpType(taicpu(hp2),top_reg,top_reg) and
MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
(((taicpu(p).opcode=A_MOVAPS) and
((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
(taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
((taicpu(p).opcode=A_MOVAPD) and
((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
(taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
) then
{ change
movapX reg,reg2
addsX/subsX/... reg3, reg2
movapX reg2,reg
to
addsX/subsX/... reg3,reg
}
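{ Added concrete instance: "movaps %xmm1,%xmm0; addss %xmm2,%xmm0;
movaps %xmm0,%xmm1" becomes "addss %xmm2,%xmm1", provided %xmm0 is not used
after the sequence. }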
begin
CopyUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
begin
DebugMsg('Peephole Optimization MovapXOpMovapX2Op ('+
std_op2str[taicpu(p).opcode]+' '+
std_op2str[taicpu(hp1).opcode]+' '+
std_op2str[taicpu(hp2).opcode]+') done',p);
{ we cannot eliminate the first move if
the operation uses the same register for source and dest }
if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
begin
asml.remove(p);
p.Free;
end;
taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
asml.remove(hp2);
hp2.Free;
p:=hp1;
result:=true;
end;
ReleaseUsedRegs(TmpUsedRegs);
end
end;
function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
var
TmpUsedRegs : TAllUsedRegs;
hp1,hp2 : tai;
begin
result:=false;
if MatchOpType(taicpu(p),top_reg,top_reg) then
begin
{ vmova* reg1,reg1
=>
<nop> }
if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
begin
GetNextInstruction(p,hp1);
asml.Remove(p);
p.Free;
p:=hp1;
result:=true;
end
else if GetNextInstruction(p,hp1) then
begin
if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
MatchOpType(taicpu(hp1),top_reg,top_reg) and
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
begin
{ vmova* reg1,reg2
vmova* reg2,reg3
dealloc reg2
=>
vmova* reg1,reg3 }
CopyUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
begin
taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
asml.Remove(hp1);
hp1.Free;
result:=true;
end
{ special case:
vmova* reg1,reg2
vmova* reg2,reg1
=>
vmova* reg1,reg2 }
else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
begin
asml.Remove(hp1);
hp1.Free;
result:=true;
end
end
else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
{ we mix single and double operations here because we assume that the compiler
generates vmovapd only after double operations and vmovaps only after single operations }
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
GetNextInstruction(hp1,hp2) and
MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
begin
CopyUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
then
begin
taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
asml.Remove(p);
p.Free;
asml.Remove(hp2);
hp2.Free;
p:=hp1;
end;
end;
end;
end;
end;
function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
var
TmpUsedRegs : TAllUsedRegs;
hp1 : tai;
begin
result:=false;
{ replace
V<Op>X %mreg1,%mreg2,%mreg3
VMovX %mreg3,%mreg4
dealloc %mreg3
by
V<Op>X %mreg1,%mreg2,%mreg4
?
}
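{ Added concrete instance: "vaddsd %xmm0,%xmm1,%xmm2; vmovapd %xmm2,%xmm3", with
%xmm2 deallocated afterwards, becomes "vaddsd %xmm0,%xmm1,%xmm3". }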
if GetNextInstruction(p,hp1) and
{ we mix single and double operations here because we assume that the compiler
generates vmovapd only after double operations and vmovaps only after single operations }
MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
(taicpu(hp1).oper[1]^.typ=top_reg) then
begin
CopyUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.next));
if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
) then
begin
taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
DebugMsg('PeepHole Optimization VOpVmov2VOp done',p);
asml.Remove(hp1);
hp1.Free;
result:=true;
end;
end;
end;
function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
var
hp1, hp2: tai;
TmpUsedRegs : TAllUsedRegs;
GetNextInstruction_p : Boolean;
begin
Result:=false;
{ remove mov reg1,reg1? }
if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
begin
GetNextInstruction(p, hp1);
DebugMsg('PeepHole Optimization Mov2Nop done',p);
asml.remove(p);
p.free;
p:=hp1;
Result:=true;
exit;
end;
GetNextInstruction_p:=GetNextInstruction(p, hp1);
if GetNextInstruction_p and
MatchInstruction(hp1,A_AND,[]) and
(taicpu(p).oper[1]^.typ = top_reg) and
MatchOpType(taicpu(hp1),top_const,top_reg) and
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
case taicpu(p).opsize Of
S_L:
if (taicpu(hp1).oper[0]^.val = $ffffffff) then
begin
{ Optimize out:
mov x, %reg
and ffffffffh, %reg
}
DebugMsg('PeepHole Optimization MovAnd2Mov 1 done',p);
asml.remove(hp1);
hp1.free;
Result:=true;
exit;
end;
S_Q: { TODO: Confirm if this is even possible }
if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
begin
{ Optimize out:
mov x, %reg
and ffffffffffffffffh, %reg
}
DebugMsg('PeepHole Optimization MovAnd2Mov 2 done',p);
asml.remove(hp1);
hp1.free;
Result:=true;
exit;
end;
end
else if GetNextInstruction_p and
MatchInstruction(hp1,A_MOV,[]) and
(taicpu(p).oper[1]^.typ = top_reg) and
(getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
begin
CopyUsedRegs(TmpUsedRegs);
UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
{ we have
mov x, %treg
mov %treg, y
}
if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
{ we've got
mov x, %treg
mov %treg, y
where %treg is not used afterwards }
case taicpu(p).oper[0]^.typ Of
top_reg:
begin
{ change
mov %reg, %treg
mov %treg, y
to
mov %reg, y
}
if taicpu(hp1).oper[1]^.typ=top_reg then
AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
DebugMsg('PeepHole Optimization MovMov2Mov 2 done',p);
asml.remove(hp1);
hp1.free;
ReleaseUsedRegs(TmpUsedRegs);
Result:=true;
Exit;
end;
top_ref:
if (taicpu(hp1).oper[1]^.typ = top_reg) then
begin
{ change
mov mem, %treg
mov %treg, %reg
to
  1119. mov mem, %reg"
}
taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
DebugMsg('PeepHole Optimization MovMov2Mov 3 done',p);
asml.remove(hp1);
hp1.free;
ReleaseUsedRegs(TmpUsedRegs);
Result:=true;
Exit;
end;
end;
ReleaseUsedRegs(TmpUsedRegs);
end
else
{ Change
mov %reg1, %reg2
xxx %reg2, ???
to
mov %reg1, %reg2
xxx %reg1, ???
to avoid a write/read penalty
}
if MatchOpType(taicpu(p),top_reg,top_reg) and
GetNextInstruction(p,hp1) and
(tai(hp1).typ = ait_instruction) and
(taicpu(hp1).ops >= 1) and
MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
{ we have
mov %reg1, %reg2
XXX %reg2, ???
}
begin
if ((taicpu(hp1).opcode = A_OR) or
(taicpu(hp1).opcode = A_AND) or
(taicpu(hp1).opcode = A_TEST)) and
(taicpu(hp1).oper[1]^.typ = top_reg) and
(taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
{ we have
mov %reg1, %reg2
test/or/and %reg2, %reg2
}
begin
CopyUsedRegs(TmpUsedRegs);
{ reg1 will be used after the first instruction,
so update the allocation info }
AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
if GetNextInstruction(hp1, hp2) and
(hp2.typ = ait_instruction) and
taicpu(hp2).is_jmp and
not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
{ change
mov %reg1, %reg2
test/or/and %reg2, %reg2
jxx
to
test %reg1, %reg1
jxx
}
begin
taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
DebugMsg('PeepHole Optimization MovTestJxx2TestMov done',p);
asml.remove(p);
p.free;
p := hp1;
ReleaseUsedRegs(TmpUsedRegs);
Exit;
end
else
{ change
mov %reg1, %reg2
test/or/and %reg2, %reg2
to
mov %reg1, %reg2
test/or/and %reg1, %reg1
}
begin
taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
DebugMsg('PeepHole Optimization MovTestJxx2MovTestJxx done',p);
  1199. end;
  1200. ReleaseUsedRegs(TmpUsedRegs);
  1201. end
  1202. end
  1203. else
  1204. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1205. x >= RetOffset) as it doesn't do anything (it writes either to a
  1206. parameter or to the temporary storage room for the function
  1207. result)
  1208. }
  1209. if GetNextInstruction_p and
  1210. (tai(hp1).typ = ait_instruction) then
  1211. begin
  1212. if IsExitCode(hp1) and
  1213. MatchOpType(taicpu(p),top_reg,top_ref) and
  1214. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1215. not(assigned(current_procinfo.procdef.funcretsym) and
  1216. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1217. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  1218. begin
  1219. asml.remove(p);
  1220. p.free;
  1221. p:=hp1;
  1222. DebugMsg('Peephole removed deadstore before leave/ret',p);
  1223. RemoveLastDeallocForFuncRes(p);
  1224. exit;
  1225. end
  1226. { change
  1227. mov reg1, mem1
  1228. test/cmp x, mem1
  1229. to
  1230. mov reg1, mem1
  1231. test/cmp x, reg1
  1232. }
  1233. else if MatchOpType(taicpu(p),top_reg,top_ref) and
  1234. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  1235. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1236. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1237. begin
  1238. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1239. DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
  1240. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1241. end;
  1242. end;
  1243. { Next instruction is also a MOV ? }
  1244. if GetNextInstruction_p and
  1245. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1246. begin
  1247. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1248. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1249. { mov reg1, mem1 or mov mem1, reg1
  1250. mov mem2, reg2 mov reg2, mem2}
  1251. begin
  1252. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1253. { mov reg1, mem1 or mov mem1, reg1
  1254. mov mem2, reg1 mov reg2, mem1}
  1255. begin
  1256. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1257. { Removes the second statement from
  1258. mov reg1, mem1/reg2
  1259. mov mem1/reg2, reg1 }
  1260. begin
  1261. if taicpu(p).oper[0]^.typ=top_reg then
  1262. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1263. DebugMsg('PeepHole Optimization MovMov2Mov 1',p);
  1264. asml.remove(hp1);
  1265. hp1.free;
  1266. Result:=true;
  1267. exit;
  1268. end
  1269. else
  1270. begin
  1271. CopyUsedRegs(TmpUsedRegs);
  1272. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1273. if (taicpu(p).oper[1]^.typ = top_ref) and
  1274. { mov reg1, mem1
  1275. mov mem2, reg1 }
  1276. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1277. GetNextInstruction(hp1, hp2) and
  1278. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1279. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1280. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1281. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1282. { change to
  1283. mov reg1, mem1 mov reg1, mem1
  1284. mov mem2, reg1 cmp reg1, mem2
  1285. cmp mem1, reg1
  1286. }
  1287. begin
  1288. asml.remove(hp2);
  1289. hp2.free;
  1290. taicpu(hp1).opcode := A_CMP;
  1291. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1292. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1293. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1294. DebugMsg('Peephole Optimization MovMovCmp2MovCmp done',hp1);
  1295. end;
  1296. ReleaseUsedRegs(TmpUsedRegs);
  1297. end;
  1298. end
  1299. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1300. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1301. begin
  1302. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1303. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1304. DebugMsg('PeepHole Optimization MovMov2MovMov1 done',p);
  1305. end
  1306. else
  1307. begin
  1308. CopyUsedRegs(TmpUsedRegs);
  1309. if GetNextInstruction(hp1, hp2) and
  1310. MatchOpType(taicpu(p),top_ref,top_reg) and
  1311. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1312. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1313. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1314. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1315. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1316. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1317. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1318. { mov mem1, %reg1
  1319. mov %reg1, mem2
  1320. mov mem2, reg2
  1321. to:
  1322. mov mem1, reg2
  1323. mov reg2, mem2}
  1324. begin
  1325. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1326. DebugMsg('PeepHole Optimization MovMovMov2MovMov 1 done',p);
  1327. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1328. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1329. asml.remove(hp2);
  1330. hp2.free;
  1331. end
  1332. {$ifdef i386}
  1333. { this is enabled for i386 only, as the rules to create the reg sets below
1334. are too complicated for x86-64, which would make this code too error-prone
  1335. on x86-64
  1336. }
  1337. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1338. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1339. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1340. { mov mem1, reg1 mov mem1, reg1
  1341. mov reg1, mem2 mov reg1, mem2
  1342. mov mem2, reg2 mov mem2, reg1
  1343. to: to:
  1344. mov mem1, reg1 mov mem1, reg1
  1345. mov mem1, reg2 mov reg1, mem2
  1346. mov reg1, mem2
  1347. or (if mem1 depends on reg1
  1348. and/or if mem2 depends on reg2)
  1349. to:
  1350. mov mem1, reg1
  1351. mov reg1, mem2
  1352. mov reg1, reg2
  1353. }
  1354. begin
  1355. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1356. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1357. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1358. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1359. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1360. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1361. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1362. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1363. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1364. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1365. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1366. end
  1367. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1368. begin
  1369. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1370. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1371. end
  1372. else
  1373. begin
  1374. asml.remove(hp2);
  1375. hp2.free;
  1376. end
  1377. {$endif i386}
  1378. ;
  1379. ReleaseUsedRegs(TmpUsedRegs);
  1380. end;
  1381. end
  1382. (* { movl [mem1],reg1
  1383. movl [mem1],reg2
  1384. to
  1385. movl [mem1],reg1
  1386. movl reg1,reg2
  1387. }
  1388. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1389. (taicpu(p).oper[1]^.typ = top_reg) and
  1390. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1391. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1392. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1393. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1394. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1395. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1396. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1397. else*)
  1398. { movl const1,[mem1]
  1399. movl [mem1],reg1
  1400. to
  1401. movl const1,reg1
  1402. movl reg1,[mem1]
  1403. }
  1404. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  1405. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1406. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1407. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1408. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1409. begin
  1410. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1411. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1412. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1413. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1414. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1415. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  1416. end
  1417. end
  1418. else if (taicpu(p).oper[1]^.typ = top_reg) and
  1419. GetNextInstruction_p and
  1420. (hp1.typ = ait_instruction) and
  1421. GetNextInstruction(hp1, hp2) and
  1422. MatchInstruction(hp2,A_MOV,[]) and
  1423. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1424. (taicpu(hp2).oper[0]^.typ=top_reg) and
  1425. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1426. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1427. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  1428. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1429. ) then
  1430. { change movsX/movzX reg/ref, reg2
  1431. add/sub/or/... reg3/$const, reg2
1432. mov reg2, reg/ref
  1433. to add/sub/or/... reg3/$const, reg/ref }
  1434. begin
  1435. CopyUsedRegs(TmpUsedRegs);
  1436. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1437. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1438. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1439. begin
  1440. { by example:
  1441. movswl %si,%eax movswl %si,%eax p
  1442. decl %eax addl %edx,%eax hp1
  1443. movw %ax,%si movw %ax,%si hp2
  1444. ->
  1445. movswl %si,%eax movswl %si,%eax p
  1446. decw %eax addw %edx,%eax hp1
  1447. movw %ax,%si movw %ax,%si hp2
  1448. }
  1449. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  1450. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  1451. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
1452. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize]+')',p);
  1453. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1454. {
  1455. ->
  1456. movswl %si,%eax movswl %si,%eax p
  1457. decw %si addw %dx,%si hp1
  1458. movw %ax,%si movw %ax,%si hp2
  1459. }
  1460. case taicpu(hp1).ops of
  1461. 1:
  1462. begin
  1463. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1464. if taicpu(hp1).oper[0]^.typ=top_reg then
  1465. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1466. end;
  1467. 2:
  1468. begin
  1469. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1470. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1471. (taicpu(hp1).opcode<>A_SHL) and
  1472. (taicpu(hp1).opcode<>A_SHR) and
  1473. (taicpu(hp1).opcode<>A_SAR) then
  1474. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1475. end;
  1476. else
  1477. internalerror(2008042701);
  1478. end;
  1479. {
  1480. ->
  1481. decw %si addw %dx,%si p
  1482. }
  1483. asml.remove(p);
  1484. asml.remove(hp2);
  1485. p.Free;
  1486. hp2.Free;
  1487. p := hp1;
  1488. end;
  1489. ReleaseUsedRegs(TmpUsedRegs);
  1490. end
  1491. else if GetNextInstruction_p and
  1492. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1493. GetNextInstruction(hp1, hp2) and
  1494. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1495. MatchOperand(Taicpu(p).oper[0]^,0) and
  1496. (Taicpu(p).oper[1]^.typ = top_reg) and
  1497. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1498. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1499. { mov reg1,0
  1500. bts reg1,operand1 --> mov reg1,operand2
  1501. or reg1,operand2 bts reg1,operand1}
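{ illustrative example (registers and operands chosen arbitrarily):
movl $0,%eax                 movl 8(%esi),%eax
btsl $3,%eax          -->    btsl $3,%eax
orl  8(%esi),%eax }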
  1502. begin
  1503. Taicpu(hp2).opcode:=A_MOV;
  1504. asml.remove(hp1);
  1505. insertllitem(hp2,hp2.next,hp1);
  1506. asml.remove(p);
  1507. p.free;
  1508. p:=hp1;
  1509. end
  1510. else if GetNextInstruction_p and
  1511. MatchInstruction(hp1,A_LEA,[S_L]) and
  1512. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1513. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1514. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1515. ) or
  1516. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1517. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1518. )
  1519. ) then
  1520. { mov reg1,ref
  1521. lea reg2,[reg1,reg2]
  1522. to
  1523. add reg2,ref}
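{ illustrative example (registers chosen arbitrarily; only valid when %eax is
not used afterwards):
movl 4(%ebx),%eax
leal (%edx,%eax),%edx    -->    addl 4(%ebx),%edx }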
  1524. begin
  1525. CopyUsedRegs(TmpUsedRegs);
  1526. { reg1 may not be used afterwards }
  1527. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1528. begin
  1529. Taicpu(hp1).opcode:=A_ADD;
  1530. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1531. DebugMsg('Peephole MovLea2Add done',hp1);
  1532. asml.remove(p);
  1533. p.free;
  1534. p:=hp1;
  1535. end;
  1536. ReleaseUsedRegs(TmpUsedRegs);
  1537. end;
  1538. end;
  1539. function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  1540. var
  1541. hp1 : tai;
  1542. begin
  1543. Result:=false;
  1544. if taicpu(p).ops <> 2 then
  1545. exit;
  1546. if GetNextInstruction(p,hp1) and
  1547. MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
  1548. (taicpu(hp1).ops = 2) then
  1549. begin
  1550. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1551. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1552. { movXX reg1, mem1 or movXX mem1, reg1
  1553. movXX mem2, reg2 movXX reg2, mem2}
  1554. begin
  1555. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1556. { movXX reg1, mem1 or movXX mem1, reg1
  1557. movXX mem2, reg1 movXX reg2, mem1}
  1558. begin
  1559. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1560. begin
  1561. { Removes the second statement from
  1562. movXX reg1, mem1/reg2
  1563. movXX mem1/reg2, reg1
  1564. }
  1565. if taicpu(p).oper[0]^.typ=top_reg then
  1566. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1567. { Removes the second statement from
  1568. movXX mem1/reg1, reg2
  1569. movXX reg2, mem1/reg1
  1570. }
  1571. if (taicpu(p).oper[1]^.typ=top_reg) and
  1572. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
  1573. begin
  1574. asml.remove(p);
  1575. p.free;
  1576. GetNextInstruction(hp1,p);
  1577. DebugMsg('PeepHole Optimization MovXXMovXX2Nop 1 done',p);
  1578. end
  1579. else
  1580. DebugMsg('PeepHole Optimization MovXXMovXX2MoVXX 1 done',p);
  1581. asml.remove(hp1);
  1582. hp1.free;
  1583. Result:=true;
  1584. exit;
  1585. end
  1586. end;
  1587. end;
  1588. end;
  1589. end;
  1590. function TX86AsmOptimizer.OptPass1OP(const p : tai) : boolean;
  1591. var
  1592. TmpUsedRegs : TAllUsedRegs;
  1593. hp1 : tai;
  1594. begin
  1595. result:=false;
  1596. { replace
  1597. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  1598. MovX %mreg2,%mreg1
  1599. dealloc %mreg2
  1600. by
  1601. <Op>X %mreg2,%mreg1
  1602. ?
  1603. }
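{ illustrative example for the ADD case (xmm registers chosen arbitrarily):
addsd %xmm0,%xmm1
movapd %xmm1,%xmm0    -->    addsd %xmm1,%xmm0
(with %xmm1 deallocated afterwards) }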
  1604. if GetNextInstruction(p,hp1) and
1605. { we mix single and double operations here because we assume that the compiler
  1606. generates vmovapd only after double operations and vmovaps only after single operations }
  1607. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  1608. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1609. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  1610. (taicpu(p).oper[0]^.typ=top_reg) then
  1611. begin
  1612. CopyUsedRegs(TmpUsedRegs);
  1613. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1614. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1615. begin
  1616. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  1617. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1618. DebugMsg('PeepHole Optimization OpMov2Op done',p);
  1619. asml.Remove(hp1);
  1620. hp1.Free;
  1621. result:=true;
  1622. end;
  1623. ReleaseUsedRegs(TmpUsedRegs);
  1624. end;
  1625. end;
  1626. function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  1627. var
  1628. hp1 : tai;
  1629. l : ASizeInt;
  1630. TmpUsedRegs : TAllUsedRegs;
  1631. begin
  1632. Result:=false;
  1633. { removes seg register prefixes from LEA operations, as they
  1634. don't do anything}
  1635. taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
  1636. { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
  1637. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1638. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
1639. { do not mess with LEAs accessing the stack pointer }
  1640. (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
  1641. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1642. begin
  1643. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1644. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1645. begin
  1646. hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
  1647. taicpu(p).oper[1]^.reg);
  1648. InsertLLItem(p.previous,p.next, hp1);
  1649. DebugMsg('PeepHole Optimization Lea2Mov done',hp1);
  1650. p.free;
  1651. p:=hp1;
  1652. Result:=true;
  1653. exit;
  1654. end
  1655. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1656. begin
  1657. hp1:=taicpu(p.Next);
  1658. DebugMsg('PeepHole Optimization Lea2Nop done',p);
  1659. asml.remove(p);
  1660. p.free;
  1661. p:=hp1;
  1662. Result:=true;
  1663. exit;
  1664. end
  1665. { continue to use lea to adjust the stack pointer,
  1666. it is the recommended way, but only if not optimizing for size }
  1667. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1668. (cs_opt_size in current_settings.optimizerswitches) then
  1669. with taicpu(p).oper[0]^.ref^ do
  1670. if (base = taicpu(p).oper[1]^.reg) then
  1671. begin
  1672. l:=offset;
  1673. if (l=1) and UseIncDec then
  1674. begin
  1675. taicpu(p).opcode:=A_INC;
  1676. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1677. taicpu(p).ops:=1;
  1678. DebugMsg('PeepHole Optimization Lea2Inc done',p);
  1679. end
  1680. else if (l=-1) and UseIncDec then
  1681. begin
  1682. taicpu(p).opcode:=A_DEC;
  1683. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1684. taicpu(p).ops:=1;
  1685. DebugMsg('PeepHole Optimization Lea2Dec done',p);
  1686. end
  1687. else
  1688. begin
  1689. if (l<0) and (l<>-2147483648) then
  1690. begin
  1691. taicpu(p).opcode:=A_SUB;
  1692. taicpu(p).loadConst(0,-l);
  1693. DebugMsg('PeepHole Optimization Lea2Sub done',p);
  1694. end
  1695. else
  1696. begin
  1697. taicpu(p).opcode:=A_ADD;
  1698. taicpu(p).loadConst(0,l);
  1699. DebugMsg('PeepHole Optimization Lea2Add done',p);
  1700. end;
  1701. end;
  1702. Result:=true;
  1703. exit;
  1704. end;
  1705. end;
  1706. if GetNextInstruction(p,hp1) and
  1707. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
  1708. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1709. MatchOpType(Taicpu(hp1),top_reg,top_reg) and
  1710. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
  1711. begin
  1712. CopyUsedRegs(TmpUsedRegs);
  1713. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1714. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1715. begin
  1716. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1717. DebugMsg('PeepHole Optimization LeaMov2Lea done',p);
  1718. asml.Remove(hp1);
  1719. hp1.Free;
  1720. result:=true;
  1721. end;
  1722. ReleaseUsedRegs(TmpUsedRegs);
  1723. end;
  1724. (*
  1725. This is unsafe, lea doesn't modify the flags but "add"
  1726. does. This breaks webtbs/tw15694.pp. The above
  1727. transformations are also unsafe, but they don't seem to
1728. be triggered by code that FPC generates (or at
1729. least it does not occur in the tests...). This needs to be
  1730. fixed by checking for the liveness of the flags register.
  1731. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1732. begin
  1733. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1734. taicpu(p).oper[0]^.ref^.base);
  1735. InsertLLItem(asml,p.previous,p.next, hp1);
  1736. DebugMsg('Peephole Lea2AddBase done',hp1);
  1737. p.free;
  1738. p:=hp1;
  1739. continue;
  1740. end
  1741. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1742. begin
  1743. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1744. taicpu(p).oper[0]^.ref^.index);
  1745. InsertLLItem(asml,p.previous,p.next,hp1);
  1746. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1747. p.free;
  1748. p:=hp1;
  1749. continue;
  1750. end
  1751. *)
  1752. end;
  1753. function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
  1754. var
  1755. hp1 : tai;
  1756. begin
  1757. DoSubAddOpt := False;
  1758. if GetLastInstruction(p, hp1) and
  1759. (hp1.typ = ait_instruction) and
  1760. (taicpu(hp1).opsize = taicpu(p).opsize) then
  1761. case taicpu(hp1).opcode Of
  1762. A_DEC:
  1763. if (taicpu(hp1).oper[0]^.typ = top_reg) and
  1764. MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1765. begin
  1766. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
  1767. asml.remove(hp1);
  1768. hp1.free;
  1769. end;
  1770. A_SUB:
  1771. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  1772. MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
  1773. begin
  1774. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
  1775. asml.remove(hp1);
  1776. hp1.free;
  1777. end;
  1778. A_ADD:
  1779. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  1780. MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
  1781. begin
  1782. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  1783. asml.remove(hp1);
  1784. hp1.free;
  1785. if (taicpu(p).oper[0]^.val = 0) then
  1786. begin
  1787. hp1 := tai(p.next);
  1788. asml.remove(p);
  1789. p.free;
  1790. if not GetLastInstruction(hp1, p) then
  1791. p := hp1;
  1792. DoSubAddOpt := True;
  1793. end
  1794. end;
  1795. end;
  1796. end;
  1797. function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
  1798. var
  1799. hp1 : tai;
  1800. begin
  1801. Result:=false;
  1802. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1803. { * change "sub/add const1, reg" or "dec reg" followed by
  1804. "sub const2, reg" to one "sub ..., reg" }
  1805. if MatchOpType(taicpu(p),top_const,top_reg) then
  1806. begin
  1807. {$ifdef i386}
  1808. if (taicpu(p).oper[0]^.val = 2) and
  1809. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1810. { Don't do the sub/push optimization if the sub }
  1811. { comes from setting up the stack frame (JM) }
  1812. (not(GetLastInstruction(p,hp1)) or
  1813. not(MatchInstruction(hp1,A_MOV,[S_L]) and
  1814. MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
1815. MatchOperand(taicpu(hp1).oper[1]^,NR_EBP))) then
  1816. begin
  1817. hp1 := tai(p.next);
  1818. while Assigned(hp1) and
  1819. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1820. not RegReadByInstruction(NR_ESP,hp1) and
  1821. not RegModifiedByInstruction(NR_ESP,hp1) do
  1822. hp1 := tai(hp1.next);
  1823. if Assigned(hp1) and
  1824. MatchInstruction(hp1,A_PUSH,[S_W]) then
  1825. begin
  1826. taicpu(hp1).changeopsize(S_L);
  1827. if taicpu(hp1).oper[0]^.typ=top_reg then
  1828. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1829. hp1 := tai(p.next);
  1830. asml.remove(p);
  1831. p.free;
  1832. p := hp1;
  1833. Result:=true;
  1834. exit;
  1835. end;
  1836. end;
  1837. {$endif i386}
  1838. if DoSubAddOpt(p) then
  1839. Result:=true;
  1840. end;
  1841. end;
  1842. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  1843. var
  1844. TmpUsedRegs : TAllUsedRegs;
  1845. hp1,hp2: tai;
  1846. begin
  1847. Result:=false;
  1848. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1849. GetNextInstruction(p, hp1) and
  1850. MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
  1851. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  1852. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
  1853. or
  1854. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
  1855. ) and
  1856. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  1857. { mov reg1, reg2
  1858. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  1859. begin
  1860. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  1861. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  1862. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  1863. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  1864. DebugMsg('PeepHole Optimization MovMovXX2MoVXX 1 done',p);
  1865. asml.remove(p);
  1866. p.free;
  1867. p := hp1;
  1868. Result:=true;
  1869. exit;
  1870. end
  1871. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1872. GetNextInstruction(p,hp1) and
  1873. (hp1.typ = ait_instruction) and
1874. { while the GetNextInstruction(hp1,hp2) call could be factored out,
1875. doing it separately in both branches allows the cheap checks, which
1876. have a low probability of matching, to be done earlier }
  1877. ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1878. GetNextInstruction(hp1,hp2) and
  1879. MatchInstruction(hp2,A_MOV,[])
  1880. ) or
  1881. ((taicpu(hp1).opcode=A_LEA) and
  1882. GetNextInstruction(hp1,hp2) and
  1883. MatchInstruction(hp2,A_MOV,[]) and
  1884. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  1885. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
  1886. ) or
  1887. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
  1888. taicpu(p).oper[1]^.reg) and
  1889. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
  1890. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
  1891. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
  1892. ) and
  1893. ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
  1894. )
  1895. ) and
  1896. MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
  1897. (taicpu(hp2).oper[1]^.typ = top_ref) then
  1898. begin
  1899. CopyUsedRegs(TmpUsedRegs);
  1900. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1901. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  1902. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
  1903. { change mov (ref), reg
  1904. add/sub/or/... reg2/$const, reg
  1905. mov reg, (ref)
  1906. # release reg
  1907. to add/sub/or/... reg2/$const, (ref) }
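{ illustrative example (register and reference chosen arbitrarily; only valid
when %eax is released afterwards):
movl 8(%ebx),%eax
addl $1,%eax           -->    addl $1,8(%ebx)
movl %eax,8(%ebx) }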
  1908. begin
  1909. case taicpu(hp1).opcode of
  1910. A_INC,A_DEC,A_NOT,A_NEG :
  1911. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1912. A_LEA :
  1913. begin
  1914. taicpu(hp1).opcode:=A_ADD;
  1915. if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
  1916. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  1917. else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
  1918. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
  1919. else
  1920. taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
  1921. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1922. DebugMsg('Peephole FoldLea done',hp1);
  1923. end
  1924. else
  1925. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1926. end;
  1927. asml.remove(p);
  1928. asml.remove(hp2);
  1929. p.free;
  1930. hp2.free;
  1931. p := hp1
  1932. end;
  1933. ReleaseUsedRegs(TmpUsedRegs);
  1934. end;
  1935. end;
  1936. function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  1937. var
  1938. TmpUsedRegs : TAllUsedRegs;
  1939. hp1 : tai;
  1940. begin
  1941. Result:=false;
  1942. if (taicpu(p).ops >= 2) and
  1943. ((taicpu(p).oper[0]^.typ = top_const) or
  1944. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  1945. (taicpu(p).oper[1]^.typ = top_reg) and
  1946. ((taicpu(p).ops = 2) or
  1947. ((taicpu(p).oper[2]^.typ = top_reg) and
  1948. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  1949. GetLastInstruction(p,hp1) and
  1950. MatchInstruction(hp1,A_MOV,[]) and
  1951. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  1952. ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
  1953. ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
  1954. begin
  1955. CopyUsedRegs(TmpUsedRegs);
  1956. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
  1957. { change
  1958. mov reg1,reg2
  1959. imul y,reg2 to imul y,reg1,reg2 }
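{ illustrative example (registers and constant chosen arbitrarily):
movl %esi,%eax
imull $5,%eax    -->    imull $5,%esi,%eax }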
  1960. begin
  1961. taicpu(p).ops := 3;
  1962. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  1963. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  1964. DebugMsg('Peephole MovImul2Imul done',p);
  1965. asml.remove(hp1);
  1966. hp1.free;
  1967. result:=true;
  1968. end;
  1969. ReleaseUsedRegs(TmpUsedRegs);
  1970. end;
  1971. end;
  1972. function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  1973. var
  1974. hp1 : tai;
  1975. begin
  1976. {
  1977. change
  1978. jmp .L1
  1979. ...
  1980. .L1:
  1981. ret
  1982. into
  1983. ret
  1984. }
  1985. result:=false;
  1986. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  1987. (taicpu(p).oper[0]^.ref^.index=NR_NO) then
  1988. begin
  1989. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  1990. if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
  1991. MatchInstruction(hp1,A_RET,[S_NO]) then
  1992. begin
  1993. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  1994. taicpu(p).opcode:=A_RET;
  1995. taicpu(p).is_jmp:=false;
  1996. taicpu(p).ops:=taicpu(hp1).ops;
  1997. case taicpu(hp1).ops of
  1998. 0:
  1999. taicpu(p).clearop(0);
  2000. 1:
  2001. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  2002. else
  2003. internalerror(2016041301);
  2004. end;
  2005. result:=true;
  2006. end;
  2007. end;
  2008. end;
  2009. function CanBeCMOV(p : tai) : boolean;
  2010. begin
  2011. CanBeCMOV:=assigned(p) and
  2012. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  2013. { we can't use cmov ref,reg because
  2014. ref could be nil and cmov still throws an exception
  2015. if ref=nil but the mov isn't done (FK)
  2016. or ((taicpu(p).oper[0]^.typ = top_ref) and
  2017. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  2018. }
  2019. MatchOpType(taicpu(p),top_reg,top_reg);
  2020. end;
  2021. function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  2022. var
  2023. hp1,hp2,hp3: tai;
  2024. carryadd_opcode : TAsmOp;
  2025. l : Longint;
  2026. condition : TAsmCond;
  2027. begin
  2028. { jb @@1 cmc
  2029. inc/dec operand --> adc/sbb operand,0
  2030. @@1:
  2031. ... and ...
  2032. jnb @@1
  2033. inc/dec operand --> adc/sbb operand,0
  2034. @@1: }
  2035. result:=false;
  2036. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  2037. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  2038. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  2039. begin
  2040. carryadd_opcode:=A_NONE;
  2041. if Taicpu(p).condition in [C_NAE,C_B] then
  2042. begin
  2043. if Taicpu(hp1).opcode=A_INC then
  2044. carryadd_opcode:=A_ADC;
  2045. if Taicpu(hp1).opcode=A_DEC then
  2046. carryadd_opcode:=A_SBB;
  2047. if carryadd_opcode<>A_NONE then
  2048. begin
  2049. Taicpu(p).clearop(0);
  2050. Taicpu(p).ops:=0;
  2051. Taicpu(p).is_jmp:=false;
  2052. Taicpu(p).opcode:=A_CMC;
  2053. Taicpu(p).condition:=C_NONE;
  2054. Taicpu(hp1).ops:=2;
  2055. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  2056. Taicpu(hp1).loadconst(0,0);
  2057. Taicpu(hp1).opcode:=carryadd_opcode;
  2058. result:=true;
  2059. exit;
  2060. end;
  2061. end;
  2062. if Taicpu(p).condition in [C_AE,C_NB] then
  2063. begin
  2064. if Taicpu(hp1).opcode=A_INC then
  2065. carryadd_opcode:=A_ADC;
  2066. if Taicpu(hp1).opcode=A_DEC then
  2067. carryadd_opcode:=A_SBB;
  2068. if carryadd_opcode<>A_NONE then
  2069. begin
  2070. asml.remove(p);
  2071. p.free;
  2072. Taicpu(hp1).ops:=2;
  2073. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  2074. Taicpu(hp1).loadconst(0,0);
  2075. Taicpu(hp1).opcode:=carryadd_opcode;
  2076. p:=hp1;
  2077. result:=true;
  2078. exit;
  2079. end;
  2080. end;
  2081. end;
  2082. {$ifndef i8086}
  2083. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  2084. begin
  2085. { check for
  2086. jCC xxx
  2087. <several movs>
  2088. xxx:
  2089. }
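{ illustrative example (registers chosen arbitrarily; the movs become CMOVcc
with the inverted condition and the jump is removed):
jne .Lxxx
movl %edx,%eax    -->    cmove %edx,%eax
.Lxxx: }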
  2090. l:=0;
  2091. GetNextInstruction(p, hp1);
  2092. while assigned(hp1) and
  2093. CanBeCMOV(hp1) and
  2094. { stop on labels }
  2095. not(hp1.typ=ait_label) do
  2096. begin
  2097. inc(l);
  2098. GetNextInstruction(hp1,hp1);
  2099. end;
  2100. if assigned(hp1) then
  2101. begin
  2102. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2103. begin
  2104. if (l<=4) and (l>0) then
  2105. begin
  2106. condition:=inverse_cond(taicpu(p).condition);
  2107. hp2:=p;
  2108. GetNextInstruction(p,hp1);
  2109. p:=hp1;
  2110. repeat
  2111. taicpu(hp1).opcode:=A_CMOVcc;
  2112. taicpu(hp1).condition:=condition;
  2113. GetNextInstruction(hp1,hp1);
  2114. until not(assigned(hp1)) or
  2115. not(CanBeCMOV(hp1));
2116. { postpone removing the jump, otherwise GetNextInstruction could
2117. skip the label if the jump being removed held its
2118. only reference }
  2119. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2120. { if the label refs. reach zero, remove any alignment before the label }
  2121. if (hp1.typ=ait_align) and (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
  2122. begin
  2123. asml.Remove(hp1);
  2124. hp1.Free;
  2125. end;
  2126. asml.remove(hp2);
  2127. hp2.free;
  2128. result:=true;
  2129. exit;
  2130. end;
  2131. end
  2132. else
  2133. begin
  2134. { check further for
  2135. jCC xxx
  2136. <several movs 1>
  2137. jmp yyy
  2138. xxx:
  2139. <several movs 2>
  2140. yyy:
  2141. }
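{ illustrative example (registers chosen arbitrarily; both mov groups become
CMOVcc with opposite conditions, and both jumps are removed):
jne .Lxxx
movl %edx,%eax           cmove  %edx,%eax
jmp .Lyyy         -->    cmovne %ecx,%eax
.Lxxx:
movl %ecx,%eax
.Lyyy: }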
  2142. { hp2 points to jmp yyy }
  2143. hp2:=hp1;
  2144. { skip hp1 to xxx }
  2145. GetNextInstruction(hp1, hp1);
  2146. if assigned(hp2) and
  2147. assigned(hp1) and
  2148. (l<=3) and
  2149. (hp2.typ=ait_instruction) and
  2150. (taicpu(hp2).is_jmp) and
  2151. (taicpu(hp2).condition=C_None) and
  2152. { real label and jump, no further references to the
  2153. label are allowed }
  2154. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
  2155. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2156. begin
  2157. l:=0;
  2158. { skip hp1 to <several moves 2> }
  2159. GetNextInstruction(hp1, hp1);
  2160. while assigned(hp1) and
  2161. CanBeCMOV(hp1) do
  2162. begin
  2163. inc(l);
  2164. GetNextInstruction(hp1, hp1);
  2165. end;
  2166. { hp1 points to yyy: }
  2167. if assigned(hp1) and
  2168. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  2169. begin
  2170. condition:=inverse_cond(taicpu(p).condition);
  2171. GetNextInstruction(p,hp1);
  2172. hp3:=p;
  2173. p:=hp1;
  2174. repeat
  2175. taicpu(hp1).opcode:=A_CMOVcc;
  2176. taicpu(hp1).condition:=condition;
  2177. GetNextInstruction(hp1,hp1);
  2178. until not(assigned(hp1)) or
  2179. not(CanBeCMOV(hp1));
  2180. { hp2 is still at jmp yyy }
  2181. GetNextInstruction(hp2,hp1);
  2182. { hp2 is now at xxx: }
  2183. condition:=inverse_cond(condition);
  2184. GetNextInstruction(hp1,hp1);
  2185. { hp1 is now at <several movs 2> }
  2186. repeat
  2187. taicpu(hp1).opcode:=A_CMOVcc;
  2188. taicpu(hp1).condition:=condition;
  2189. GetNextInstruction(hp1,hp1);
  2190. until not(assigned(hp1)) or
  2191. not(CanBeCMOV(hp1));
  2192. {
  2193. asml.remove(hp1.next)
  2194. hp1.next.free;
  2195. asml.remove(hp1);
  2196. hp1.free;
  2197. }
  2198. { remove jCC }
  2199. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  2200. asml.remove(hp3);
  2201. hp3.free;
  2202. { remove jmp }
  2203. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2204. asml.remove(hp2);
  2205. hp2.free;
  2206. result:=true;
  2207. exit;
  2208. end;
  2209. end;
  2210. end;
  2211. end;
  2212. end;
  2213. {$endif i8086}
  2214. end;
  2215. function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  2216. var
  2217. hp1,hp2: tai;
  2218. begin
  2219. result:=false;
  2220. if (taicpu(p).oper[1]^.typ = top_reg) and
  2221. GetNextInstruction(p,hp1) and
  2222. (hp1.typ = ait_instruction) and
  2223. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  2224. GetNextInstruction(hp1,hp2) and
  2225. MatchInstruction(hp2,A_MOV,[]) and
  2226. (taicpu(hp2).oper[0]^.typ = top_reg) and
  2227. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  2228. {$ifdef i386}
  2229. { not all registers have byte size sub registers on i386 }
  2230. ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
  2231. {$endif i386}
  2232. (((taicpu(hp1).ops=2) and
  2233. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  2234. ((taicpu(hp1).ops=1) and
  2235. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  2236. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  2237. begin
  2238. { change movsX/movzX reg/ref, reg2
  2239. add/sub/or/... reg3/$const, reg2
2240. mov reg2, reg/ref
  2241. to add/sub/or/... reg3/$const, reg/ref }
  2242. { by example:
  2243. movswl %si,%eax movswl %si,%eax p
  2244. decl %eax addl %edx,%eax hp1
  2245. movw %ax,%si movw %ax,%si hp2
  2246. ->
  2247. movswl %si,%eax movswl %si,%eax p
  2248. decw %eax addw %edx,%eax hp1
  2249. movw %ax,%si movw %ax,%si hp2
  2250. }
  2251. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2252. {
  2253. ->
  2254. movswl %si,%eax movswl %si,%eax p
  2255. decw %si addw %dx,%si hp1
  2256. movw %ax,%si movw %ax,%si hp2
  2257. }
  2258. case taicpu(hp1).ops of
  2259. 1:
  2260. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  2261. 2:
  2262. begin
  2263. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  2264. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  2265. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2266. end;
  2267. else
  2268. internalerror(2008042701);
  2269. end;
  2270. {
  2271. ->
  2272. decw %si addw %dx,%si p
  2273. }
  2274. DebugMsg('PeepHole Optimization,var3',p);
  2275. asml.remove(p);
  2276. asml.remove(hp2);
  2277. p.free;
  2278. hp2.free;
  2279. p:=hp1;
  2280. end
  2281. { removes superfluous And's after movzx's }
  2282. else if taicpu(p).opcode=A_MOVZX then
  2283. begin
  2284. if (taicpu(p).oper[1]^.typ = top_reg) and
  2285. GetNextInstruction(p, hp1) and
  2286. (tai(hp1).typ = ait_instruction) and
  2287. (taicpu(hp1).opcode = A_AND) and
  2288. (taicpu(hp1).oper[0]^.typ = top_const) and
  2289. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2290. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2291. begin
  2292. case taicpu(p).opsize Of
  2293. S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
  2294. if (taicpu(hp1).oper[0]^.val = $ff) then
  2295. begin
  2296. DebugMsg('PeepHole Optimization,var4',p);
  2297. asml.remove(hp1);
  2298. hp1.free;
  2299. end;
  2300. S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
  2301. if (taicpu(hp1).oper[0]^.val = $ffff) then
  2302. begin
  2303. DebugMsg('PeepHole Optimization,var5',p);
  2304. asml.remove(hp1);
  2305. hp1.free;
  2306. end;
  2307. {$ifdef x86_64}
  2308. S_LQ:
  2309. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  2310. begin
  2311. if (cs_asm_source in current_settings.globalswitches) then
  2312. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
  2313. asml.remove(hp1);
  2314. hp1.Free;
  2315. end;
  2316. {$endif x86_64}
  2317. end;
  2318. end;
2319. { changes some movzx constructs to faster synonyms (all examples
  2320. are given with eax/ax, but are also valid for other registers)}
  2321. if (taicpu(p).oper[1]^.typ = top_reg) then
  2322. if (taicpu(p).oper[0]^.typ = top_reg) then
  2323. case taicpu(p).opsize of
  2324. S_BW:
  2325. begin
  2326. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2327. not(cs_opt_size in current_settings.optimizerswitches) then
2328. {Change "movzbw %al, %ax" to "andw $0xff, %ax"}
  2329. begin
  2330. taicpu(p).opcode := A_AND;
  2331. taicpu(p).changeopsize(S_W);
  2332. taicpu(p).loadConst(0,$ff);
  2333. DebugMsg('PeepHole Optimization,var7',p);
  2334. end
  2335. else if GetNextInstruction(p, hp1) and
  2336. (tai(hp1).typ = ait_instruction) and
  2337. (taicpu(hp1).opcode = A_AND) and
  2338. (taicpu(hp1).oper[0]^.typ = top_const) and
  2339. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2340. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2341. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  2342. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  2343. begin
  2344. DebugMsg('PeepHole Optimization,var8',p);
  2345. taicpu(p).opcode := A_MOV;
  2346. taicpu(p).changeopsize(S_W);
  2347. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  2348. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2349. end;
  2350. end;
  2351. S_BL:
  2352. begin
  2353. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2354. not(cs_opt_size in current_settings.optimizerswitches) then
2355. { Change "movzbl %al, %eax" to "andl $0xff, %eax" }
  2356. begin
  2357. taicpu(p).opcode := A_AND;
  2358. taicpu(p).changeopsize(S_L);
  2359. taicpu(p).loadConst(0,$ff)
  2360. end
  2361. else if GetNextInstruction(p, hp1) and
  2362. (tai(hp1).typ = ait_instruction) and
  2363. (taicpu(hp1).opcode = A_AND) and
  2364. (taicpu(hp1).oper[0]^.typ = top_const) and
  2365. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2366. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2367. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  2368. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  2369. begin
  2370. DebugMsg('PeepHole Optimization,var10',p);
  2371. taicpu(p).opcode := A_MOV;
  2372. taicpu(p).changeopsize(S_L);
  2373. { do not use R_SUBWHOLE
  2374. as movl %rdx,%eax
  2375. is invalid in assembler PM }
  2376. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2377. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2378. end
  2379. end;
  2380. {$ifndef i8086}
  2381. S_WL:
  2382. begin
  2383. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2384. not(cs_opt_size in current_settings.optimizerswitches) then
2385. { Change "movzwl %ax, %eax" to "andl $0xffff, %eax" }
  2386. begin
  2387. DebugMsg('PeepHole Optimization,var11',p);
  2388. taicpu(p).opcode := A_AND;
  2389. taicpu(p).changeopsize(S_L);
  2390. taicpu(p).loadConst(0,$ffff);
  2391. end
  2392. else if GetNextInstruction(p, hp1) and
  2393. (tai(hp1).typ = ait_instruction) and
  2394. (taicpu(hp1).opcode = A_AND) and
  2395. (taicpu(hp1).oper[0]^.typ = top_const) and
  2396. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2397. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2398. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  2399. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  2400. begin
  2401. DebugMsg('PeepHole Optimization,var12',p);
  2402. taicpu(p).opcode := A_MOV;
  2403. taicpu(p).changeopsize(S_L);
  2404. { do not use R_SUBWHOLE
  2405. as movl %rdx,%eax
  2406. is invalid in assembler PM }
  2407. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2408. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2409. end;
  2410. end;
  2411. {$endif i8086}
  2412. end
  2413. else if (taicpu(p).oper[0]^.typ = top_ref) then
  2414. begin
  2415. if GetNextInstruction(p, hp1) and
  2416. (tai(hp1).typ = ait_instruction) and
  2417. (taicpu(hp1).opcode = A_AND) and
  2418. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2419. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2420. begin
  2421. taicpu(p).opcode := A_MOV;
  2422. case taicpu(p).opsize Of
  2423. S_BL:
  2424. begin
  2425. DebugMsg('PeepHole Optimization,var13',p);
  2426. taicpu(p).changeopsize(S_L);
  2427. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2428. end;
  2429. S_WL:
  2430. begin
  2431. DebugMsg('PeepHole Optimization,var14',p);
  2432. taicpu(p).changeopsize(S_L);
  2433. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2434. end;
  2435. S_BW:
  2436. begin
  2437. DebugMsg('PeepHole Optimization,var15',p);
  2438. taicpu(p).changeopsize(S_W);
  2439. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2440. end;
  2441. {$ifdef x86_64}
  2442. S_BQ:
  2443. begin
  2444. DebugMsg('PeepHole Optimization,var16',p);
  2445. taicpu(p).changeopsize(S_Q);
  2446. taicpu(hp1).loadConst(
  2447. 0, taicpu(hp1).oper[0]^.val and $ff);
  2448. end;
  2449. S_WQ:
  2450. begin
  2451. DebugMsg('PeepHole Optimization,var17',p);
  2452. taicpu(p).changeopsize(S_Q);
  2453. taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
  2454. end;
  2455. S_LQ:
  2456. begin
  2457. DebugMsg('PeepHole Optimization,var18',p);
  2458. taicpu(p).changeopsize(S_Q);
  2459. taicpu(hp1).loadConst(
  2460. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  2461. end;
  2462. {$endif x86_64}
  2463. else
  2464. Internalerror(2017050704)
  2465. end;
  2466. end;
  2467. end;
  2468. end;
  2469. end;
  2470. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  2471. var
  2472. hp1 : tai;
  2473. begin
  2474. Result:=false;
  2475. if not(GetNextInstruction(p, hp1)) then
  2476. exit;
  2477. if MatchOpType(taicpu(p),top_const,top_reg) and
  2478. MatchInstruction(hp1,A_AND,[]) and
  2479. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2480. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2481. { the second register must contain the first one, so compare their subreg types }
  2482. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2483. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  2484. { change
  2485. and const1, reg
  2486. and const2, reg
  2487. to
  2488. and (const1 and const2), reg
  2489. }
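{ illustrative example (register and constants chosen arbitrarily):
andl $0xff00,%eax
andl $0x0ff0,%eax    -->    andl $0x0f00,%eax }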
  2490. begin
  2491. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  2492. DebugMsg('Peephole AndAnd2And done',hp1);
  2493. asml.remove(p);
  2494. p.Free;
  2495. p:=hp1;
  2496. Result:=true;
  2497. exit;
  2498. end
  2499. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2500. MatchInstruction(hp1,A_MOVZX,[]) and
  2501. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2502. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2503. (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2504. (((taicpu(p).opsize=S_W) and
  2505. (taicpu(hp1).opsize=S_BW)) or
  2506. ((taicpu(p).opsize=S_L) and
  2507. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2508. {$ifdef x86_64}
  2509. or
  2510. ((taicpu(p).opsize=S_Q) and
  2511. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2512. {$endif x86_64}
  2513. ) then
  2514. begin
  2515. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2516. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  2517. ) or
  2518. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2519. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  2520. {$ifdef x86_64}
  2521. or
  2522. (((taicpu(hp1).opsize)=S_LQ) and
  2523. ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
  2524. )
  2525. {$endif x86_64}
  2526. then
  2527. begin
  2528. DebugMsg('Peephole AndMovzToAnd done',p);
  2529. asml.remove(hp1);
  2530. hp1.free;
  2531. end;
  2532. end
  2533. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2534. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  2535. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2536. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2537. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2538. (((taicpu(p).opsize=S_W) and
  2539. (taicpu(hp1).opsize=S_BW)) or
  2540. ((taicpu(p).opsize=S_L) and
  2541. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2542. {$ifdef x86_64}
  2543. or
  2544. ((taicpu(p).opsize=S_Q) and
  2545. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2546. {$endif x86_64}
  2547. ) then
  2548. begin
  2549. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2550. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  2551. ) or
  2552. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2553. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  2554. {$ifdef x86_64}
  2555. or
  2556. (((taicpu(hp1).opsize)=S_LQ) and
  2557. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  2558. )
  2559. {$endif x86_64}
  2560. then
  2561. begin
  2562. DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
  2563. asml.remove(hp1);
  2564. hp1.free;
  2565. end;
  2566. end
  2567. else if (taicpu(p).oper[1]^.typ = top_reg) and
  2568. (hp1.typ = ait_instruction) and
  2569. (taicpu(hp1).is_jmp) and
  2570. (taicpu(hp1).opcode<>A_JMP) and
  2571. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  2572. { change
  2573. and x, reg
  2574. jxx
  2575. to
  2576. test x, reg
  2577. jxx
  2578. if reg is deallocated before the
  2579. jump, but only if it's a conditional jump (PFV)
  2580. }
  2581. taicpu(p).opcode := A_TEST;
  2582. end;
  2583. function TX86AsmOptimizer.PostPeepholeOptMov(const p : tai) : Boolean;
  2584. begin
  2585. Result:=false;
  2586. if (taicpu(p).oper[1]^.typ = Top_Reg) and
  2587. not(RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2588. begin
  2589. if (taicpu(p).oper[0]^.typ = top_const) then
  2590. begin
  2591. case taicpu(p).oper[0]^.val of
  2592. 0:
  2593. begin
  2594. { change "mov $0,%reg" into "xor %reg,%reg" }
  2595. taicpu(p).opcode := A_XOR;
  2596. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  2597. end;
  2598. $1..$FFFFFFFF:
  2599. begin
  2600. { Code size reduction by J. Gareth "Kit" Moreton }
  2601. { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
  2602. case taicpu(p).opsize of
  2603. S_Q:
  2604. begin
  2605. DebugMsg('Peephole Optimization: movq x,%reg -> movd x,%reg (x is a 32-bit constant)', p);
  2606. TRegisterRec(taicpu(p).oper[1]^.reg).subreg := R_SUBD;
  2607. taicpu(p).opsize := S_L;
  2608. end;
  2609. end;
  2610. end;
  2611. end;
  2612. end;
  2613. end;
  2614. end;
  2615. function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
  2616. begin
  2617. Result:=false;
  2618. { change "cmp $0, %reg" to "test %reg, %reg" }
  2619. if MatchOpType(taicpu(p),top_const,top_reg) and
  2620. (taicpu(p).oper[0]^.val = 0) then
  2621. begin
  2622. taicpu(p).opcode := A_TEST;
  2623. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  2624. Result:=true;
  2625. end;
  2626. end;
  2627. function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
  2628. var
  2629. IsTestConstX : Boolean;
  2630. hp1,hp2 : tai;
  2631. begin
  2632. Result:=false;
  2633. { removes the line marked with (x) from the sequence
  2634. and/or/xor/add/sub/... $x, %y
  2635. test/or %y, %y | test $-1, %y (x)
  2636. j(n)z _Label
  2637. as the first instruction already adjusts the ZF
  2638. %y operand may also be a reference }
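{ illustrative example (register chosen arbitrarily):
addl $1,%eax                 addl $1,%eax
testl %eax,%eax      -->
jnz .L1                      jnz .L1 }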
  2639. IsTestConstX:=(taicpu(p).opcode=A_TEST) and
  2640. MatchOperand(taicpu(p).oper[0]^,-1);
  2641. if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
  2642. GetLastInstruction(p, hp1) and
  2643. (tai(hp1).typ = ait_instruction) and
  2644. GetNextInstruction(p,hp2) and
  2645. MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
  2646. case taicpu(hp1).opcode Of
  2647. A_ADD, A_SUB, A_OR, A_XOR, A_AND:
  2648. begin
  2649. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  2650. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2651. { and in case of carry for A(E)/B(E)/C/NC }
  2652. ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
  2653. ((taicpu(hp1).opcode <> A_ADD) and
  2654. (taicpu(hp1).opcode <> A_SUB))) then
  2655. begin
  2656. hp1 := tai(p.next);
  2657. asml.remove(p);
  2658. p.free;
  2659. p := tai(hp1);
  2660. Result:=true;
  2661. end;
  2662. end;
  2663. A_SHL, A_SAL, A_SHR, A_SAR:
  2664. begin
  2665. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  2666. { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
  2667. { therefore, it's only safe to do this optimization for }
  2668. { shifts by a (nonzero) constant }
  2669. (taicpu(hp1).oper[0]^.typ = top_const) and
  2670. (taicpu(hp1).oper[0]^.val <> 0) and
  2671. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2672. { and in case of carry for A(E)/B(E)/C/NC }
  2673. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  2674. begin
  2675. hp1 := tai(p.next);
  2676. asml.remove(p);
  2677. p.free;
  2678. p := tai(hp1);
  2679. Result:=true;
  2680. end;
  2681. end;
  2682. A_DEC, A_INC, A_NEG:
  2683. begin
  2684. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
  2685. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2686. { and in case of carry for A(E)/B(E)/C/NC }
  2687. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  2688. begin
  2689. case taicpu(hp1).opcode Of
  2690. A_DEC, A_INC:
  2691. { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
  2692. begin
  2693. case taicpu(hp1).opcode Of
  2694. A_DEC: taicpu(hp1).opcode := A_SUB;
  2695. A_INC: taicpu(hp1).opcode := A_ADD;
  2696. end;
  2697. taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
  2698. taicpu(hp1).loadConst(0,1);
  2699. taicpu(hp1).ops:=2;
  2700. end
  2701. end;
  2702. hp1 := tai(p.next);
  2703. asml.remove(p);
  2704. p.free;
  2705. p := tai(hp1);
  2706. Result:=true;
  2707. end;
  2708. end
  2709. else
  2710. { change "test $-1,%reg" into "test %reg,%reg" }
  2711. if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  2712. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  2713. end { case }
  2714. { change "test $-1,%reg" into "test %reg,%reg" }
  2715. else if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  2716. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  2717. end;
  2718. procedure TX86AsmOptimizer.OptReferences;
  2719. var
  2720. p: tai;
  2721. i: Integer;
  2722. begin
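{ run optimize_ref over every reference operand of every instruction in the current block }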
  2723. p := BlockStart;
  2724. while (p <> BlockEnd) Do
  2725. begin
  2726. if p.typ=ait_instruction then
  2727. begin
  2728. for i:=0 to taicpu(p).ops-1 do
  2729. if taicpu(p).oper[i]^.typ=top_ref then
  2730. optimize_ref(taicpu(p).oper[i]^.ref^,false);
  2731. end;
  2732. p:=tai(p.next);
  2733. end;
  2734. end;
  2735. end.