aoptx86.pas
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
  31. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  32. protected
  33. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  34. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  35. { checks whether reading the value in reg1 depends on the value of reg2. This
  36. is very similar to SuperRegisterEquals, except it takes into account that
  37. R_SUBH and R_SUBL are independent (e.g. reading from AL does not
  38. depend on the value in AH). }
  39. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  40. procedure DebugMsg(const s : string; p : tai);inline;
  41. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  42. class function IsExitCode(p : tai) : boolean;
  43. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  44. procedure RemoveLastDeallocForFuncRes(p : tai);
  45. function PrePeepholeOptSxx(var p : tai) : boolean;
  46. function OptPass1AND(var p : tai) : boolean;
  47. function OptPass1VMOVAP(var p : tai) : boolean;
  48. function OptPass1VOP(const p : tai) : boolean;
  49. function OptPass1MOV(var p : tai) : boolean;
  50. function OptPass1Movx(var p : tai) : boolean;
  51. function OptPass1MOVAP(var p : tai) : boolean;
  52. function OptPass1MOVXX(var p : tai) : boolean;
  53. function OptPass1OP(const p : tai) : boolean;
  54. function OptPass1LEA(var p : tai) : boolean;
  55. function OptPass2MOV(var p : tai) : boolean;
  56. function OptPass2Imul(var p : tai) : boolean;
  57. function OptPass2Jmp(var p : tai) : boolean;
  58. function OptPass2Jcc(var p : tai) : boolean;
  59. function PostPeepholeOptMov(const p : tai) : Boolean;
  60. function PostPeepholeOptCmp(var p : tai) : Boolean;
  61. function PostPeepholeOptTestOr(var p : tai) : Boolean;
  62. procedure OptReferences;
  63. end;
  64. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  65. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  66. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  67. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  68. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  69. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  70. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  71. function RefsEqual(const r1, r2: treference): boolean;
  72. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  73. { returns true if ref is a reference that uses only the registers passed as base and index
  74. and has an offset }
  75. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  76. implementation
  77. uses
  78. cutils,verbose,
  79. globals,
  80. cpuinfo,
  81. procinfo,
  82. aasmbase,
  83. aoptutils,
  84. symconst,symsym,
  85. cgx86,
  86. itcpugas;
  87. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  88. begin
  89. result :=
  90. (instr.typ = ait_instruction) and
  91. (taicpu(instr).opcode = op) and
  92. ((opsize = []) or (taicpu(instr).opsize in opsize));
  93. end;
  94. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  95. begin
  96. result :=
  97. (instr.typ = ait_instruction) and
  98. ((taicpu(instr).opcode = op1) or
  99. (taicpu(instr).opcode = op2)
  100. ) and
  101. ((opsize = []) or (taicpu(instr).opsize in opsize));
  102. end;
  103. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  104. begin
  105. result :=
  106. (instr.typ = ait_instruction) and
  107. ((taicpu(instr).opcode = op1) or
  108. (taicpu(instr).opcode = op2) or
  109. (taicpu(instr).opcode = op3)
  110. ) and
  111. ((opsize = []) or (taicpu(instr).opsize in opsize));
  112. end;
  113. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  114. const opsize : topsizes) : boolean;
  115. var
  116. op : TAsmOp;
  117. begin
  118. result:=false;
  119. for op in ops do
  120. begin
  121. if (instr.typ = ait_instruction) and
  122. (taicpu(instr).opcode = op) and
  123. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  124. begin
  125. result:=true;
  126. exit;
  127. end;
  128. end;
  129. end;
  130. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  131. begin
  132. result := (oper.typ = top_reg) and (oper.reg = reg);
  133. end;
  134. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  135. begin
  136. result := (oper.typ = top_const) and (oper.val = a);
  137. end;
  138. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  139. begin
  140. result := oper1.typ = oper2.typ;
  141. if result then
  142. case oper1.typ of
  143. top_const:
  144. Result:=oper1.val = oper2.val;
  145. top_reg:
  146. Result:=oper1.reg = oper2.reg;
  147. top_ref:
  148. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  149. else
  150. internalerror(2013102801);
  151. end
  152. end;
  153. function RefsEqual(const r1, r2: treference): boolean;
  154. begin
  155. RefsEqual :=
  156. (r1.offset = r2.offset) and
  157. (r1.segment = r2.segment) and (r1.base = r2.base) and
  158. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  159. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  160. (r1.relsymbol = r2.relsymbol);
  161. end;
  162. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  163. begin
  164. Result:=(ref.offset=0) and
  165. (ref.scalefactor in [0,1]) and
  166. (ref.segment=NR_NO) and
  167. (ref.symbol=nil) and
  168. (ref.relsymbol=nil) and
  169. ((base=NR_INVALID) or
  170. (ref.base=base)) and
  171. ((index=NR_INVALID) or
  172. (ref.index=index));
  173. end;
  174. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  175. begin
  176. Result:=(ref.scalefactor in [0,1]) and
  177. (ref.segment=NR_NO) and
  178. (ref.symbol=nil) and
  179. (ref.relsymbol=nil) and
  180. ((base=NR_INVALID) or
  181. (ref.base=base)) and
  182. ((index=NR_INVALID) or
  183. (ref.index=index));
  184. end;
  185. function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  186. begin
  187. Result:=RegReadByInstruction(reg,hp);
  188. end;
  189. function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  190. var
  191. p: taicpu;
  192. opcount: longint;
  193. begin
  194. RegReadByInstruction := false;
  195. if hp.typ <> ait_instruction then
  196. exit;
  197. p := taicpu(hp);
  198. case p.opcode of
  199. A_CALL:
  200. regreadbyinstruction := true;
  201. A_IMUL:
  202. case p.ops of
  203. 1:
  204. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  205. (
  206. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  207. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  208. );
  209. 2,3:
  210. regReadByInstruction :=
  211. reginop(reg,p.oper[0]^) or
  212. reginop(reg,p.oper[1]^);
  213. end;
  214. A_MUL:
  215. begin
  216. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  217. (
  218. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  219. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  220. );
  221. end;
  222. A_IDIV,A_DIV:
  223. begin
  224. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  225. (
  226. (getregtype(reg)=R_INTREGISTER) and
  227. (
  228. (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
  229. )
  230. );
  231. end;
  232. else
  233. begin
  234. if (p.opcode=A_LEA) and is_segment_reg(reg) then
  235. begin
  236. RegReadByInstruction := false;
  237. exit;
  238. end;
  239. for opcount := 0 to p.ops-1 do
  240. if (p.oper[opCount]^.typ = top_ref) and
  241. RegInRef(reg,p.oper[opcount]^.ref^) then
  242. begin
  243. RegReadByInstruction := true;
  244. exit
  245. end;
  246. { special handling for SSE MOVSD }
  247. if (p.opcode=A_MOVSD) and (p.ops>0) then
  248. begin
  249. if p.ops<>2 then
  250. internalerror(2017042702);
  251. regReadByInstruction := reginop(reg,p.oper[0]^) or
  252. (
  253. (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
  254. );
  255. exit;
  256. end;
  257. with insprop[p.opcode] do
  258. begin
  259. if getregtype(reg)=R_INTREGISTER then
  260. begin
  261. case getsupreg(reg) of
  262. RS_EAX:
  263. if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
  264. begin
  265. RegReadByInstruction := true;
  266. exit
  267. end;
  268. RS_ECX:
  269. if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
  270. begin
  271. RegReadByInstruction := true;
  272. exit
  273. end;
  274. RS_EDX:
  275. if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
  276. begin
  277. RegReadByInstruction := true;
  278. exit
  279. end;
  280. RS_EBX:
  281. if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
  282. begin
  283. RegReadByInstruction := true;
  284. exit
  285. end;
  286. RS_ESP:
  287. if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
  288. begin
  289. RegReadByInstruction := true;
  290. exit
  291. end;
  292. RS_EBP:
  293. if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
  294. begin
  295. RegReadByInstruction := true;
  296. exit
  297. end;
  298. RS_ESI:
  299. if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
  300. begin
  301. RegReadByInstruction := true;
  302. exit
  303. end;
  304. RS_EDI:
  305. if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
  306. begin
  307. RegReadByInstruction := true;
  308. exit
  309. end;
  310. end;
  311. end;
  312. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  313. begin
  314. if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
  315. begin
  316. case p.condition of
  317. C_A,C_NBE, { CF=0 and ZF=0 }
  318. C_BE,C_NA: { CF=1 or ZF=1 }
  319. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
  320. C_AE,C_NB,C_NC, { CF=0 }
  321. C_B,C_NAE,C_C: { CF=1 }
  322. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
  323. C_NE,C_NZ, { ZF=0 }
  324. C_E,C_Z: { ZF=1 }
  325. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
  326. C_G,C_NLE, { ZF=0 and SF=OF }
  327. C_LE,C_NG: { ZF=1 or SF<>OF }
  328. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  329. C_GE,C_NL, { SF=OF }
  330. C_L,C_NGE: { SF<>OF }
  331. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  332. C_NO, { OF=0 }
  333. C_O: { OF=1 }
  334. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
  335. C_NP,C_PO, { PF=0 }
  336. C_P,C_PE: { PF=1 }
  337. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
  338. C_NS, { SF=0 }
  339. C_S: { SF=1 }
  340. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
  341. else
  342. internalerror(2017042701);
  343. end;
  344. if RegReadByInstruction then
  345. exit;
  346. end;
  347. case getsubreg(reg) of
  348. R_SUBW,R_SUBD,R_SUBQ:
  349. RegReadByInstruction :=
  350. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  351. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  352. Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
  353. R_SUBFLAGCARRY:
  354. RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  355. R_SUBFLAGPARITY:
  356. RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  357. R_SUBFLAGAUXILIARY:
  358. RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  359. R_SUBFLAGZERO:
  360. RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  361. R_SUBFLAGSIGN:
  362. RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  363. R_SUBFLAGOVERFLOW:
  364. RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  365. R_SUBFLAGINTERRUPT:
  366. RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
  367. R_SUBFLAGDIRECTION:
  368. RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  369. else
  370. internalerror(2017042601);
  371. end;
  372. exit;
  373. end;
  374. if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
  375. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  376. (p.oper[0]^.reg=p.oper[1]^.reg) then
  377. exit;
  378. if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
  379. begin
  380. RegReadByInstruction := true;
  381. exit
  382. end;
  383. if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
  384. begin
  385. RegReadByInstruction := true;
  386. exit
  387. end;
  388. if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
  389. begin
  390. RegReadByInstruction := true;
  391. exit
  392. end;
  393. if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
  394. begin
  395. RegReadByInstruction := true;
  396. exit
  397. end;
  398. end;
  399. end;
  400. end;
  401. end;
  402. {$ifdef DEBUG_AOPTCPU}
  403. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  404. begin
  405. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  406. end;
  407. {$else DEBUG_AOPTCPU}
  408. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  409. begin
  410. end;
  411. {$endif DEBUG_AOPTCPU}
  412. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  413. begin
  414. if not SuperRegistersEqual(reg1,reg2) then
  415. exit(false);
  416. if getregtype(reg1)<>R_INTREGISTER then
  417. exit(true); {because SuperRegistersEqual is true}
  418. case getsubreg(reg1) of
  419. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  420. higher, it preserves the high bits, so the new value depends on
  421. reg2's previous value. In other words, it is equivalent to doing:
  422. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  423. R_SUBL:
  424. exit(getsubreg(reg2)=R_SUBL);
  425. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  426. higher, it actually does a:
  427. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  428. R_SUBH:
  429. exit(getsubreg(reg2)=R_SUBH);
  430. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  431. bits of reg2:
  432. reg2 := (reg2 and $ffff0000) or word(reg1); }
  433. R_SUBW:
  434. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  435. { a write to R_SUBD always overwrites every other subregister,
  436. because it clears the high 32 bits of R_SUBQ on x86_64 }
  437. R_SUBD,
  438. R_SUBQ:
  439. exit(true);
  440. else
  441. internalerror(2017042801);
  442. end;
  443. end;
  444. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  445. begin
  446. if not SuperRegistersEqual(reg1,reg2) then
  447. exit(false);
  448. if getregtype(reg1)<>R_INTREGISTER then
  449. exit(true); {because SuperRegistersEqual is true}
  450. case getsubreg(reg1) of
  451. R_SUBL:
  452. exit(getsubreg(reg2)<>R_SUBH);
  453. R_SUBH:
  454. exit(getsubreg(reg2)<>R_SUBL);
  455. R_SUBW,
  456. R_SUBD,
  457. R_SUBQ:
  458. exit(true);
  459. else
  460. internalerror(2017042802);
  461. end;
  462. end;
  463. function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  464. var
  465. hp1 : tai;
  466. l : TCGInt;
  467. begin
  468. result:=false;
  469. { changes the code sequence
  470. shr/sar const1, x
  471. shl const2, x
  472. to
  473. either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
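  { Worked example (added for illustration, not part of the original source),
    assuming 32-bit operands and const1 = const2 = 3, i.e. the third branch below:
        shrl $3, %eax
        shll $3, %eax
    becomes
        andl $0xFFFFFFF8, %eax
    since l = (1 shl 3) - 1 = 7 and 7 xor $ffffffff = $fffffff8, which clears
    the low three bits exactly as the shift pair would. }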
  474. if GetNextInstruction(p, hp1) and
  475. MatchInstruction(hp1,A_SHL,[]) and
  476. (taicpu(p).oper[0]^.typ = top_const) and
  477. (taicpu(hp1).oper[0]^.typ = top_const) and
  478. (taicpu(hp1).opsize = taicpu(p).opsize) and
  479. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
  480. OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
  481. begin
  482. if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
  483. not(cs_opt_size in current_settings.optimizerswitches) then
  484. begin
  485. { shr/sar const1, %reg
  486. shl const2, %reg
  487. with const1 > const2 }
  488. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  489. taicpu(hp1).opcode := A_AND;
  490. l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
  491. case taicpu(p).opsize Of
  492. S_B: taicpu(hp1).loadConst(0,l Xor $ff);
  493. S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
  494. S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
  495. S_Q: taicpu(hp1).loadConst(0,l Xor aint($ffffffffffffffff));
  496. else
  497. Internalerror(2017050703)
  498. end;
  499. end
  500. else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
  501. not(cs_opt_size in current_settings.optimizerswitches) then
  502. begin
  503. { shr/sar const1, %reg
  504. shl const2, %reg
  505. with const1 < const2 }
  506. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
  507. taicpu(p).opcode := A_AND;
  508. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  509. case taicpu(p).opsize Of
  510. S_B: taicpu(p).loadConst(0,l Xor $ff);
  511. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  512. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  513. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  514. else
  515. Internalerror(2017050702)
  516. end;
  517. end
  518. else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
  519. begin
  520. { shr/sar const1, %reg
  521. shl const2, %reg
  522. with const1 = const2 }
  523. taicpu(p).opcode := A_AND;
  524. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  525. case taicpu(p).opsize Of
  526. S_B: taicpu(p).loadConst(0,l Xor $ff);
  527. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  528. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  529. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  530. else
  531. Internalerror(2017050701)
  532. end;
  533. asml.remove(hp1);
  534. hp1.free;
  535. end;
  536. end;
  537. end;
  538. { allocates register reg between (and including) instructions p1 and p2
  539. the type of p1 and p2 must not be in SkipInstr
  540. note that this routine is both called from the peephole optimizer
  541. (where optinfo is not yet initialised) and from the cse (where it is) }
  542. procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  543. var
  544. hp, start: tai;
  545. removedsomething,
  546. firstRemovedWasAlloc,
  547. lastRemovedWasDealloc: boolean;
  548. begin
  549. {$ifdef EXTDEBUG}
  550. { if assigned(p1.optinfo) and
  551. (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
  552. internalerror(2004101010); }
  553. {$endif EXTDEBUG}
  554. start := p1;
  555. if (reg = NR_ESP) or
  556. (reg = current_procinfo.framepointer) or
  557. not(assigned(p1)) then
  558. { this happens with registers which are loaded implicitly, outside the }
  559. { current block (e.g. esi with self) }
  560. exit;
  561. { make sure we allocate it for this instruction }
  562. getnextinstruction(p2,p2);
  563. lastRemovedWasDealloc := false;
  564. removedSomething := false;
  565. firstRemovedWasAlloc := false;
  566. {$ifdef allocregdebug}
  567. hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  568. ' from here...'));
  569. insertllitem(asml,p1.previous,p1,hp);
  570. hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  571. ' till here...'));
  572. insertllitem(asml,p2,p2.next,hp);
  573. {$endif allocregdebug}
  574. { do it the safe way: always allocate the full super register,
  575. as we do no register re-allocation in the peephole optimizer,
  576. this does not hurt
  577. }
  578. case getregtype(reg) of
  579. R_MMREGISTER:
  580. reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
  581. R_INTREGISTER:
  582. reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
  583. end;
  584. if not(RegInUsedRegs(reg,initialusedregs)) then
  585. begin
  586. hp := tai_regalloc.alloc(reg,nil);
  587. insertllItem(p1.previous,p1,hp);
  588. IncludeRegInUsedRegs(reg,initialusedregs);
  589. end;
  590. while assigned(p1) and
  591. (p1 <> p2) do
  592. begin
  593. if assigned(p1.optinfo) then
  594. internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
  595. p1 := tai(p1.next);
  596. repeat
  597. while assigned(p1) and
  598. (p1.typ in (SkipInstr-[ait_regalloc])) Do
  599. p1 := tai(p1.next);
  600. { remove all allocation/deallocation info about the register in between }
  601. if assigned(p1) and
  602. (p1.typ = ait_regalloc) then
  603. begin
  604. { same super register, different sub register? }
  605. if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
  606. begin
  607. if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
  608. internalerror(2016101501);
  609. tai_regalloc(p1).reg:=reg;
  610. end;
  611. if tai_regalloc(p1).reg=reg then
  612. begin
  613. if not removedSomething then
  614. begin
  615. firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
  616. removedSomething := true;
  617. end;
  618. lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
  619. hp := tai(p1.Next);
  620. asml.Remove(p1);
  621. p1.free;
  622. p1 := hp;
  623. end
  624. else
  625. p1 := tai(p1.next);
  626. end;
  627. until not(assigned(p1)) or
  628. not(p1.typ in SkipInstr);
  629. end;
  630. if assigned(p1) then
  631. begin
  632. if firstRemovedWasAlloc then
  633. begin
  634. hp := tai_regalloc.Alloc(reg,nil);
  635. insertLLItem(start.previous,start,hp);
  636. end;
  637. if lastRemovedWasDealloc then
  638. begin
  639. hp := tai_regalloc.DeAlloc(reg,nil);
  640. insertLLItem(p1.previous,p1,hp);
  641. end;
  642. end;
  643. end;
  644. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  645. var
  646. p: taicpu;
  647. begin
  648. if not assigned(hp) or
  649. (hp.typ <> ait_instruction) then
  650. begin
  651. Result := false;
  652. exit;
  653. end;
  654. p := taicpu(hp);
  655. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  656. with insprop[p.opcode] do
  657. begin
  658. case getsubreg(reg) of
  659. R_SUBW,R_SUBD,R_SUBQ:
  660. Result:=
  661. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  662. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  663. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  664. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  665. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  666. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  667. R_SUBFLAGCARRY:
  668. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  669. R_SUBFLAGPARITY:
  670. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  671. R_SUBFLAGAUXILIARY:
  672. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  673. R_SUBFLAGZERO:
  674. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  675. R_SUBFLAGSIGN:
  676. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  677. R_SUBFLAGOVERFLOW:
  678. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  679. R_SUBFLAGINTERRUPT:
  680. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  681. R_SUBFLAGDIRECTION:
  682. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  683. else
  684. internalerror(2017050501);
  685. end;
  686. exit;
  687. end;
  688. Result :=
  689. (((p.opcode = A_MOV) or
  690. (p.opcode = A_MOVZX) or
  691. (p.opcode = A_MOVSX) or
  692. (p.opcode = A_LEA) or
  693. (p.opcode = A_VMOVSS) or
  694. (p.opcode = A_VMOVSD) or
  695. (p.opcode = A_VMOVAPD) or
  696. (p.opcode = A_VMOVAPS) or
  697. (p.opcode = A_VMOVQ) or
  698. (p.opcode = A_MOVSS) or
  699. (p.opcode = A_MOVSD) or
  700. (p.opcode = A_MOVQ) or
  701. (p.opcode = A_MOVAPD) or
  702. (p.opcode = A_MOVAPS) or
  703. {$ifndef x86_64}
  704. (p.opcode = A_LDS) or
  705. (p.opcode = A_LES) or
  706. {$endif not x86_64}
  707. (p.opcode = A_LFS) or
  708. (p.opcode = A_LGS) or
  709. (p.opcode = A_LSS)) and
  710. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  711. (p.oper[1]^.typ = top_reg) and
  712. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  713. ((p.oper[0]^.typ = top_const) or
  714. ((p.oper[0]^.typ = top_reg) and
  715. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  716. ((p.oper[0]^.typ = top_ref) and
  717. not RegInRef(reg,p.oper[0]^.ref^)))) or
  718. ((p.opcode = A_POP) and
  719. (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
  720. ((p.opcode = A_IMUL) and
  721. (p.ops=3) and
  722. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  723. (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
  724. ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
  725. ((((p.opcode = A_IMUL) or
  726. (p.opcode = A_MUL)) and
  727. (p.ops=1)) and
  728. (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  729. ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
  730. (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  731. ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  732. ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
  733. {$ifdef x86_64}
  734. or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
  735. {$endif x86_64}
  736. )) or
  737. ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  738. ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
  739. {$ifdef x86_64}
  740. ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
  741. {$endif x86_64}
  742. ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  743. {$ifndef x86_64}
  744. ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  745. ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  746. {$endif not x86_64}
  747. ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  748. ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  749. ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  750. {$ifndef x86_64}
  751. ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  752. {$endif not x86_64}
  753. ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  754. ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
  755. ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
  756. ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
  757. {$ifdef x86_64}
  758. ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
  759. {$endif x86_64}
  760. ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  761. (((p.opcode = A_FSTSW) or
  762. (p.opcode = A_FNSTSW)) and
  763. (p.oper[0]^.typ=top_reg) and
  764. Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  765. (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
  766. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  767. (p.oper[0]^.reg=p.oper[1]^.reg) and
  768. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  769. end;
  770. class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  771. var
  772. hp2,hp3 : tai;
  773. begin
  774. { some x86-64 targets issue a NOP before the real exit code }
  775. if MatchInstruction(p,A_NOP,[]) then
  776. GetNextInstruction(p,p);
  777. result:=assigned(p) and (p.typ=ait_instruction) and
  778. ((taicpu(p).opcode = A_RET) or
  779. ((taicpu(p).opcode=A_LEAVE) and
  780. GetNextInstruction(p,hp2) and
  781. MatchInstruction(hp2,A_RET,[S_NO])
  782. ) or
  783. ((((taicpu(p).opcode=A_MOV) and
  784. MatchOpType(taicpu(p),top_reg,top_reg) and
  785. (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
  786. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
  787. ((taicpu(p).opcode=A_LEA) and
  788. MatchOpType(taicpu(p),top_ref,top_reg) and
  789. (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
  790. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
  791. )
  792. ) and
  793. GetNextInstruction(p,hp2) and
  794. MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
  795. MatchOpType(taicpu(hp2),top_reg) and
  796. (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
  797. GetNextInstruction(hp2,hp3) and
  798. MatchInstruction(hp3,A_RET,[S_NO])
  799. )
  800. );
  801. end;
  802. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  803. begin
  804. isFoldableArithOp := False;
  805. case hp1.opcode of
  806. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  807. isFoldableArithOp :=
  808. ((taicpu(hp1).oper[0]^.typ = top_const) or
  809. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  810. (taicpu(hp1).oper[0]^.reg <> reg))) and
  811. (taicpu(hp1).oper[1]^.typ = top_reg) and
  812. (taicpu(hp1).oper[1]^.reg = reg);
  813. A_INC,A_DEC,A_NEG,A_NOT:
  814. isFoldableArithOp :=
  815. (taicpu(hp1).oper[0]^.typ = top_reg) and
  816. (taicpu(hp1).oper[0]^.reg = reg);
  817. end;
  818. end;
  819. procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
  820. procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
  821. var
  822. hp2: tai;
  823. begin
  824. hp2 := p;
  825. repeat
  826. hp2 := tai(hp2.previous);
  827. if assigned(hp2) and
  828. (hp2.typ = ait_regalloc) and
  829. (tai_regalloc(hp2).ratype=ra_dealloc) and
  830. (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
  831. (getsupreg(tai_regalloc(hp2).reg) = supreg) then
  832. begin
  833. asml.remove(hp2);
  834. hp2.free;
  835. break;
  836. end;
  837. until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
  838. end;
  839. begin
  840. case current_procinfo.procdef.returndef.typ of
  841. arraydef,recorddef,pointerdef,
  842. stringdef,enumdef,procdef,objectdef,errordef,
  843. filedef,setdef,procvardef,
  844. classrefdef,forwarddef:
  845. DoRemoveLastDeallocForFuncRes(RS_EAX);
  846. orddef:
  847. if current_procinfo.procdef.returndef.size <> 0 then
  848. begin
  849. DoRemoveLastDeallocForFuncRes(RS_EAX);
  850. { for int64/qword }
  851. if current_procinfo.procdef.returndef.size = 8 then
  852. DoRemoveLastDeallocForFuncRes(RS_EDX);
  853. end;
  854. end;
  855. end;
  856. function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
  857. var
  858. TmpUsedRegs : TAllUsedRegs;
  859. hp1,hp2 : tai;
  860. alloc ,dealloc: tai_regalloc;
  861. begin
  862. result:=false;
  863. if MatchOpType(taicpu(p),top_reg,top_reg) and
  864. GetNextInstruction(p, hp1) and
  865. (hp1.typ = ait_instruction) and
  866. GetNextInstruction(hp1, hp2) and
  867. MatchInstruction(hp2,taicpu(p).opcode,[]) and
  868. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  869. MatchOpType(taicpu(hp2),top_reg,top_reg) and
  870. MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  871. (((taicpu(p).opcode=A_MOVAPS) and
  872. ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
  873. (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
  874. ((taicpu(p).opcode=A_MOVAPD) and
  875. ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
  876. (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
  877. ) then
  878. { change
  879. movapX reg,reg2
  880. addsX/subsX/... reg3, reg2
  881. movapX reg2,reg
  882. to
  883. addsX/subsX/... reg3,reg
  884. }
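  { Concrete instance (illustration only, not from the original source), valid
    when %xmm0 (reg2) is no longer used after the second movaps:
        movaps %xmm1, %xmm0
        addss  %xmm2, %xmm0
        movaps %xmm0, %xmm1
    becomes
        addss  %xmm2, %xmm1 }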
  885. begin
  886. CopyUsedRegs(TmpUsedRegs);
  887. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  888. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  889. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  890. begin
  891. DebugMsg('Peephole Optimization MovapXOpMovapX2Op ('+
  892. std_op2str[taicpu(p).opcode]+' '+
  893. std_op2str[taicpu(hp1).opcode]+' '+
  894. std_op2str[taicpu(hp2).opcode]+') done',p);
  895. { we cannot eliminate the first move if
  896. the operation uses the same register for source and dest }
  897. if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
  898. begin
  899. asml.remove(p);
  900. p.Free;
  901. end;
  902. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  903. asml.remove(hp2);
  904. hp2.Free;
  905. p:=hp1;
  906. result:=true;
  907. end;
  908. ReleaseUsedRegs(TmpUsedRegs);
  909. end
  910. end;
  911. function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  912. var
  913. TmpUsedRegs : TAllUsedRegs;
  914. hp1,hp2 : tai;
  915. begin
  916. result:=false;
  917. if MatchOpType(taicpu(p),top_reg,top_reg) then
  918. begin
  919. { vmova* reg1,reg1
  920. =>
  921. <nop> }
  922. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  923. begin
  924. GetNextInstruction(p,hp1);
  925. asml.Remove(p);
  926. p.Free;
  927. p:=hp1;
  928. result:=true;
  929. end
  930. else if GetNextInstruction(p,hp1) then
  931. begin
  932. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  933. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  934. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  935. begin
  936. { vmova* reg1,reg2
  937. vmova* reg2,reg3
  938. dealloc reg2
  939. =>
  940. vmova* reg1,reg3 }
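  { For example (illustration only, not from the original source), when %xmm1
    is deallocated after the second instruction:
        vmovaps %xmm0, %xmm1
        vmovaps %xmm1, %xmm2
    becomes
        vmovaps %xmm0, %xmm2 }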
  941. CopyUsedRegs(TmpUsedRegs);
  942. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  943. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  944. begin
  945. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  946. asml.Remove(hp1);
  947. hp1.Free;
  948. result:=true;
  949. end
  950. { special case:
  951. vmova* reg1,reg2
  952. vmova* reg2,reg1
  953. =>
  954. vmova* reg1,reg2 }
  955. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  956. begin
  957. asml.Remove(hp1);
  958. hp1.Free;
  959. result:=true;
  960. end
  961. end
  962. else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
  963. { we mix single and double operations here because we assume that the compiler
  964. generates vmovapd only after double operations and vmovaps only after single operations }
  965. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  966. GetNextInstruction(hp1,hp2) and
  967. MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  968. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  969. begin
  970. CopyUsedRegs(TmpUsedRegs);
  971. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  972. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  973. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  974. then
  975. begin
  976. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  977. asml.Remove(p);
  978. p.Free;
  979. asml.Remove(hp2);
  980. hp2.Free;
  981. p:=hp1;
  982. end;
  983. end;
  984. end;
  985. end;
  986. end;
  987. function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
  988. var
  989. TmpUsedRegs : TAllUsedRegs;
  990. hp1 : tai;
  991. begin
  992. result:=false;
  993. { replace
  994. V<Op>X %mreg1,%mreg2,%mreg3
  995. VMovX %mreg3,%mreg4
  996. dealloc %mreg3
  997. by
  998. V<Op>X %mreg1,%mreg2,%mreg4
  999. ?
  1000. }
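  { Sketch of the pattern in AT&T syntax (illustration only, not from the
    original source), assuming %xmm3 is deallocated after the vmovapd:
        vaddsd  %xmm1, %xmm2, %xmm3
        vmovapd %xmm3, %xmm4
    becomes
        vaddsd  %xmm1, %xmm2, %xmm4 }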
  1001. if GetNextInstruction(p,hp1) and
  1002. { we mix single and double operations here because we assume that the compiler
  1003. generates vmovapd only after double operations and vmovaps only after single operations }
  1004. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  1005. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  1006. (taicpu(hp1).oper[1]^.typ=top_reg) then
  1007. begin
  1008. CopyUsedRegs(TmpUsedRegs);
  1009. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1010. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  1011. ) then
  1012. begin
  1013. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  1014. DebugMsg('PeepHole Optimization VOpVmov2VOp done',p);
  1015. asml.Remove(hp1);
  1016. hp1.Free;
  1017. result:=true;
  1018. end;
  1019. end;
  1020. end;
  1021. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  1022. var
  1023. hp1, hp2: tai;
  1024. TmpUsedRegs : TAllUsedRegs;
  1025. GetNextInstruction_p : Boolean;
  1026. begin
  1027. Result:=false;
  1028. { remove mov reg1,reg1? }
  1029. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  1030. begin
  1031. GetNextInstruction(p, hp1);
  1032. DebugMsg('PeepHole Optimization Mov2Nop done',p);
  1033. asml.remove(p);
  1034. p.free;
  1035. p:=hp1;
  1036. Result:=true;
  1037. exit;
  1038. end;
  1039. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  1040. if GetNextInstruction_p and
  1041. MatchInstruction(hp1,A_AND,[]) and
  1042. (taicpu(p).oper[1]^.typ = top_reg) and
  1043. MatchOpType(taicpu(hp1),top_const,top_reg) and
  1044. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  1045. case taicpu(p).opsize Of
  1046. S_L:
  1047. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1048. begin
  1049. { Optimize out:
  1050. mov x, %reg
  1051. and ffffffffh, %reg
  1052. }
  1053. DebugMsg('PeepHole Optimization MovAnd2Mov 1 done',p);
  1054. asml.remove(hp1);
  1055. hp1.free;
  1056. Result:=true;
  1057. exit;
  1058. end;
  1059. S_Q: { TODO: Confirm if this is even possible }
  1060. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  1061. begin
  1062. { Optimize out:
  1063. mov x, %reg
  1064. and ffffffffffffffffh, %reg
  1065. }
  1066. DebugMsg('PeepHole Optimization MovAnd2Mov 2 done',p);
  1067. asml.remove(hp1);
  1068. hp1.free;
  1069. Result:=true;
  1070. exit;
  1071. end;
  1072. end
  1073. else if GetNextInstruction_p and
  1074. MatchInstruction(hp1,A_MOV,[]) and
  1075. (taicpu(p).oper[1]^.typ = top_reg) and
  1076. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  1077. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1078. begin
  1079. CopyUsedRegs(TmpUsedRegs);
  1080. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1081. { we have
  1082. mov x, %treg
  1083. mov %treg, y
  1084. }
  1085. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  1086. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1087. { we've got
  1088. mov x, %treg
  1089. mov %treg, y
  1090. with %treg is not used after }
  1091. case taicpu(p).oper[0]^.typ Of
  1092. top_reg:
  1093. begin
  1094. { change
  1095. mov %reg, %treg
  1096. mov %treg, y
  1097. to
  1098. mov %reg, y
  1099. }
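  { For instance (illustration only, not from the original source), when %eax
    (the temporary register) is not used after the second mov:
        movl %esi, %eax
        movl %eax, (%edi)
    becomes
        movl %esi, (%edi) }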
  1100. if taicpu(hp1).oper[1]^.typ=top_reg then
  1101. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1102. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1103. DebugMsg('PeepHole Optimization MovMov2Mov 2 done',p);
  1104. asml.remove(hp1);
  1105. hp1.free;
  1106. ReleaseUsedRegs(TmpUsedRegs);
  1107. Result:=true;
  1108. Exit;
  1109. end;
  1110. top_ref:
  1111. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1112. begin
  1113. { change
  1114. mov mem, %treg
  1115. mov %treg, %reg
  1116. to
  1117. mov mem, %reg
  1118. }
  1119. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1120. DebugMsg('PeepHole Optimization MovMov2Mov 3 done',p);
  1121. asml.remove(hp1);
  1122. hp1.free;
  1123. ReleaseUsedRegs(TmpUsedRegs);
  1124. Result:=true;
  1125. Exit;
  1126. end;
  1127. end;
  1128. ReleaseUsedRegs(TmpUsedRegs);
  1129. end
  1130. else
  1131. { Change
  1132. mov %reg1, %reg2
  1133. xxx %reg2, ???
  1134. to
  1135. mov %reg1, %reg2
  1136. xxx %reg1, ???
  1137. to avoid a write/read penalty
  1138. }
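  { Illustration only (not from the original source):
        movl  %eax, %ebx
        testl %ebx, %ebx
        jz    .L1
    becomes, when %ebx is not used after the test,
        testl %eax, %eax
        jz    .L1
    otherwise the mov is kept and only the test operands are rewritten to use
    %eax (the .L1 label is hypothetical). }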
  1139. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1140. GetNextInstruction(p,hp1) and
  1141. (tai(hp1).typ = ait_instruction) and
  1142. (taicpu(hp1).ops >= 1) and
  1143. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1144. { we have
  1145. mov %reg1, %reg2
  1146. XXX %reg2, ???
  1147. }
  1148. begin
  1149. if ((taicpu(hp1).opcode = A_OR) or
  1150. (taicpu(hp1).opcode = A_AND) or
  1151. (taicpu(hp1).opcode = A_TEST)) and
  1152. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1153. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1154. { we have
  1155. mov %reg1, %reg2
  1156. test/or/and %reg2, %reg2
  1157. }
  1158. begin
  1159. CopyUsedRegs(TmpUsedRegs);
  1160. { reg1 will be used after the first instruction,
  1161. so update the allocation info }
  1162. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1163. if GetNextInstruction(hp1, hp2) and
  1164. (hp2.typ = ait_instruction) and
  1165. taicpu(hp2).is_jmp and
  1166. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1167. { change
  1168. mov %reg1, %reg2
  1169. test/or/and %reg2, %reg2
  1170. jxx
  1171. to
  1172. test %reg1, %reg1
  1173. jxx
  1174. }
  1175. begin
  1176. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1177. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1178. DebugMsg('PeepHole Optimization MovTestJxx2TestMov done',p);
  1179. asml.remove(p);
  1180. p.free;
  1181. p := hp1;
  1182. ReleaseUsedRegs(TmpUsedRegs);
  1183. Exit;
  1184. end
  1185. else
  1186. { change
  1187. mov %reg1, %reg2
  1188. test/or/and %reg2, %reg2
  1189. to
  1190. mov %reg1, %reg2
  1191. test/or/and %reg1, %reg1
  1192. }
  1193. begin
  1194. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1195. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1196. DebugMsg('PeepHole Optimization MovTestJxx2MovTestJxx done',p);
  1197. end;
  1198. ReleaseUsedRegs(TmpUsedRegs);
  1199. end
  1200. end
  1201. else
  1202. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1203. x >= RetOffset) as it doesn't do anything (it writes either to a
  1204. parameter or to the temporary storage room for the function
  1205. result)
  1206. }
  1207. if GetNextInstruction_p and
  1208. (tai(hp1).typ = ait_instruction) then
  1209. begin
  1210. if IsExitCode(hp1) and
  1211. MatchOpType(taicpu(p),top_reg,top_ref) and
  1212. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1213. not(assigned(current_procinfo.procdef.funcretsym) and
  1214. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1215. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  1216. begin
  1217. asml.remove(p);
  1218. p.free;
  1219. p:=hp1;
  1220. DebugMsg('Peephole removed deadstore before leave/ret',p);
  1221. RemoveLastDeallocForFuncRes(p);
  1222. exit;
  1223. end
  1224. { change
  1225. mov reg1, mem1
  1226. test/cmp x, mem1
  1227. to
  1228. mov reg1, mem1
  1229. test/cmp x, reg1
  1230. }
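  { For example (illustration only, not from the original source):
        movl %eax, 8(%esp)
        cmpl $0, 8(%esp)
    becomes
        movl %eax, 8(%esp)
        cmpl $0, %eax
    avoiding a re-read of the memory operand that was just written. }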
  1231. else if MatchOpType(taicpu(p),top_reg,top_ref) and
  1232. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  1233. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1234. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1235. begin
  1236. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1237. DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
  1238. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1239. end;
  1240. end;
  1241. { Next instruction is also a MOV ? }
  1242. if GetNextInstruction_p and
  1243. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1244. begin
  1245. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1246. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1247. { mov reg1, mem1 or mov mem1, reg1
  1248. mov mem2, reg2 mov reg2, mem2}
  1249. begin
  1250. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1251. { mov reg1, mem1 or mov mem1, reg1
  1252. mov mem2, reg1 mov reg2, mem1}
  1253. begin
  1254. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1255. { Removes the second statement from
  1256. mov reg1, mem1/reg2
  1257. mov mem1/reg2, reg1 }
  1258. begin
  1259. if taicpu(p).oper[0]^.typ=top_reg then
  1260. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1261. DebugMsg('PeepHole Optimization MovMov2Mov 1',p);
  1262. asml.remove(hp1);
  1263. hp1.free;
  1264. Result:=true;
  1265. exit;
  1266. end
  1267. else
  1268. begin
  1269. CopyUsedRegs(TmpUsedRegs);
  1270. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1271. if (taicpu(p).oper[1]^.typ = top_ref) and
  1272. { mov reg1, mem1
  1273. mov mem2, reg1 }
  1274. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1275. GetNextInstruction(hp1, hp2) and
  1276. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1277. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1278. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1279. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1280. { change to
  1281. mov reg1, mem1 mov reg1, mem1
  1282. mov mem2, reg1 cmp reg1, mem2
  1283. cmp mem1, reg1
  1284. }
  1285. begin
  1286. asml.remove(hp2);
  1287. hp2.free;
  1288. taicpu(hp1).opcode := A_CMP;
  1289. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1290. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1291. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1292. DebugMsg('Peephole Optimization MovMovCmp2MovCmp done',hp1);
  1293. end;
  1294. ReleaseUsedRegs(TmpUsedRegs);
  1295. end;
  1296. end
  1297. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1298. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1299. begin
  1300. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1301. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1302. DebugMsg('PeepHole Optimization MovMov2MovMov1 done',p);
  1303. end
  1304. else
  1305. begin
  1306. CopyUsedRegs(TmpUsedRegs);
  1307. if GetNextInstruction(hp1, hp2) and
  1308. MatchOpType(taicpu(p),top_ref,top_reg) and
  1309. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1310. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1311. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1312. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1313. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1314. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1315. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1316. { mov mem1, %reg1
  1317. mov %reg1, mem2
  1318. mov mem2, reg2
  1319. to:
  1320. mov mem1, reg2
  1321. mov reg2, mem2}
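{ hypothetical example (registers chosen for illustration), assuming
  %eax is not used afterwards:
    movl 4(%esp),%eax
    movl %eax,(%edx)
    movl (%edx),%ecx
  becomes
    movl 4(%esp),%ecx
    movl %ecx,(%edx) }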
  1322. begin
  1323. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1324. DebugMsg('PeepHole Optimization MovMovMov2MovMov 1 done',p);
  1325. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1326. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1327. asml.remove(hp2);
  1328. hp2.free;
  1329. end
  1330. {$ifdef i386}
1331. { this is enabled for i386 only, as the rules for creating the reg sets below
1332. are too complicated for x86-64, which would make this code too error prone
1333. on x86-64
1334. }
  1335. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1336. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1337. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1338. { mov mem1, reg1 mov mem1, reg1
  1339. mov reg1, mem2 mov reg1, mem2
  1340. mov mem2, reg2 mov mem2, reg1
  1341. to: to:
  1342. mov mem1, reg1 mov mem1, reg1
  1343. mov mem1, reg2 mov reg1, mem2
  1344. mov reg1, mem2
  1345. or (if mem1 depends on reg1
  1346. and/or if mem2 depends on reg2)
  1347. to:
  1348. mov mem1, reg1
  1349. mov reg1, mem2
  1350. mov reg1, reg2
  1351. }
  1352. begin
  1353. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1354. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1355. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1356. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1357. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1358. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1359. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1360. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1361. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1362. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1363. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1364. end
  1365. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1366. begin
  1367. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1368. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1369. end
  1370. else
  1371. begin
  1372. asml.remove(hp2);
  1373. hp2.free;
  1374. end
  1375. {$endif i386}
  1376. ;
  1377. ReleaseUsedRegs(TmpUsedRegs);
  1378. end;
  1379. end
  1380. (* { movl [mem1],reg1
  1381. movl [mem1],reg2
  1382. to
  1383. movl [mem1],reg1
  1384. movl reg1,reg2
  1385. }
  1386. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1387. (taicpu(p).oper[1]^.typ = top_reg) and
  1388. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1389. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1390. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1391. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1392. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1393. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1394. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1395. else*)
  1396. { movl const1,[mem1]
  1397. movl [mem1],reg1
  1398. to
  1399. movl const1,reg1
  1400. movl reg1,[mem1]
  1401. }
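{ hypothetical example (register chosen for illustration):
    movl $1,(%edx)
    movl (%edx),%eax
  becomes
    movl $1,%eax
    movl %eax,(%edx)
  which avoids the load from memory }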
  1402. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  1403. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1404. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1405. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1406. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1407. begin
  1408. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1409. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1410. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1411. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1412. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1413. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  1414. end
  1415. end
  1416. else if (taicpu(p).oper[1]^.typ = top_reg) and
  1417. GetNextInstruction_p and
  1418. (hp1.typ = ait_instruction) and
  1419. GetNextInstruction(hp1, hp2) and
  1420. MatchInstruction(hp2,A_MOV,[]) and
  1421. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1422. (taicpu(hp2).oper[0]^.typ=top_reg) and
  1423. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1424. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1425. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  1426. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1427. ) then
  1428. { change movsX/movzX reg/ref, reg2
  1429. add/sub/or/... reg3/$const, reg2
  1430. mov reg2 reg/ref
  1431. to add/sub/or/... reg3/$const, reg/ref }
  1432. begin
  1433. CopyUsedRegs(TmpUsedRegs);
  1434. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1435. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1436. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1437. begin
  1438. { by example:
  1439. movswl %si,%eax movswl %si,%eax p
  1440. decl %eax addl %edx,%eax hp1
  1441. movw %ax,%si movw %ax,%si hp2
  1442. ->
  1443. movswl %si,%eax movswl %si,%eax p
  1444. decw %eax addw %edx,%eax hp1
  1445. movw %ax,%si movw %ax,%si hp2
  1446. }
  1447. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  1448. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  1449. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  1450. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  1451. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1452. {
  1453. ->
  1454. movswl %si,%eax movswl %si,%eax p
  1455. decw %si addw %dx,%si hp1
  1456. movw %ax,%si movw %ax,%si hp2
  1457. }
  1458. case taicpu(hp1).ops of
  1459. 1:
  1460. begin
  1461. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1462. if taicpu(hp1).oper[0]^.typ=top_reg then
  1463. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1464. end;
  1465. 2:
  1466. begin
  1467. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1468. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1469. (taicpu(hp1).opcode<>A_SHL) and
  1470. (taicpu(hp1).opcode<>A_SHR) and
  1471. (taicpu(hp1).opcode<>A_SAR) then
  1472. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1473. end;
  1474. else
  1475. internalerror(2008042701);
  1476. end;
  1477. {
  1478. ->
  1479. decw %si addw %dx,%si p
  1480. }
  1481. asml.remove(p);
  1482. asml.remove(hp2);
  1483. p.Free;
  1484. hp2.Free;
  1485. p := hp1;
  1486. end;
  1487. ReleaseUsedRegs(TmpUsedRegs);
  1488. end
  1489. else if GetNextInstruction_p and
  1490. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1491. GetNextInstruction(hp1, hp2) and
  1492. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1493. MatchOperand(Taicpu(p).oper[0]^,0) and
  1494. (Taicpu(p).oper[1]^.typ = top_reg) and
  1495. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1496. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1497. { mov reg1,0
  1498. bts reg1,operand1 --> mov reg1,operand2
  1499. or reg1,operand2 bts reg1,operand1}
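{ hypothetical example (registers chosen for illustration):
    movl $0,%eax
    btsl $3,%eax
    orl %ecx,%eax
  becomes
    movl %ecx,%eax
    btsl $3,%eax }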
  1500. begin
  1501. Taicpu(hp2).opcode:=A_MOV;
  1502. asml.remove(hp1);
  1503. insertllitem(hp2,hp2.next,hp1);
  1504. asml.remove(p);
  1505. p.free;
  1506. p:=hp1;
  1507. end
  1508. else if GetNextInstruction_p and
  1509. MatchInstruction(hp1,A_LEA,[S_L]) and
  1510. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1511. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1512. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1513. ) or
  1514. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1515. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1516. )
  1517. ) then
  1518. { mov reg1,ref
  1519. lea reg2,[reg1,reg2]
  1520. to
  1521. add reg2,ref}
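{ hypothetical example (registers and offset chosen for illustration),
  assuming %eax is not used afterwards:
    movl 8(%ebp),%eax
    leal (%eax,%edx),%edx
  becomes
    addl 8(%ebp),%edx }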
  1522. begin
  1523. CopyUsedRegs(TmpUsedRegs);
1524. { reg1 must not be used afterwards }
  1525. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1526. begin
  1527. Taicpu(hp1).opcode:=A_ADD;
  1528. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1529. DebugMsg('Peephole MovLea2Add done',hp1);
  1530. asml.remove(p);
  1531. p.free;
  1532. p:=hp1;
  1533. end;
  1534. ReleaseUsedRegs(TmpUsedRegs);
  1535. end;
  1536. end;
  1537. function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  1538. var
  1539. hp1 : tai;
  1540. begin
  1541. Result:=false;
  1542. if taicpu(p).ops <> 2 then
  1543. exit;
  1544. if GetNextInstruction(p,hp1) and
  1545. MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
  1546. (taicpu(hp1).ops = 2) then
  1547. begin
  1548. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1549. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1550. { movXX reg1, mem1 or movXX mem1, reg1
  1551. movXX mem2, reg2 movXX reg2, mem2}
  1552. begin
  1553. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1554. { movXX reg1, mem1 or movXX mem1, reg1
  1555. movXX mem2, reg1 movXX reg2, mem1}
  1556. begin
  1557. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1558. begin
  1559. { Removes the second statement from
  1560. movXX reg1, mem1/reg2
  1561. movXX mem1/reg2, reg1
  1562. }
  1563. if taicpu(p).oper[0]^.typ=top_reg then
  1564. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1565. { Removes the second statement from
  1566. movXX mem1/reg1, reg2
  1567. movXX reg2, mem1/reg1
  1568. }
  1569. if (taicpu(p).oper[1]^.typ=top_reg) and
  1570. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
  1571. begin
  1572. asml.remove(p);
  1573. p.free;
  1574. GetNextInstruction(hp1,p);
  1575. DebugMsg('PeepHole Optimization MovXXMovXX2Nop 1 done',p);
  1576. end
  1577. else
1578. DebugMsg('PeepHole Optimization MovXXMovXX2MovXX 1 done',p);
  1579. asml.remove(hp1);
  1580. hp1.free;
  1581. Result:=true;
  1582. exit;
  1583. end
  1584. end;
  1585. end;
  1586. end;
  1587. end;
  1588. function TX86AsmOptimizer.OptPass1OP(const p : tai) : boolean;
  1589. var
  1590. TmpUsedRegs : TAllUsedRegs;
  1591. hp1 : tai;
  1592. begin
  1593. result:=false;
  1594. { replace
  1595. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  1596. MovX %mreg2,%mreg1
  1597. dealloc %mreg2
  1598. by
  1599. <Op>X %mreg2,%mreg1
  1600. ?
  1601. }
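{ hypothetical example (registers chosen for illustration), assuming
  %xmm0 is deallocated after the move:
    addsd %xmm1,%xmm0
    movapd %xmm0,%xmm1
  becomes
    addsd %xmm0,%xmm1 }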
  1602. if GetNextInstruction(p,hp1) and
1603. { we mix single and double operations here because we assume that the compiler
1604. generates vmovapd only after double operations and vmovaps only after single operations }
  1605. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  1606. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1607. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  1608. (taicpu(p).oper[0]^.typ=top_reg) then
  1609. begin
  1610. CopyUsedRegs(TmpUsedRegs);
  1611. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1612. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1613. begin
  1614. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  1615. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1616. DebugMsg('PeepHole Optimization OpMov2Op done',p);
  1617. asml.Remove(hp1);
  1618. hp1.Free;
  1619. result:=true;
  1620. end;
  1621. ReleaseUsedRegs(TmpUsedRegs);
  1622. end;
  1623. end;
  1624. function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  1625. var
  1626. hp1 : tai;
  1627. l : ASizeInt;
  1628. TmpUsedRegs : TAllUsedRegs;
  1629. begin
  1630. Result:=false;
  1631. { removes seg register prefixes from LEA operations, as they
  1632. don't do anything}
  1633. taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
  1634. { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
  1635. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1636. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
1637. { do not mess with leas accessing the stack pointer }
  1638. (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
  1639. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1640. begin
  1641. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1642. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1643. begin
  1644. hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
  1645. taicpu(p).oper[1]^.reg);
  1646. InsertLLItem(p.previous,p.next, hp1);
  1647. DebugMsg('PeepHole Optimization Lea2Mov done',hp1);
  1648. p.free;
  1649. p:=hp1;
  1650. Result:=true;
  1651. exit;
  1652. end
  1653. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1654. begin
  1655. hp1:=taicpu(p.Next);
  1656. DebugMsg('PeepHole Optimization Lea2Nop done',p);
  1657. asml.remove(p);
  1658. p.free;
  1659. p:=hp1;
  1660. Result:=true;
  1661. exit;
  1662. end
  1663. { continue to use lea to adjust the stack pointer,
  1664. it is the recommended way, but only if not optimizing for size }
  1665. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1666. (cs_opt_size in current_settings.optimizerswitches) then
  1667. with taicpu(p).oper[0]^.ref^ do
  1668. if (base = taicpu(p).oper[1]^.reg) then
  1669. begin
  1670. l:=offset;
  1671. if (l=1) and UseIncDec then
  1672. begin
  1673. taicpu(p).opcode:=A_INC;
  1674. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1675. taicpu(p).ops:=1;
  1676. DebugMsg('PeepHole Optimization Lea2Inc done',p);
  1677. end
  1678. else if (l=-1) and UseIncDec then
  1679. begin
  1680. taicpu(p).opcode:=A_DEC;
  1681. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1682. taicpu(p).ops:=1;
  1683. DebugMsg('PeepHole Optimization Lea2Dec done',p);
  1684. end
  1685. else
  1686. begin
  1687. if (l<0) and (l<>-2147483648) then
  1688. begin
  1689. taicpu(p).opcode:=A_SUB;
  1690. taicpu(p).loadConst(0,-l);
  1691. DebugMsg('PeepHole Optimization Lea2Sub done',p);
  1692. end
  1693. else
  1694. begin
  1695. taicpu(p).opcode:=A_ADD;
  1696. taicpu(p).loadConst(0,l);
  1697. DebugMsg('PeepHole Optimization Lea2Add done',p);
  1698. end;
  1699. end;
  1700. Result:=true;
  1701. exit;
  1702. end;
  1703. end;
  1704. if GetNextInstruction(p,hp1) and
  1705. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
  1706. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1707. MatchOpType(Taicpu(hp1),top_reg,top_reg) and
  1708. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
  1709. begin
  1710. CopyUsedRegs(TmpUsedRegs);
  1711. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1712. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1713. begin
  1714. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1715. DebugMsg('PeepHole Optimization LeaMov2Lea done',p);
  1716. asml.Remove(hp1);
  1717. hp1.Free;
  1718. result:=true;
  1719. end;
  1720. ReleaseUsedRegs(TmpUsedRegs);
  1721. end;
  1722. (*
  1723. This is unsafe, lea doesn't modify the flags but "add"
  1724. does. This breaks webtbs/tw15694.pp. The above
  1725. transformations are also unsafe, but they don't seem to
1726. be triggered by code that FPC generates (or at
1727. least this does not occur in the tests...). This needs to be
  1728. fixed by checking for the liveness of the flags register.
  1729. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1730. begin
  1731. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1732. taicpu(p).oper[0]^.ref^.base);
  1733. InsertLLItem(asml,p.previous,p.next, hp1);
  1734. DebugMsg('Peephole Lea2AddBase done',hp1);
  1735. p.free;
  1736. p:=hp1;
  1737. continue;
  1738. end
  1739. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1740. begin
  1741. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1742. taicpu(p).oper[0]^.ref^.index);
  1743. InsertLLItem(asml,p.previous,p.next,hp1);
  1744. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1745. p.free;
  1746. p:=hp1;
  1747. continue;
  1748. end
  1749. *)
  1750. end;
  1751. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  1752. var
  1753. TmpUsedRegs : TAllUsedRegs;
  1754. hp1,hp2: tai;
  1755. begin
  1756. Result:=false;
  1757. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1758. GetNextInstruction(p, hp1) and
  1759. MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
  1760. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  1761. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
  1762. or
  1763. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
  1764. ) and
  1765. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  1766. { mov reg1, reg2
  1767. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
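{ hypothetical example (registers chosen for illustration):
    movl %eax,%edx
    movzbl (%edx),%edx
  becomes
    movzbl (%eax),%edx }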
  1768. begin
  1769. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  1770. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  1771. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  1772. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
1773. DebugMsg('PeepHole Optimization MovMovXX2MovXX 1 done',p);
  1774. asml.remove(p);
  1775. p.free;
  1776. p := hp1;
  1777. Result:=true;
  1778. exit;
  1779. end
  1780. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1781. GetNextInstruction(p,hp1) and
  1782. (hp1.typ = ait_instruction) and
1783. { while the GetNextInstruction(hp1,hp2) call could be factored out,
1784. doing it separately in both branches allows the cheap, low-probability
1785. checks to be done earlier }
  1786. ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1787. GetNextInstruction(hp1,hp2) and
  1788. MatchInstruction(hp2,A_MOV,[])
  1789. ) or
  1790. ((taicpu(hp1).opcode=A_LEA) and
  1791. GetNextInstruction(hp1,hp2) and
  1792. MatchInstruction(hp2,A_MOV,[]) and
  1793. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  1794. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
  1795. ) or
  1796. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
  1797. taicpu(p).oper[1]^.reg) and
  1798. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
  1799. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
  1800. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
  1801. ) and
  1802. ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
  1803. )
  1804. ) and
  1805. MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
  1806. (taicpu(hp2).oper[1]^.typ = top_ref) then
  1807. begin
  1808. CopyUsedRegs(TmpUsedRegs);
  1809. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1810. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  1811. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
  1812. { change mov (ref), reg
  1813. add/sub/or/... reg2/$const, reg
  1814. mov reg, (ref)
  1815. # release reg
  1816. to add/sub/or/... reg2/$const, (ref) }
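{ hypothetical example (register chosen for illustration), assuming
  %eax is released afterwards:
    movl (%esi),%eax
    addl $1,%eax
    movl %eax,(%esi)
  becomes
    addl $1,(%esi) }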
  1817. begin
  1818. case taicpu(hp1).opcode of
  1819. A_INC,A_DEC,A_NOT,A_NEG :
  1820. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1821. A_LEA :
  1822. begin
  1823. taicpu(hp1).opcode:=A_ADD;
  1824. if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
  1825. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  1826. else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
  1827. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
  1828. else
  1829. taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
  1830. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1831. DebugMsg('Peephole FoldLea done',hp1);
  1832. end
  1833. else
  1834. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1835. end;
  1836. asml.remove(p);
  1837. asml.remove(hp2);
  1838. p.free;
  1839. hp2.free;
  1840. p := hp1
  1841. end;
  1842. ReleaseUsedRegs(TmpUsedRegs);
  1843. end;
  1844. end;
  1845. function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  1846. var
  1847. TmpUsedRegs : TAllUsedRegs;
  1848. hp1 : tai;
  1849. begin
  1850. Result:=false;
  1851. if (taicpu(p).ops >= 2) and
  1852. ((taicpu(p).oper[0]^.typ = top_const) or
  1853. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  1854. (taicpu(p).oper[1]^.typ = top_reg) and
  1855. ((taicpu(p).ops = 2) or
  1856. ((taicpu(p).oper[2]^.typ = top_reg) and
  1857. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  1858. GetLastInstruction(p,hp1) and
  1859. MatchInstruction(hp1,A_MOV,[]) and
  1860. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  1861. ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
  1862. ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
  1863. begin
  1864. CopyUsedRegs(TmpUsedRegs);
  1865. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
  1866. { change
  1867. mov reg1,reg2
  1868. imul y,reg2 to imul y,reg1,reg2 }
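{ hypothetical example (registers chosen for illustration):
    movl %esi,%eax
    imull $5,%eax
  becomes
    imull $5,%esi,%eax }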
  1869. begin
  1870. taicpu(p).ops := 3;
  1871. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  1872. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  1873. DebugMsg('Peephole MovImul2Imul done',p);
  1874. asml.remove(hp1);
  1875. hp1.free;
  1876. result:=true;
  1877. end;
  1878. ReleaseUsedRegs(TmpUsedRegs);
  1879. end;
  1880. end;
  1881. function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  1882. var
  1883. hp1 : tai;
  1884. begin
  1885. {
  1886. change
  1887. jmp .L1
  1888. ...
  1889. .L1:
  1890. ret
  1891. into
  1892. ret
  1893. }
  1894. result:=false;
  1895. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  1896. (taicpu(p).oper[0]^.ref^.index=NR_NO) then
  1897. begin
  1898. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  1899. if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
  1900. MatchInstruction(hp1,A_RET,[S_NO]) then
  1901. begin
  1902. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  1903. taicpu(p).opcode:=A_RET;
  1904. taicpu(p).is_jmp:=false;
  1905. taicpu(p).ops:=taicpu(hp1).ops;
  1906. case taicpu(hp1).ops of
  1907. 0:
  1908. taicpu(p).clearop(0);
  1909. 1:
  1910. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  1911. else
  1912. internalerror(2016041301);
  1913. end;
  1914. result:=true;
  1915. end;
  1916. end;
  1917. end;
  1918. function CanBeCMOV(p : tai) : boolean;
  1919. begin
  1920. CanBeCMOV:=assigned(p) and
  1921. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  1922. { we can't use cmov ref,reg because
  1923. ref could be nil and cmov still throws an exception
  1924. if ref=nil but the mov isn't done (FK)
  1925. or ((taicpu(p).oper[0]^.typ = top_ref) and
  1926. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  1927. }
  1928. MatchOpType(taicpu(p),top_reg,top_reg);
  1929. end;
  1930. function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  1931. var
  1932. hp1,hp2,hp3: tai;
  1933. carryadd_opcode : TAsmOp;
  1934. l : Longint;
  1935. condition : TAsmCond;
  1936. begin
  1937. { jb @@1 cmc
  1938. inc/dec operand --> adc/sbb operand,0
  1939. @@1:
  1940. ... and ...
  1941. jnb @@1
  1942. inc/dec operand --> adc/sbb operand,0
  1943. @@1: }
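{ hypothetical example (register chosen for illustration):
    jb .L1                    cmc
    incl %eax        -->      adcl $0,%eax
  .L1:
}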
  1944. result:=false;
  1945. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  1946. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  1947. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  1948. begin
  1949. carryadd_opcode:=A_NONE;
  1950. if Taicpu(p).condition in [C_NAE,C_B] then
  1951. begin
  1952. if Taicpu(hp1).opcode=A_INC then
  1953. carryadd_opcode:=A_ADC;
  1954. if Taicpu(hp1).opcode=A_DEC then
  1955. carryadd_opcode:=A_SBB;
  1956. if carryadd_opcode<>A_NONE then
  1957. begin
  1958. Taicpu(p).clearop(0);
  1959. Taicpu(p).ops:=0;
  1960. Taicpu(p).is_jmp:=false;
  1961. Taicpu(p).opcode:=A_CMC;
  1962. Taicpu(p).condition:=C_NONE;
  1963. Taicpu(hp1).ops:=2;
  1964. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1965. Taicpu(hp1).loadconst(0,0);
  1966. Taicpu(hp1).opcode:=carryadd_opcode;
  1967. result:=true;
  1968. exit;
  1969. end;
  1970. end;
  1971. if Taicpu(p).condition in [C_AE,C_NB] then
  1972. begin
  1973. if Taicpu(hp1).opcode=A_INC then
  1974. carryadd_opcode:=A_ADC;
  1975. if Taicpu(hp1).opcode=A_DEC then
  1976. carryadd_opcode:=A_SBB;
  1977. if carryadd_opcode<>A_NONE then
  1978. begin
  1979. asml.remove(p);
  1980. p.free;
  1981. Taicpu(hp1).ops:=2;
  1982. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1983. Taicpu(hp1).loadconst(0,0);
  1984. Taicpu(hp1).opcode:=carryadd_opcode;
  1985. p:=hp1;
  1986. result:=true;
  1987. exit;
  1988. end;
  1989. end;
  1990. end;
  1991. {$ifndef i8086}
  1992. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  1993. begin
  1994. { check for
  1995. jCC xxx
  1996. <several movs>
  1997. xxx:
  1998. }
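{ hypothetical example (registers chosen for illustration):
    jne .L1
    movl %edx,%eax
  .L1:
  becomes
    cmove %edx,%eax }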
  1999. l:=0;
  2000. GetNextInstruction(p, hp1);
  2001. while assigned(hp1) and
  2002. CanBeCMOV(hp1) and
  2003. { stop on labels }
  2004. not(hp1.typ=ait_label) do
  2005. begin
  2006. inc(l);
  2007. GetNextInstruction(hp1,hp1);
  2008. end;
  2009. if assigned(hp1) then
  2010. begin
  2011. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2012. begin
  2013. if (l<=4) and (l>0) then
  2014. begin
  2015. condition:=inverse_cond(taicpu(p).condition);
  2016. hp2:=p;
  2017. GetNextInstruction(p,hp1);
  2018. p:=hp1;
  2019. repeat
  2020. taicpu(hp1).opcode:=A_CMOVcc;
  2021. taicpu(hp1).condition:=condition;
  2022. GetNextInstruction(hp1,hp1);
  2023. until not(assigned(hp1)) or
  2024. not(CanBeCMOV(hp1));
2025. { wait until now with removing the jump, else GetNextInstruction
2026. could have ignored the label above if this jump was
2027. its only reference }
  2028. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2029. { if the label refs. reach zero, remove any alignment before the label }
  2030. if (hp1.typ=ait_align) and (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
  2031. begin
  2032. asml.Remove(hp1);
  2033. hp1.Free;
  2034. end;
  2035. asml.remove(hp2);
  2036. hp2.free;
  2037. result:=true;
  2038. exit;
  2039. end;
  2040. end
  2041. else
  2042. begin
  2043. { check further for
  2044. jCC xxx
  2045. <several movs 1>
  2046. jmp yyy
  2047. xxx:
  2048. <several movs 2>
  2049. yyy:
  2050. }
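{ hypothetical example (registers chosen for illustration):
    jne .L1
    movl %edx,%eax
    jmp .L2
  .L1:
    movl %ecx,%eax
  .L2:
  becomes
    cmove %edx,%eax
    cmovne %ecx,%eax }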
  2051. { hp2 points to jmp yyy }
  2052. hp2:=hp1;
  2053. { skip hp1 to xxx }
  2054. GetNextInstruction(hp1, hp1);
  2055. if assigned(hp2) and
  2056. assigned(hp1) and
  2057. (l<=3) and
  2058. (hp2.typ=ait_instruction) and
  2059. (taicpu(hp2).is_jmp) and
  2060. (taicpu(hp2).condition=C_None) and
  2061. { real label and jump, no further references to the
  2062. label are allowed }
  2063. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
  2064. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2065. begin
  2066. l:=0;
2067. { skip hp1 to <several movs 2> }
  2068. GetNextInstruction(hp1, hp1);
  2069. while assigned(hp1) and
  2070. CanBeCMOV(hp1) do
  2071. begin
  2072. inc(l);
  2073. GetNextInstruction(hp1, hp1);
  2074. end;
  2075. { hp1 points to yyy: }
  2076. if assigned(hp1) and
  2077. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  2078. begin
  2079. condition:=inverse_cond(taicpu(p).condition);
  2080. GetNextInstruction(p,hp1);
  2081. hp3:=p;
  2082. p:=hp1;
  2083. repeat
  2084. taicpu(hp1).opcode:=A_CMOVcc;
  2085. taicpu(hp1).condition:=condition;
  2086. GetNextInstruction(hp1,hp1);
  2087. until not(assigned(hp1)) or
  2088. not(CanBeCMOV(hp1));
  2089. { hp2 is still at jmp yyy }
  2090. GetNextInstruction(hp2,hp1);
  2091. { hp2 is now at xxx: }
  2092. condition:=inverse_cond(condition);
  2093. GetNextInstruction(hp1,hp1);
  2094. { hp1 is now at <several movs 2> }
  2095. repeat
  2096. taicpu(hp1).opcode:=A_CMOVcc;
  2097. taicpu(hp1).condition:=condition;
  2098. GetNextInstruction(hp1,hp1);
  2099. until not(assigned(hp1)) or
  2100. not(CanBeCMOV(hp1));
  2101. {
  2102. asml.remove(hp1.next)
  2103. hp1.next.free;
  2104. asml.remove(hp1);
  2105. hp1.free;
  2106. }
  2107. { remove jCC }
  2108. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  2109. asml.remove(hp3);
  2110. hp3.free;
  2111. { remove jmp }
  2112. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2113. asml.remove(hp2);
  2114. hp2.free;
  2115. result:=true;
  2116. exit;
  2117. end;
  2118. end;
  2119. end;
  2120. end;
  2121. end;
  2122. {$endif i8086}
  2123. end;
  2124. function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  2125. var
  2126. hp1,hp2: tai;
  2127. begin
  2128. result:=false;
  2129. if (taicpu(p).oper[1]^.typ = top_reg) and
  2130. GetNextInstruction(p,hp1) and
  2131. (hp1.typ = ait_instruction) and
  2132. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  2133. GetNextInstruction(hp1,hp2) and
  2134. MatchInstruction(hp2,A_MOV,[]) and
  2135. (taicpu(hp2).oper[0]^.typ = top_reg) and
  2136. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  2137. {$ifdef i386}
  2138. { not all registers have byte size sub registers on i386 }
  2139. ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
  2140. {$endif i386}
  2141. (((taicpu(hp1).ops=2) and
  2142. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  2143. ((taicpu(hp1).ops=1) and
  2144. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  2145. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  2146. begin
  2147. { change movsX/movzX reg/ref, reg2
  2148. add/sub/or/... reg3/$const, reg2
  2149. mov reg2 reg/ref
  2150. to add/sub/or/... reg3/$const, reg/ref }
  2151. { by example:
  2152. movswl %si,%eax movswl %si,%eax p
  2153. decl %eax addl %edx,%eax hp1
  2154. movw %ax,%si movw %ax,%si hp2
  2155. ->
  2156. movswl %si,%eax movswl %si,%eax p
  2157. decw %eax addw %edx,%eax hp1
  2158. movw %ax,%si movw %ax,%si hp2
  2159. }
  2160. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2161. {
  2162. ->
  2163. movswl %si,%eax movswl %si,%eax p
  2164. decw %si addw %dx,%si hp1
  2165. movw %ax,%si movw %ax,%si hp2
  2166. }
  2167. case taicpu(hp1).ops of
  2168. 1:
  2169. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  2170. 2:
  2171. begin
  2172. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  2173. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  2174. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2175. end;
  2176. else
  2177. internalerror(2008042701);
  2178. end;
  2179. {
  2180. ->
  2181. decw %si addw %dx,%si p
  2182. }
  2183. DebugMsg('PeepHole Optimization,var3',p);
  2184. asml.remove(p);
  2185. asml.remove(hp2);
  2186. p.free;
  2187. hp2.free;
  2188. p:=hp1;
  2189. end
  2190. { removes superfluous And's after movzx's }
  2191. else if taicpu(p).opcode=A_MOVZX then
  2192. begin
  2193. if (taicpu(p).oper[1]^.typ = top_reg) and
  2194. GetNextInstruction(p, hp1) and
  2195. (tai(hp1).typ = ait_instruction) and
  2196. (taicpu(hp1).opcode = A_AND) and
  2197. (taicpu(hp1).oper[0]^.typ = top_const) and
  2198. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2199. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2200. begin
  2201. case taicpu(p).opsize Of
  2202. S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
  2203. if (taicpu(hp1).oper[0]^.val = $ff) then
  2204. begin
  2205. DebugMsg('PeepHole Optimization,var4',p);
  2206. asml.remove(hp1);
  2207. hp1.free;
  2208. end;
  2209. S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
  2210. if (taicpu(hp1).oper[0]^.val = $ffff) then
  2211. begin
  2212. DebugMsg('PeepHole Optimization,var5',p);
  2213. asml.remove(hp1);
  2214. hp1.free;
  2215. end;
  2216. {$ifdef x86_64}
  2217. S_LQ:
  2218. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  2219. begin
  2220. if (cs_asm_source in current_settings.globalswitches) then
  2221. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
  2222. asml.remove(hp1);
  2223. hp1.Free;
  2224. end;
  2225. {$endif x86_64}
  2226. end;
  2227. end;
2228. { changes some movzx constructs to faster synonyms (all examples
2229. are given with eax/ax, but are also valid for other registers)}
  2230. if (taicpu(p).oper[1]^.typ = top_reg) then
  2231. if (taicpu(p).oper[0]^.typ = top_reg) then
  2232. case taicpu(p).opsize of
  2233. S_BW:
  2234. begin
  2235. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2236. not(cs_opt_size in current_settings.optimizerswitches) then
  2237. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  2238. begin
  2239. taicpu(p).opcode := A_AND;
  2240. taicpu(p).changeopsize(S_W);
  2241. taicpu(p).loadConst(0,$ff);
  2242. DebugMsg('PeepHole Optimization,var7',p);
  2243. end
  2244. else if GetNextInstruction(p, hp1) and
  2245. (tai(hp1).typ = ait_instruction) and
  2246. (taicpu(hp1).opcode = A_AND) and
  2247. (taicpu(hp1).oper[0]^.typ = top_const) and
  2248. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2249. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2250. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  2251. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  2252. begin
  2253. DebugMsg('PeepHole Optimization,var8',p);
  2254. taicpu(p).opcode := A_MOV;
  2255. taicpu(p).changeopsize(S_W);
  2256. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  2257. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2258. end;
  2259. end;
  2260. S_BL:
  2261. begin
  2262. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2263. not(cs_opt_size in current_settings.optimizerswitches) then
  2264. { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
  2265. begin
  2266. taicpu(p).opcode := A_AND;
  2267. taicpu(p).changeopsize(S_L);
  2268. taicpu(p).loadConst(0,$ff)
  2269. end
  2270. else if GetNextInstruction(p, hp1) and
  2271. (tai(hp1).typ = ait_instruction) and
  2272. (taicpu(hp1).opcode = A_AND) and
  2273. (taicpu(hp1).oper[0]^.typ = top_const) and
  2274. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2275. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2276. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  2277. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  2278. begin
  2279. DebugMsg('PeepHole Optimization,var10',p);
  2280. taicpu(p).opcode := A_MOV;
  2281. taicpu(p).changeopsize(S_L);
  2282. { do not use R_SUBWHOLE
  2283. as movl %rdx,%eax
  2284. is invalid in assembler PM }
  2285. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2286. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2287. end
  2288. end;
  2289. {$ifndef i8086}
  2290. S_WL:
  2291. begin
  2292. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2293. not(cs_opt_size in current_settings.optimizerswitches) then
  2294. { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
  2295. begin
  2296. DebugMsg('PeepHole Optimization,var11',p);
  2297. taicpu(p).opcode := A_AND;
  2298. taicpu(p).changeopsize(S_L);
  2299. taicpu(p).loadConst(0,$ffff);
  2300. end
  2301. else if GetNextInstruction(p, hp1) and
  2302. (tai(hp1).typ = ait_instruction) and
  2303. (taicpu(hp1).opcode = A_AND) and
  2304. (taicpu(hp1).oper[0]^.typ = top_const) and
  2305. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2306. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2307. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  2308. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  2309. begin
  2310. DebugMsg('PeepHole Optimization,var12',p);
  2311. taicpu(p).opcode := A_MOV;
  2312. taicpu(p).changeopsize(S_L);
  2313. { do not use R_SUBWHOLE
  2314. as movl %rdx,%eax
  2315. is invalid in assembler PM }
  2316. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2317. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2318. end;
  2319. end;
  2320. {$endif i8086}
  2321. end
  2322. else if (taicpu(p).oper[0]^.typ = top_ref) then
  2323. begin
  2324. if GetNextInstruction(p, hp1) and
  2325. (tai(hp1).typ = ait_instruction) and
  2326. (taicpu(hp1).opcode = A_AND) and
  2327. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2328. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2329. begin
  2330. taicpu(p).opcode := A_MOV;
  2331. case taicpu(p).opsize Of
  2332. S_BL:
  2333. begin
  2334. DebugMsg('PeepHole Optimization,var13',p);
  2335. taicpu(p).changeopsize(S_L);
  2336. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2337. end;
  2338. S_WL:
  2339. begin
  2340. DebugMsg('PeepHole Optimization,var14',p);
  2341. taicpu(p).changeopsize(S_L);
  2342. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2343. end;
  2344. S_BW:
  2345. begin
  2346. DebugMsg('PeepHole Optimization,var15',p);
  2347. taicpu(p).changeopsize(S_W);
  2348. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2349. end;
  2350. {$ifdef x86_64}
  2351. S_BQ:
  2352. begin
  2353. DebugMsg('PeepHole Optimization,var16',p);
  2354. taicpu(p).changeopsize(S_Q);
  2355. taicpu(hp1).loadConst(
  2356. 0, taicpu(hp1).oper[0]^.val and $ff);
  2357. end;
  2358. S_WQ:
  2359. begin
  2360. DebugMsg('PeepHole Optimization,var17',p);
  2361. taicpu(p).changeopsize(S_Q);
  2362. taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
  2363. end;
  2364. S_LQ:
  2365. begin
  2366. DebugMsg('PeepHole Optimization,var18',p);
  2367. taicpu(p).changeopsize(S_Q);
  2368. taicpu(hp1).loadConst(
  2369. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  2370. end;
  2371. {$endif x86_64}
  2372. else
  2373. Internalerror(2017050704)
  2374. end;
  2375. end;
  2376. end;
  2377. end;
  2378. end;
  2379. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  2380. var
  2381. hp1 : tai;
  2382. begin
  2383. Result:=false;
  2384. if not(GetNextInstruction(p, hp1)) then
  2385. exit;
  2386. if MatchOpType(taicpu(p),top_const,top_reg) and
  2387. MatchInstruction(hp1,A_AND,[]) and
  2388. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2389. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2390. { the second register must contain the first one, so compare their subreg types }
  2391. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2392. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  2393. { change
  2394. and const1, reg
  2395. and const2, reg
  2396. to
  2397. and (const1 and const2), reg
  2398. }
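{ hypothetical example (register chosen for illustration):
    andl $0xfff0,%eax
    andl $0xff,%eax
  becomes
    andl $0xf0,%eax }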
  2399. begin
  2400. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  2401. DebugMsg('Peephole AndAnd2And done',hp1);
  2402. asml.remove(p);
  2403. p.Free;
  2404. p:=hp1;
  2405. Result:=true;
  2406. exit;
  2407. end
  2408. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2409. MatchInstruction(hp1,A_MOVZX,[]) and
  2410. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2411. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2412. (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2413. (((taicpu(p).opsize=S_W) and
  2414. (taicpu(hp1).opsize=S_BW)) or
  2415. ((taicpu(p).opsize=S_L) and
  2416. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2417. {$ifdef x86_64}
  2418. or
  2419. ((taicpu(p).opsize=S_Q) and
  2420. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2421. {$endif x86_64}
  2422. ) then
  2423. begin
  2424. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2425. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  2426. ) or
  2427. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2428. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  2429. {$ifdef x86_64}
  2430. or
  2431. (((taicpu(hp1).opsize)=S_LQ) and
  2432. ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
  2433. )
  2434. {$endif x86_64}
  2435. then
  2436. begin
  2437. DebugMsg('Peephole AndMovzToAnd done',p);
  2438. asml.remove(hp1);
  2439. hp1.free;
  2440. end;
  2441. end
  2442. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2443. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  2444. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2445. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2446. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2447. (((taicpu(p).opsize=S_W) and
  2448. (taicpu(hp1).opsize=S_BW)) or
  2449. ((taicpu(p).opsize=S_L) and
  2450. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2451. {$ifdef x86_64}
  2452. or
  2453. ((taicpu(p).opsize=S_Q) and
  2454. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2455. {$endif x86_64}
  2456. ) then
  2457. begin
  2458. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2459. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  2460. ) or
  2461. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2462. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  2463. {$ifdef x86_64}
  2464. or
  2465. (((taicpu(hp1).opsize)=S_LQ) and
  2466. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  2467. )
  2468. {$endif x86_64}
  2469. then
  2470. begin
  2471. DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
  2472. asml.remove(hp1);
  2473. hp1.free;
  2474. end;
  2475. end
  2476. else if (taicpu(p).oper[1]^.typ = top_reg) and
  2477. (hp1.typ = ait_instruction) and
  2478. (taicpu(hp1).is_jmp) and
  2479. (taicpu(hp1).opcode<>A_JMP) and
  2480. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  2481. { change
  2482. and x, reg
  2483. jxx
  2484. to
  2485. test x, reg
  2486. jxx
  2487. if reg is deallocated before the
  2488. jump, but only if it's a conditional jump (PFV)
  2489. }
  2490. taicpu(p).opcode := A_TEST;
  2491. end;
  2492. function TX86AsmOptimizer.PostPeepholeOptMov(const p : tai) : Boolean;
  2493. begin
  2494. Result:=false;
  2495. if (taicpu(p).oper[1]^.typ = Top_Reg) and
  2496. not(RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2497. begin
  2498. if (taicpu(p).oper[0]^.typ = top_const) then
  2499. begin
  2500. case taicpu(p).oper[0]^.val of
  2501. 0:
  2502. begin
  2503. { change "mov $0,%reg" into "xor %reg,%reg" }
  2504. taicpu(p).opcode := A_XOR;
  2505. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  2506. end;
  2507. $1..$FFFFFFFF:
  2508. begin
  2509. { Code size reduction by J. Gareth "Kit" Moreton }
  2510. { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
  2511. case taicpu(p).opsize of
  2512. S_Q:
  2513. begin
2514. DebugMsg('Peephole Optimization: movq x,%reg -> movl x,%reg (x is a 32-bit constant)', p);
  2515. TRegisterRec(taicpu(p).oper[1]^.reg).subreg := R_SUBD;
  2516. taicpu(p).opsize := S_L;
  2517. end;
  2518. end;
  2519. end;
  2520. end;
  2521. end;
  2522. end;
  2523. end;
  2524. function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
  2525. begin
  2526. Result:=false;
  2527. { change "cmp $0, %reg" to "test %reg, %reg" }
  2528. if MatchOpType(taicpu(p),top_const,top_reg) and
  2529. (taicpu(p).oper[0]^.val = 0) then
  2530. begin
  2531. taicpu(p).opcode := A_TEST;
  2532. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  2533. Result:=true;
  2534. end;
  2535. end;
  2536. function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
  2537. var
  2538. IsTestConstX : Boolean;
  2539. hp1,hp2 : tai;
  2540. begin
  2541. Result:=false;
  2542. { removes the line marked with (x) from the sequence
  2543. and/or/xor/add/sub/... $x, %y
  2544. test/or %y, %y | test $-1, %y (x)
  2545. j(n)z _Label
  2546. as the first instruction already adjusts the ZF
  2547. %y operand may also be a reference }
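{ hypothetical example (register chosen for illustration):
    andl $3,%eax
    testl %eax,%eax
    jz .L1
  becomes
    andl $3,%eax
    jz .L1 }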
  2548. IsTestConstX:=(taicpu(p).opcode=A_TEST) and
  2549. MatchOperand(taicpu(p).oper[0]^,-1);
  2550. if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
  2551. GetLastInstruction(p, hp1) and
  2552. (tai(hp1).typ = ait_instruction) and
  2553. GetNextInstruction(p,hp2) and
  2554. MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
  2555. case taicpu(hp1).opcode Of
  2556. A_ADD, A_SUB, A_OR, A_XOR, A_AND:
  2557. begin
  2558. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  2559. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2560. { and in case of carry for A(E)/B(E)/C/NC }
  2561. ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
  2562. ((taicpu(hp1).opcode <> A_ADD) and
  2563. (taicpu(hp1).opcode <> A_SUB))) then
  2564. begin
  2565. hp1 := tai(p.next);
  2566. asml.remove(p);
  2567. p.free;
  2568. p := tai(hp1);
  2569. Result:=true;
  2570. end;
  2571. end;
  2572. A_SHL, A_SAL, A_SHR, A_SAR:
  2573. begin
  2574. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
  2575. { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
  2576. { therefore, it's only safe to do this optimization for }
  2577. { shifts by a (nonzero) constant }
  2578. (taicpu(hp1).oper[0]^.typ = top_const) and
  2579. (taicpu(hp1).oper[0]^.val <> 0) and
  2580. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2581. { and in case of carry for A(E)/B(E)/C/NC }
  2582. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  2583. begin
  2584. hp1 := tai(p.next);
  2585. asml.remove(p);
  2586. p.free;
  2587. p := tai(hp1);
  2588. Result:=true;
  2589. end;
  2590. end;
  2591. A_DEC, A_INC, A_NEG:
  2592. begin
  2593. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
  2594. { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
  2595. { and in case of carry for A(E)/B(E)/C/NC }
  2596. (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
  2597. begin
  2598. case taicpu(hp1).opcode Of
  2599. A_DEC, A_INC:
  2600. { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
  2601. begin
  2602. case taicpu(hp1).opcode Of
  2603. A_DEC: taicpu(hp1).opcode := A_SUB;
  2604. A_INC: taicpu(hp1).opcode := A_ADD;
  2605. end;
  2606. taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
  2607. taicpu(hp1).loadConst(0,1);
  2608. taicpu(hp1).ops:=2;
  2609. end
  2610. end;
  2611. hp1 := tai(p.next);
  2612. asml.remove(p);
  2613. p.free;
  2614. p := tai(hp1);
  2615. Result:=true;
  2616. end;
  2617. end
  2618. else
  2619. { change "test $-1,%reg" into "test %reg,%reg" }
  2620. if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  2621. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  2622. end { case }
  2623. { change "test $-1,%reg" into "test %reg,%reg" }
  2624. else if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
  2625. taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  2626. end;
  2627. procedure TX86AsmOptimizer.OptReferences;
  2628. var
  2629. p: tai;
  2630. i: Integer;
  2631. begin
  2632. p := BlockStart;
  2633. while (p <> BlockEnd) Do
  2634. begin
  2635. if p.typ=ait_instruction then
  2636. begin
  2637. for i:=0 to taicpu(p).ops-1 do
  2638. if taicpu(p).oper[i]^.typ=top_ref then
  2639. optimize_ref(taicpu(p).oper[i]^.ref^,false);
  2640. end;
  2641. p:=tai(p.next);
  2642. end;
  2643. end;
  2644. end.