aoptx86.pas 101 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  { x86 specific assembler optimizer; extends TAsmOptimizer with register
    usage queries and with the individual peephole handlers declared below. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { overrides of methods inherited from TAsmOptimizer }
    function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; override;
    function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; override;

    { x86 specific query used to implement InstructionLoadsFromReg }
    function RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  protected
    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;

    { checks whether reading the value in reg1 depends on the value of reg2.
      This is very similar to SuperRegisterEquals, except that it takes into
      account that R_SUBH and R_SUBL are independent (e.g. reading from AL
      does not depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;

    { helpers }
    procedure DebugMsg(const s: string; p: tai); inline;
    procedure AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    class function IsExitCode(p: tai): boolean;
    class function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    procedure RemoveLastDeallocForFuncRes(p: tai);

    { handler run before the peephole passes }
    function PrePeepholeOptSxx(var p: tai): boolean;

    { pass 1 handlers }
    function OptPass1AND(var p: tai): boolean;
    function OptPass1VMOVAP(var p: tai): boolean;
    function OptPass1VOP(const p: tai): boolean;
    function OptPass1MOV(var p: tai): boolean;
    function OptPass1Movx(var p: tai): boolean;
    function OptPass1MOVAP(var p: tai): boolean;
    function OptPass1MOVXX(var p: tai): boolean;

    { pass 2 handlers }
    function OptPass2MOV(var p: tai): boolean;
    function OptPass2Imul(var p: tai): boolean;
    function OptPass2Jmp(var p: tai): boolean;
    function OptPass2Jcc(var p: tai): boolean;

    { handler run after the peephole passes }
    procedure PostPeepholeOptMov(const p: tai);
  end;
  { MatchInstruction: true if instr is an instruction with (one of) the given
    opcode(s) and an operand size contained in opsize; an empty opsize set
    accepts every operand size }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1, op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1, op2, op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

  { MatchOperand: true if oper is the given register, the given constant,
    or (third variant) of the same kind and value as oper2 }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

  { true if both references have identical offset, segment, base, index,
    scale factor, symbol, relsymbol and refaddr }
  function RefsEqual(const r1, r2: treference): boolean;

  { returns true, if ref is a plain reference (offset 0, scale factor 0 or 1,
    no segment, no symbols) using the registers passed as base and index;
    passing NR_INVALID for base or index skips the check of that register }
  function MatchReference(const ref: treference; base, index: TRegister): Boolean;

  { returns true, if ref is a reference using only the registers passed as base and index
    and having an arbitrary offset (the offset itself is not checked) }
  function MatchReferenceWithOffset(const ref: treference; base, index: TRegister): Boolean;
  71. implementation
  72. uses
  73. cutils,verbose,
  74. globals,
  75. cpuinfo,
  76. procinfo,
  77. aasmbase,
  78. aoptutils,
  79. symconst,symsym,
  80. itcpugas;
  { Returns true if instr is an instruction whose opcode equals op and whose
    operand size is a member of opsize. An empty opsize set accepts any
    operand size. }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      { anything that is not an instruction can never match }
      if instr.typ<>ait_instruction then
        exit;
      if taicpu(instr).opcode<>op then
        exit;
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Two-opcode variant: returns true if instr is an instruction whose opcode is
    op1 or op2 and whose operand size is a member of opsize (an empty opsize
    set accepts any operand size). }
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (taicpu(instr).opcode=op1) or (taicpu(instr).opcode=op2) then
        result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Three-opcode variant: returns true if instr is an instruction whose opcode
    is op1, op2 or op3 and whose operand size is a member of opsize (an empty
    opsize set accepts any operand size). }
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (taicpu(instr).opcode=op1) or
         (taicpu(instr).opcode=op2) or
         (taicpu(instr).opcode=op3) then
        result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { Open-array variant: returns true if instr is an instruction whose opcode
    occurs in ops and whose operand size is a member of opsize (an empty
    opsize set accepts any operand size). An empty ops array never matches. }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      op : TAsmOp;
    begin
      result:=false;
      { neither of the next two tests depends on the candidate opcode, so they
        are evaluated once instead of once per array element }
      if instr.typ<>ait_instruction then
        exit;
      if (opsize<>[]) and not(taicpu(instr).opsize in opsize) then
        exit;
      for op in ops do
        if taicpu(instr).opcode=op then
          begin
            result:=true;
            exit;
          end;
    end;
  { Returns true if oper is a register operand holding exactly reg. }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ=top_reg then
        result:=oper.reg=reg
      else
        result:=false;
    end;
  { Returns true if oper is a constant operand with the value a. }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ=top_const then
        result:=oper.val=a
      else
        result:=false;
    end;
  { Returns true if both operands are of the same kind and carry the same
    constant, register or reference (compared with RefsEqual). Two operands of
    an equal kind other than constant, register or reference trigger
    internalerror 2013102801. }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      result:=oper1.typ=oper2.typ;
      { operands of different kinds can never match }
      if not result then
        exit;
      case oper1.typ of
        top_const:
          result:=oper1.val=oper2.val;
        top_reg:
          result:=oper1.reg=oper2.reg;
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { Returns true if r1 and r2 agree in offset, segment, base, index, scale
    factor, symbol, relsymbol and refaddr. }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      RefsEqual:=false;
      { registers and scaling }
      if (r1.segment<>r2.segment) or
         (r1.base<>r2.base) or
         (r1.index<>r2.index) or
         (r1.scalefactor<>r2.scalefactor) then
        exit;
      { displacement and symbolic parts }
      RefsEqual:=
        (r1.offset=r2.offset) and
        (r1.symbol=r2.symbol) and
        (r1.relsymbol=r2.relsymbol) and
        (r1.refaddr=r2.refaddr);
    end;
  { Returns true if ref has offset 0, a scale factor of 0 or 1, no segment
    register and no symbol or relsymbol, and uses the given base and index
    registers. Passing NR_INVALID as base or index accepts any register in
    that position. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      { identical to MatchReferenceWithOffset, plus the zero offset condition }
      if ref.offset<>0 then
        Result:=false
      else
        Result:=MatchReferenceWithOffset(ref,base,index);
    end;
  { Returns true if ref has a scale factor of 0 or 1, no segment register and
    no symbol or relsymbol, and uses the given base and index registers. The
    offset is not checked. Passing NR_INVALID as base or index accepts any
    register in that position. }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      Result:=false;
      { only plain, unscaled, non-symbolic references qualify }
      if not(ref.scalefactor in [0,1]) or
         (ref.segment<>NR_NO) or
         (ref.symbol<>nil) or
         (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      Result:=(index=NR_INVALID) or (ref.index=index);
    end;
  { Override that forwards to RegReadByInstruction. }
  function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
    begin
      exit(RegReadByInstruction(reg,hp));
    end;
  { Returns true if hp is an instruction that reads reg.

    - anything that is not an instruction reads nothing
    - CALL is treated as reading every register
    - MUL, one-operand IMUL, DIV and IDIV get explicit handling of their
      implicit EAX/EDX operands
    - for all other opcodes the answer is derived from the operands and from
      the change information stored in insprop[opcode].Ch }
  function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
    var
      ins: taicpu;
      i: longint;
      implicitread: boolean;
    begin
      Result:=false;
      if hp.typ<>ait_instruction then
        exit;
      ins:=taicpu(hp);
      case ins.opcode of
        A_CALL:
          Result:=true;
        A_IMUL:
          case ins.ops of
            1:
              { implicit EAX operand; the AH sub register is not counted as
                read by the byte sized form }
              Result:=RegInOp(reg,ins.oper[0]^) or
                (
                  (getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX) and
                  ((getsubreg(reg)<>R_SUBH) or (ins.opsize<>S_B))
                );
            2,3:
              Result:=RegInOp(reg,ins.oper[0]^) or RegInOp(reg,ins.oper[1]^);
          end;
        A_MUL:
          { same implicit EAX handling as the one-operand IMUL }
          Result:=RegInOp(reg,ins.oper[0]^) or
            (
              (getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX) and
              ((getsubreg(reg)<>R_SUBH) or (ins.opsize<>S_B))
            );
        A_IDIV,A_DIV:
          { implicit EAX operand, plus EDX unless the division is byte sized }
          Result:=RegInOp(reg,ins.oper[0]^) or
            (
              (getregtype(reg)=R_INTREGISTER) and
              (
                (getsupreg(reg)=RS_EAX) or
                ((getsupreg(reg)=RS_EDX) and (ins.opsize<>S_B))
              )
            );
        else
          begin
            { LEA is reported as never reading a segment register }
            if (ins.opcode=A_LEA) and is_segment_reg(reg) then
              exit(false);

            { a register that is part of any memory reference is read }
            for i:=0 to ins.ops-1 do
              if (ins.oper[i]^.typ=top_ref) and
                 RegInRef(reg,ins.oper[i]^.ref^) then
                exit(true);

            { special handling for SSE MOVSD }
            if (ins.opcode=A_MOVSD) and (ins.ops>0) then
              begin
                if ins.ops<>2 then
                  internalerror(2017042702);
                { operand 0 is read; operand 1 only counts as read in the
                  register to register form }
                Result:=RegInOp(reg,ins.oper[0]^) or
                  (
                    (ins.oper[1]^.typ=top_reg) and (ins.oper[0]^.typ=top_reg) and
                    RegInOp(reg,ins.oper[1]^)
                  );
                exit;
              end;

            with insprop[ins.opcode] do
              begin
                { implicit reads of the classic integer registers as recorded
                  in the instruction properties }
                if getregtype(reg)=R_INTREGISTER then
                  begin
                    implicitread:=false;
                    case getsupreg(reg) of
                      RS_EAX:
                        implicitread:=[Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[];
                      RS_ECX:
                        implicitread:=[Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[];
                      RS_EDX:
                        implicitread:=[Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[];
                      RS_EBX:
                        implicitread:=[Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[];
                      RS_ESP:
                        implicitread:=[Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[];
                      RS_EBP:
                        implicitread:=[Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[];
                      RS_ESI:
                        implicitread:=[Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[];
                      RS_EDI:
                        implicitread:=[Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[];
                    end;
                    if implicitread then
                      exit(true);
                  end;

                { flags register, either as a whole or a single flag }
                if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
                  begin
                    { conditional instruction and a single flag is queried:
                      check which flags the condition code tests }
                    if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
                      begin
                        case ins.condition of
                          C_A,C_NBE,       { CF=0 and ZF=0 }
                          C_BE,C_NA:       { CF=1 or ZF=1 }
                            Result:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                          C_AE,C_NB,C_NC,  { CF=0 }
                          C_B,C_NAE,C_C:   { CF=1 }
                            Result:=getsubreg(reg) in [R_SUBFLAGCARRY];
                          C_NE,C_NZ,       { ZF=0 }
                          C_E,C_Z:         { ZF=1 }
                            Result:=getsubreg(reg) in [R_SUBFLAGZERO];
                          C_G,C_NLE,       { ZF=0 and SF=OF }
                          C_LE,C_NG:       { ZF=1 or SF<>OF }
                            Result:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                          C_GE,C_NL,       { SF=OF }
                          C_L,C_NGE:       { SF<>OF }
                            Result:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                          C_NO,            { OF=0 }
                          C_O:             { OF=1 }
                            Result:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
                          C_NP,C_PO,       { PF=0 }
                          C_P,C_PE:        { PF=1 }
                            Result:=getsubreg(reg) in [R_SUBFLAGPARITY];
                          C_NS,            { SF=0 }
                          C_S:             { SF=1 }
                            Result:=getsubreg(reg) in [R_SUBFLAGSIGN];
                          else
                            internalerror(2017042701);
                        end;
                        if Result then
                          exit;
                      end;

                    { otherwise rely on the per flag read information }
                    case getsubreg(reg) of
                      R_SUBW,R_SUBD,R_SUBQ:
                        Result:=
                          [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                           Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                           Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
                      R_SUBFLAGCARRY:
                        Result:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGPARITY:
                        Result:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGAUXILIARY:
                        Result:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGZERO:
                        Result:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGSIGN:
                        Result:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGOVERFLOW:
                        Result:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGINTERRUPT:
                        Result:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGDIRECTION:
                        Result:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      else
                        internalerror(2017042601);
                    end;
                    exit;
                  end;

                { instructions flagged with Ch_NoReadIfEqualRegs do not count
                  as reading when both operands are the same register }
                if (Ch_NoReadIfEqualRegs in Ch) and (ins.ops=2) and
                   (ins.oper[0]^.typ=top_reg) and (ins.oper[1]^.typ=top_reg) and
                   (ins.oper[0]^.reg=ins.oper[1]^.reg) then
                  exit;

                { explicit operands that are marked as read, read/written or
                  modified; relies on short-circuit evaluation so an operand
                  is only inspected if its change flags are set }
                Result:=
                  (([Ch_RWOP1,Ch_ROP1,Ch_MOP1]*Ch<>[]) and RegInOp(reg,ins.oper[0]^)) or
                  (([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and RegInOp(reg,ins.oper[1]^)) or
                  (([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and RegInOp(reg,ins.oper[2]^));
              end;
          end;
      end;
    end;
  391. {$ifdef DEBUG_AOPTCPU}
  392. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  393. begin
  394. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  395. end;
  396. {$else DEBUG_AOPTCPU}
  397. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  398. begin
  399. end;
  400. {$endif DEBUG_AOPTCPU}
  { Returns true if writing a new value to reg1 replaces every bit of reg2.
    Registers with different super registers never overwrite each other;
    non-integer registers with the same super register always do. }
  function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    begin
      Result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      { same super register: true unless an integer sub register rule below
        says otherwise }
      Result:=true;
      if getregtype(reg1)<>R_INTREGISTER then
        exit;
      case getsubreg(reg1) of
        R_SUBL:
          { A write to R_SUBL leaves R_SUBH alone, and for R_SUBW or larger it
            preserves the upper bits, i.e. it behaves like
              reg2 := (reg2 and $ffffff00) or byte(reg1) }
          Result:=getsubreg(reg2)=R_SUBL;
        R_SUBH:
          { A write to R_SUBH leaves R_SUBL alone, and for R_SUBW or larger it
            behaves like
              reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00) }
          Result:=getsubreg(reg2)=R_SUBH;
        R_SUBW:
          { For R_SUBD or larger a write to R_SUBW keeps the upper 16 bits:
              reg2 := (reg2 and $ffff0000) or word(reg1) }
          Result:=getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW];
        R_SUBD,
        R_SUBQ:
          { a write to R_SUBD always overwrites every other sub register,
            because it clears the upper 32 bits of R_SUBQ on x86_64 }
          Result:=true;
        else
          internalerror(2017042801);
      end;
    end;
  { Returns true if the value read from reg1 depends on the contents of reg2.
    Registers with different super registers are independent; for integer
    registers only the low byte/high byte pair (R_SUBL and R_SUBH) of the same
    super register is treated as independent. }
  function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    begin
      Result:=false;
      if not SuperRegistersEqual(reg1,reg2) then
        exit;
      { same super register: dependent unless proven otherwise below }
      Result:=true;
      if getregtype(reg1)<>R_INTREGISTER then
        exit;
      case getsubreg(reg1) of
        R_SUBL:
          Result:=getsubreg(reg2)<>R_SUBH;
        R_SUBH:
          Result:=getsubreg(reg2)<>R_SUBL;
        R_SUBW,
        R_SUBD,
        R_SUBQ:
          Result:=true;
        else
          internalerror(2017042802);
      end;
    end;
  { Rewrites the sequence
        shr/sar const1, x
        shl     const2, x
    into "sar/and", "and/shl" or just "and", depending on const1 and const2.

    p is the shr/sar instruction; that p really is one is not checked here.
    The following instruction must be a SHL with a constant count, the same
    operand size and the same destination operand. The two rewrites that keep
    two instructions are skipped when optimizing for size.

    The function result is always false, also when the code was changed.

    The AND mask has the low "count" bits cleared. It is built with
    TCGInt(1) shl count: shl is evaluated at the width of its left operand, so
    an untyped 1 would give a wrong mask for counts >= 32 whenever the native
    integer of the compiling host is narrower than TCGInt. }
  function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
    var
      hp1 : tai;
      l : TCGInt;
    begin
      result:=false;
      if GetNextInstruction(p, hp1) and
         MatchInstruction(hp1,A_SHL,[]) and
         (taicpu(p).oper[0]^.typ = top_const) and
         (taicpu(hp1).oper[0]^.typ = top_const) and
         (taicpu(hp1).opsize = taicpu(p).opsize) and
         (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
         OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
        begin
          if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
             not(cs_opt_size in current_settings.optimizerswitches) then
            begin
              { shr/sar const1, %reg
                shl     const2, %reg
                with const1 > const2:
                shift right by the difference, then clear the low const2 bits }
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
              taicpu(hp1).opcode := A_AND;
              l := (TCGInt(1) shl (taicpu(hp1).oper[0]^.val)) - 1;
              case taicpu(p).opsize Of
                S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                S_Q: taicpu(hp1).loadConst(0,l Xor aint($ffffffffffffffff));
                else
                  Internalerror(2017050703)
              end;
            end
          else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                  not(cs_opt_size in current_settings.optimizerswitches) then
            begin
              { shr/sar const1, %reg
                shl     const2, %reg
                with const1 < const2:
                clear the low const1 bits, then shift left by the difference }
              taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
              taicpu(p).opcode := A_AND;
              l := (TCGInt(1) shl (taicpu(p).oper[0]^.val))-1;
              case taicpu(p).opsize Of
                S_B: taicpu(p).loadConst(0,l Xor $ff);
                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
                else
                  Internalerror(2017050702)
              end;
            end
          else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
            begin
              { shr/sar const1, %reg
                shl     const2, %reg
                with const1 = const2:
                a single AND clearing the low bits is enough, the SHL goes away }
              taicpu(p).opcode := A_AND;
              l := (TCGInt(1) shl (taicpu(p).oper[0]^.val))-1;
              case taicpu(p).opsize Of
                S_B: taicpu(p).loadConst(0,l Xor $ff);
                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
                else
                  Internalerror(2017050701)
              end;
              asml.remove(hp1);
              hp1.free;
            end;
        end;
    end;
  { Allocates register reg between (and including) instructions p1 and p2.
    The type of p1 and p2 must not be in SkipInstr.

    Note that this routine is called both from the peephole optimizer (where
    optinfo is not yet initialised) and from the cse (where it is); an
    instruction with assigned optinfo currently triggers internalerror
    2014022301.

    Nothing is done for NR_ESP, for the current frame pointer, or when p1 is
    not assigned. Otherwise reg is widened to its whole super register, an
    allocation is inserted in front of p1 if reg is not yet in
    initialusedregs (which is updated accordingly), and all regalloc entries
    for reg between p1 and the instruction following p2 are removed. If the
    first removed entry was an allocation, an allocation is re-inserted in
    front of the original p1; if the last removed entry was a deallocation, a
    deallocation is re-inserted in front of the instruction following p2. }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      hp, start: tai;
      removedsomething,
      firstRemovedWasAlloc,
      lastRemovedWasDealloc: boolean;
    begin
{$ifdef EXTDEBUG}
{      if assigned(p1.optinfo) and
         (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
        internalerror(2004101010); }
{$endif EXTDEBUG}
      start := p1;
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
         not(assigned(p1)) then
        { this happens with registers which are loaded implicitly, outside the }
        { current block (e.g. esi with self)                                   }
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      lastRemovedWasDealloc := false;
      removedSomething := false;
      firstRemovedWasAlloc := false;
      { do it the safe way: always allocate the full super register,
        as we do no register re-allocation in the peephole optimizer,
        this does not hurt
      }
      case getregtype(reg) of
        R_MMREGISTER:
          reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
        R_INTREGISTER:
          reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
      end;
{$ifdef allocregdebug}
      { mark the range in the assembler output; uses the widened register and
        the same three-argument insertllitem as the rest of this routine.
        p2 can be unassigned here if no instruction follows the original p2 }
      hp := tai_comment.Create(strpnew('allocating '+std_regname(reg)+
        ' from here...'));
      insertllitem(p1.previous,p1,hp);
      if assigned(p2) then
        begin
          hp := tai_comment.Create(strpnew('allocated '+std_regname(reg)+
            ' till here...'));
          insertllitem(p2,p2.next,hp);
        end;
{$endif allocregdebug}
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          hp := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,hp);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      while assigned(p1) and
            (p1 <> p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            { skip everything skippable except register (de)allocations }
            while assigned(p1) and
                  (p1.typ in (SkipInstr-[ait_regalloc])) Do
              p1 := tai(p1.next);
            { remove all allocation/deallocation info about the register in between }
            if assigned(p1) and
               (p1.typ = ait_regalloc) then
              begin
                { same super register, different sub register? }
                if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
                  begin
                    if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                      internalerror(2016101501);
                    tai_regalloc(p1).reg:=reg;
                  end;

                if tai_regalloc(p1).reg=reg then
                  begin
                    { remember what kind of entry was removed first and last,
                      so that the range boundaries can be restored afterwards }
                    if not removedSomething then
                      begin
                        firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                        removedSomething := true;
                      end;
                    lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                    hp := tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1 := hp;
                  end
                else
                  p1 := tai(p1.next);
              end;
          until not(assigned(p1)) or
                not(p1.typ in SkipInstr);
        end;
      if assigned(p1) then
        begin
          if firstRemovedWasAlloc then
            begin
              hp := tai_regalloc.Alloc(reg,nil);
              insertLLItem(start.previous,start,hp);
            end;
          if lastRemovedWasDealloc then
            begin
              hp := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,hp);
            end;
        end;
    end;
  { Reports whether the instruction hp gives reg a value that does not depend
    on the previous contents of reg.

    reg: register to test; sub registers of the flags register are handled
         separately through the insprop change information.
    hp:  list item to inspect; nil or anything that is not an instruction
         yields false.

    For the flags register the answer is taken from insprop[opcode].Ch; asking
    for the whole flags register (R_SUBW/R_SUBD/R_SUBQ) requires that all six
    status flags are written. Any other flags sub register raises
    internalerror(2017050501).

    For all other registers a fixed list of instruction shapes is recognised
    (moves, pop, imul/mul, sign extensions, segment loads, string loads,
    setcc, fstsw and the xor/sub/sbb reg,reg idiom). }
  function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
    var
      p: taicpu;
    begin
      if not assigned(hp) or
         (hp.typ <> ait_instruction) then
        begin
          Result := false;
          exit;
        end;
      p := taicpu(hp);
      if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
        with insprop[p.opcode] do
          begin
            case getsubreg(reg) of
              R_SUBW,R_SUBD,R_SUBQ:
                { the complete flags register: every status flag must be written }
                Result:=
                  RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
                  RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
                  RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
                  RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
                  RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
                  RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
              R_SUBFLAGCARRY:
                Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGPARITY:
                Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGAUXILIARY:
                Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGZERO:
                Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGSIGN:
                Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGOVERFLOW:
                Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGINTERRUPT:
                Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
              R_SUBFLAGDIRECTION:
                Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
              else
                internalerror(2017050501);
            end;
            exit;
          end;
      Result :=
        (((p.opcode = A_MOV) or
          (p.opcode = A_MOVZX) or
          (p.opcode = A_MOVSX) or
          (p.opcode = A_LEA) or
          (p.opcode = A_VMOVSS) or
          (p.opcode = A_VMOVSD) or
          (p.opcode = A_VMOVAPD) or
          (p.opcode = A_VMOVAPS) or
          (p.opcode = A_VMOVQ) or
          (p.opcode = A_MOVSS) or
          (p.opcode = A_MOVSD) or
          (p.opcode = A_MOVQ) or
          (p.opcode = A_MOVAPD) or
          (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
          (p.opcode = A_LDS) or
          (p.opcode = A_LES) or
{$endif not x86_64}
          (p.opcode = A_LFS) or
          (p.opcode = A_LGS) or
          (p.opcode = A_LSS)) and
         (p.ops=2) and   { A_MOVSD can have zero operands, so this check is needed }
         (p.oper[1]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
         ((p.oper[0]^.typ = top_const) or
          ((p.oper[0]^.typ = top_reg) and
           not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ = top_ref) and
           not RegInRef(reg,p.oper[0]^.ref^)))) or
        { pop may also have a memory operand: only look at the register field
          when the operand really is a register }
        ((p.opcode = A_POP) and
         (p.oper[0]^.typ = top_reg) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
        ((p.opcode = A_IMUL) and
         (p.ops=3) and
         (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
         (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
          ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
        { one operand imul/mul: the checked result register depends on the operand size }
        ((((p.opcode = A_IMUL) or
           (p.opcode = A_MUL)) and
          (p.ops=1)) and
         (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
          ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
         (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
          ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
          ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
          or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
         )) or
        ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
        ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
        ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
{$ifndef x86_64}
        ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
        ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
        ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$ifndef x86_64}
        ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
        ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
        ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
        ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
        ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
        ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
        ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        (((p.opcode = A_FSTSW) or
          (p.opcode = A_FNSTSW)) and
         (p.oper[0]^.typ=top_reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
        { xor/sub/sbb with identical source and destination register }
        (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
         (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
         (p.oper[0]^.reg=p.oper[1]^.reg) and
         Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
    end;
  { Tests whether p starts one of the recognised procedure exit sequences:
      ret
      leave; ret
      mov <framepointer>,<stackpointer>; pop <framepointer>; ret
      lea x(<framepointer>),<stackpointer>; pop <framepointer>; ret
    A single leading NOP is skipped before matching. }
  class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
    var
      popInstr,retInstr : tai;
      insn : taicpu;
      restoresStackPtr : boolean;
    begin
      Result:=false;
      { some x86-64 issue a NOP before the real exit code }
      if MatchInstruction(p,A_NOP,[]) then
        GetNextInstruction(p,p);
      if not(assigned(p)) or (p.typ<>ait_instruction) then
        exit;
      insn:=taicpu(p);
      if insn.opcode=A_RET then
        Result:=true
      else if insn.opcode=A_LEAVE then
        Result:=GetNextInstruction(p,retInstr) and
          MatchInstruction(retInstr,A_RET,[S_NO])
      else
        begin
          { does the instruction reload the stack pointer from the frame pointer? }
          if insn.opcode=A_MOV then
            restoresStackPtr:=
              MatchOpType(insn,top_reg,top_reg) and
              (insn.oper[0]^.reg=current_procinfo.framepointer) and
              (insn.oper[1]^.reg=NR_STACK_POINTER_REG)
          else if insn.opcode=A_LEA then
            restoresStackPtr:=
              MatchOpType(insn,top_ref,top_reg) and
              (insn.oper[0]^.ref^.base=current_procinfo.framepointer) and
              (insn.oper[1]^.reg=NR_STACK_POINTER_REG)
          else
            restoresStackPtr:=false;
          { ... and is it followed by "pop <framepointer>; ret"? }
          Result:=restoresStackPtr and
            GetNextInstruction(p,popInstr) and
            MatchInstruction(popInstr,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
            MatchOpType(taicpu(popInstr),top_reg) and
            (taicpu(popInstr).oper[0]^.reg=current_procinfo.framepointer) and
            GetNextInstruction(popInstr,retInstr) and
            MatchInstruction(retInstr,A_RET,[S_NO]);
        end;
    end;
  { Returns true when hp1 is an arithmetic/logic instruction that modifies reg
    in place:
      - add/sub/or/xor/and/shl/shr/sar whose destination (operand 1) is reg and
        whose source (operand 0) is a constant or a register other than reg
      - inc/dec/neg/not whose only operand is reg
    Every other opcode yields false. }
  class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    begin
      Result := False;
      case hp1.opcode of
        A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
          { the source operand is tested first, the destination only if it qualifies }
          if (hp1.oper[0]^.typ = top_const) or
             ((hp1.oper[0]^.typ = top_reg) and
              (hp1.oper[0]^.reg <> reg)) then
            Result :=
              (hp1.oper[1]^.typ = top_reg) and
              (hp1.oper[1]^.reg = reg);
        A_INC,A_DEC,A_NEG,A_NOT:
          if hp1.oper[0]^.typ = top_reg then
            Result := (hp1.oper[0]^.reg = reg);
      end;
    end;
  { Walks backwards from p and removes the nearest deallocation marker of the
    register(s) selected by the return type of the current procedure:
    EAX for the listed structured/pointer-like types and for ordinals of
    non-zero size, additionally EDX for ordinals of size 8. }
  procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

    { Removes the closest preceding ra_dealloc entry of integer super register
      supreg. The search stops at the start of the list or at the first item
      for which regInInstruction reports the whole register. }
    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      var
        cur: tai;
        wholeReg: tregister;
      begin
        wholeReg := newreg(R_INTREGISTER,supreg,R_SUBWHOLE);
        cur := tai(p.previous);
        while assigned(cur) do
          begin
            if (cur.typ = ait_regalloc) and
               (tai_regalloc(cur).ratype=ra_dealloc) and
               (getregtype(tai_regalloc(cur).reg) = R_INTREGISTER) and
               (getsupreg(tai_regalloc(cur).reg) = supreg) then
              begin
                asml.remove(cur);
                cur.free;
                break;
              end;
            if regInInstruction(wholeReg,cur) then
              break;
            cur := tai(cur.previous);
          end;
      end;

    begin
      case current_procinfo.procdef.returndef.typ of
        arraydef,recorddef,pointerdef,
        stringdef,enumdef,procdef,objectdef,errordef,
        filedef,setdef,procvardef,
        classrefdef,forwarddef:
          DoRemoveLastDeallocForFuncRes(RS_EAX);
        orddef:
          if current_procinfo.procdef.returndef.size <> 0 then
            begin
              DoRemoveLastDeallocForFuncRes(RS_EAX);
              { for int64/qword }
              if current_procinfo.procdef.returndef.size = 8 then
                DoRemoveLastDeallocForFuncRes(RS_EDX);
            end;
      end;
    end;
  { Pass 1 peephole optimization starting at the movaps/movapd instruction p.

    Changes
        movapX       reg,reg2
        addsX/subsX/mulsX/divsX  reg3,reg2
        movapX       reg2,reg
    into
        addsX/subsX/...  reg3,reg
    when reg2 is not used after the second move. The first move is kept when
    the arithmetic instruction uses the same register as source and
    destination.

    On success p points to the arithmetic instruction and the result is true;
    otherwise p is left untouched and the result is false. }
  function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstruction(p, hp1) and
        (hp1.typ = ait_instruction) and
        GetNextInstruction(hp1, hp2) and
        MatchInstruction(hp2,taicpu(p).opcode,[]) and
        OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
        MatchOpType(taicpu(hp2),top_reg,top_reg) and
        MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
        (((taicpu(p).opcode=A_MOVAPS) and
          ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
           (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
         ((taicpu(p).opcode=A_MOVAPD) and
          ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
           (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
        ) and
        { the arithmetic instruction must really work on reg2: its destination
          is redirected to reg below, which is only valid if it was reg2 }
        MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
        { change
                   movapX    reg,reg2
                   addsX/subsX/...    reg3, reg2
                   movapX    reg2,reg
          to
                   addsX/subsX/...    reg3,reg
        }
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
          If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
            begin
              DebugMsg('Peephole Optimization MovapXOpMovapX2Op ('+
                    std_op2str[taicpu(p).opcode]+' '+
                    std_op2str[taicpu(hp1).opcode]+' '+
                    std_op2str[taicpu(hp2).opcode]+')',p);
              { we cannot eliminate the first move if
                the operations uses the same register for source and dest }
              if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
                begin
                  asml.remove(p);
                  p.Free;
                end;
              taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
              asml.remove(hp2);
              hp2.Free;
              p:=hp1;
              result:=true;
            end;
          ReleaseUsedRegs(TmpUsedRegs);
        end
    end;
  { Pass 1 peephole optimizations starting at the vmovaps/vmovapd instruction p
    (both operands must be registers, otherwise nothing is done):

      - vmova* reg1,reg1 is removed
      - vmova* reg1,reg2; vmova* reg2,reg3 is folded into vmova* reg1,reg3 when
        reg2 is not used afterwards
      - vmova* reg1,reg2; vmova* reg2,reg1 drops the second move
      - vmova* reg1,reg2; <fma> ...,reg2; vmova* reg2,reg1 lets the fma write
        reg1 directly when reg2 is not used afterwards

    p may be replaced by the following instruction. The result is true on the
    first three rewrites; the fma rewrite leaves it false, as before.

    Every CopyUsedRegs is paired with a ReleaseUsedRegs so that the temporary
    register usage information is released on all paths. }
  function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1,hp2 : tai;
    begin
      result:=false;
      if MatchOpType(taicpu(p),top_reg,top_reg) then
        begin
          { vmova* reg1,reg1
            =>
            <nop> }
          if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
            begin
              GetNextInstruction(p,hp1);
              asml.Remove(p);
              p.Free;
              p:=hp1;
              result:=true;
            end
          else if GetNextInstruction(p,hp1) then
            begin
              if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
                MatchOpType(taicpu(hp1),top_reg,top_reg) and
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
                begin
                  { vmova* reg1,reg2
                    vmova* reg2,reg3
                    dealloc reg2
                    =>
                    vmova* reg1,reg3 }
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
                    begin
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end
                  { special case:
                    vmova* reg1,reg2
                    vmova* reg2,reg1
                    =>
                    vmova* reg1,reg2 }
                  else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
                    begin
                      asml.Remove(hp1);
                      hp1.Free;
                      result:=true;
                    end;
                  { release the copy made above on every path }
                  ReleaseUsedRegs(TmpUsedRegs);
                end
              else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
                { we mix single and double operations here because we assume that the compiler
                  generates vmovapd only after double operations and vmovaps only after single operations }
                MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
                GetNextInstruction(hp1,hp2) and
                MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
                MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
                begin
                  CopyUsedRegs(TmpUsedRegs);
                  UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                  UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                  if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
                    then
                    begin
                      taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
                      asml.Remove(p);
                      p.Free;
                      asml.Remove(hp2);
                      hp2.Free;
                      p:=hp1;
                    end;
                  { release the copy made above on every path }
                  ReleaseUsedRegs(TmpUsedRegs);
                end;
            end;
        end;
    end;
  { Pass 1 peephole optimization for a three operand instruction p that is
    followed by a vmovaps/vmovapd copying its result register:

        vop      ...,reg1
        vmova*   reg1,reg2
    becomes
        vop      ...,reg2

    when reg1 is not used after the move. Returns true when the move was
    removed. The temporary register usage copy is released on every path. }
  function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
    var
      TmpUsedRegs : TAllUsedRegs;
      hp1 : tai;
    begin
      result:=false;
      if GetNextInstruction(p,hp1) and
        { we mix single and double operations here because we assume that the compiler
          generates vmovapd only after double operations and vmovaps only after single operations }
        MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
        MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
        (taicpu(hp1).oper[1]^.typ=top_reg) then
        begin
          CopyUsedRegs(TmpUsedRegs);
          UpdateUsedRegs(TmpUsedRegs, tai(p.next));
          if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
            ) then
            begin
              taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
              asml.Remove(hp1);
              hp1.Free;
              result:=true;
            end;
          { pairs with CopyUsedRegs above }
          ReleaseUsedRegs(TmpUsedRegs);
        end;
    end;
  { Pass 1 peephole optimizations that start at the instruction p.

    p is used as a taicpu with two operands throughout without any check, so
    the caller presumably guarantees that p is a MOV instruction
    (NOTE(review): confirm at the call site).

    The routine looks at p and at up to two following instructions (hp1, hp2)
    and applies the first matching rewrite, among them:
      - mov reg1,reg1 is removed (Mov2Nop)
      - mov x,reg followed by an and with $ffffffff on the same 32 bit
        register: the and is removed (MovAnd2Mov)
      - mov x,treg; mov treg,y is folded into a single mov when treg is not
        used afterwards (MovMov2Mov 2/3)
      - mov reg1,reg2; test/or reg2,reg2: the test/or is switched to reg1 and
        the mov is dropped when a jump follows and reg2 is not used afterwards
      - a store to a frame pointer based reference directly before the exit
        code is removed
      - mov reg1,mem1; cmp/test x,mem1: the compare reads reg1 instead
      - several mov/mov and mov/mov/mov sequences working on the same operands
      - mov; foldable arithmetic instruction; mov back is folded into a single
        instruction (MovOpMov2Op)
      - mov 0,reg; bts/btr; or is reordered into mov/bts
      - mov ref,reg1; lea is turned into an add (MovLea2Add)

    p may be replaced by a following instruction when the original one is
    removed. Result is set to true only on some of the rewriting paths; other
    paths change the instruction list and still return false. }
  1001. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  1002. var
  1003. hp1, hp2: tai;
  1004. TmpUsedRegs : TAllUsedRegs;
  1005. GetNextIntruction_p : Boolean;
  1006. begin
  1007. Result:=false;
  1008. { remove mov reg1,reg1? }
  1009. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  1010. begin
  1011. GetNextInstruction(p, hp1);
  1012. DebugMsg('PeepHole Optimization,Mov2Nop',p);
  1013. asml.remove(p);
  1014. p.free;
  1015. p:=hp1;
  1016. Result:=true;
  1017. exit;
  1018. end;
  { fetch the instruction following p once; the flag is tested by the patterns below }
  1019. GetNextIntruction_p:=GetNextInstruction(p, hp1);
  { mov x,reg followed by "and const,reg" on the same register: for S_L a
    mask of $ffffffff is dropped }
  1020. if GetNextIntruction_p and
  1021. MatchInstruction(hp1,A_AND,[]) and
  1022. (taicpu(p).oper[1]^.typ = top_reg) and
  1023. MatchOpType(taicpu(hp1),top_const,top_reg) and
  1024. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  1025. case taicpu(p).opsize Of
  1026. S_L:
  1027. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1028. begin
  1029. DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
  1030. asml.remove(hp1);
  1031. hp1.free;
  1032. Result:=true;
  1033. exit;
  1034. end;
  1035. end
  { mov x,treg; mov treg,y where treg is one of EAX, EBX, ECX, EDX, ESI, EDI }
  1036. else if GetNextIntruction_p and
  1037. MatchInstruction(hp1,A_MOV,[]) and
  1038. (taicpu(p).oper[1]^.typ = top_reg) and
  1039. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  1040. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1041. begin
  1042. CopyUsedRegs(TmpUsedRegs);
  1043. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1044. { we have
  1045. mov x, %treg
  1046. mov %treg, y
  1047. }
  1048. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  1049. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1050. { we've got
  1051. mov x, %treg
  1052. mov %treg, y
  1053. with %treg is not used after }
  1054. case taicpu(p).oper[0]^.typ Of
  1055. top_reg:
  1056. begin
  1057. { change
  1058. mov %reg, %treg
  1059. mov %treg, y
  1060. to
  1061. mov %reg, y
  1062. }
  1063. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1064. DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
  1065. asml.remove(hp1);
  1066. hp1.free;
  1067. ReleaseUsedRegs(TmpUsedRegs);
  1068. Result:=true;
  1069. Exit;
  1070. end;
  1071. top_ref:
  1072. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1073. begin
  1074. { change
  1075. mov mem, %treg
  1076. mov %treg, %reg
  1077. to
  1078. mov mem, %reg"
  1079. }
  1080. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1081. DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
  1082. asml.remove(hp1);
  1083. hp1.free;
  1084. ReleaseUsedRegs(TmpUsedRegs);
  1085. Result:=true;
  1086. Exit;
  1087. end;
  1088. end;
  1089. ReleaseUsedRegs(TmpUsedRegs);
  1090. end
  1091. else
  1092. { Change
  1093. mov %reg1, %reg2
  1094. xxx %reg2, ???
  1095. to
  1096. mov %reg1, %reg2
  1097. xxx %reg1, ???
  1098. to avoid a write/read penalty
  1099. }
  1100. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1101. GetNextInstruction(p,hp1) and
  1102. (tai(hp1).typ = ait_instruction) and
  1103. (taicpu(hp1).ops >= 1) and
  1104. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1105. { we have
  1106. mov %reg1, %reg2
  1107. XXX %reg2, ???
  1108. }
  1109. begin
  1110. if ((taicpu(hp1).opcode = A_OR) or
  1111. (taicpu(hp1).opcode = A_TEST)) and
  1112. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1113. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1114. { we have
  1115. mov %reg1, %reg2
  1116. test/or %reg2, %reg2
  1117. }
  1118. begin
  1119. CopyUsedRegs(TmpUsedRegs);
  1120. { reg1 will be used after the first instruction,
  1121. so update the allocation info }
  1122. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1123. if GetNextInstruction(hp1, hp2) and
  1124. (hp2.typ = ait_instruction) and
  1125. taicpu(hp2).is_jmp and
  1126. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1127. { change
  1128. mov %reg1, %reg2
  1129. test/or %reg2, %reg2
  1130. jxx
  1131. to
  1132. test %reg1, %reg1
  1133. jxx
  1134. }
  1135. begin
  1136. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1137. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1138. asml.remove(p);
  1139. p.free;
  1140. p := hp1;
  1141. ReleaseUsedRegs(TmpUsedRegs);
  { NOTE(review): exits with Result still false although p was replaced by hp1 - confirm this is intended }
  1142. Exit;
  1143. end
  1144. else
  1145. { change
  1146. mov %reg1, %reg2
  1147. test/or %reg2, %reg2
  1148. to
  1149. mov %reg1, %reg2
  1150. test/or %reg1, %reg1
  1151. }
  1152. begin
  1153. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1154. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1155. end;
  1156. ReleaseUsedRegs(TmpUsedRegs);
  1157. end
  1158. end
  1159. else
  1160. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1161. x >= RetOffset) as it doesn't do anything (it writes either to a
  1162. parameter or to the temporary storage room for the function
  1163. result)
  1164. }
  1165. if GetNextIntruction_p and
  1166. (tai(hp1).typ = ait_instruction) then
  1167. begin
  1168. if IsExitCode(hp1) and
  1169. MatchOpType(taicpu(p),top_reg,top_ref) and
  1170. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1171. not(assigned(current_procinfo.procdef.funcretsym) and
  1172. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1173. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  1174. begin
  1175. asml.remove(p);
  1176. p.free;
  1177. p:=hp1;
  1178. DebugMsg('Peephole removed deadstore before leave/ret',p);
  1179. RemoveLastDeallocForFuncRes(p);
  { NOTE(review): exits with Result still false although the store was removed and p now is hp1 - confirm this is intended }
  1180. exit;
  1181. end
  1182. { change
  1183. mov reg1, mem1
  1184. test/cmp x, mem1
  1185. to
  1186. mov reg1, mem1
  1187. test/cmp x, reg1
  1188. }
  1189. else if MatchOpType(taicpu(p),top_reg,top_ref) and
  1190. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  1191. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1192. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1193. begin
  1194. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1195. DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
  1196. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1197. end;
  1198. end;
  1199. { Next instruction is also a MOV ? }
  1200. if GetNextIntruction_p and
  1201. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1202. begin
  1203. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1204. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1205. { mov reg1, mem1 or mov mem1, reg1
  1206. mov mem2, reg2 mov reg2, mem2}
  1207. begin
  1208. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1209. { mov reg1, mem1 or mov mem1, reg1
  1210. mov mem2, reg1 mov reg2, mem1}
  1211. begin
  1212. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1213. { Removes the second statement from
  1214. mov reg1, mem1/reg2
  1215. mov mem1/reg2, reg1 }
  1216. begin
  1217. if taicpu(p).oper[0]^.typ=top_reg then
  1218. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1219. DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
  1220. asml.remove(hp1);
  1221. hp1.free;
  1222. Result:=true;
  1223. exit;
  1224. end
  1225. else
  1226. begin
  1227. CopyUsedRegs(TmpUsedRegs);
  1228. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1229. if (taicpu(p).oper[1]^.typ = top_ref) and
  1230. { mov reg1, mem1
  1231. mov mem2, reg1 }
  1232. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1233. GetNextInstruction(hp1, hp2) and
  1234. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1235. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1236. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1237. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1238. { change to
  1239. mov reg1, mem1 mov reg1, mem1
  1240. mov mem2, reg1 cmp reg1, mem2
  1241. cmp mem1, reg1
  1242. }
  1243. begin
  1244. asml.remove(hp2);
  1245. hp2.free;
  1246. taicpu(hp1).opcode := A_CMP;
  1247. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1248. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1249. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1250. DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
  1251. end;
  1252. ReleaseUsedRegs(TmpUsedRegs);
  1253. end;
  1254. end
  1255. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1256. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1257. begin
  1258. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1259. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1260. DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
  1261. end
  1262. else
  1263. begin
  1264. CopyUsedRegs(TmpUsedRegs);
  1265. if GetNextInstruction(hp1, hp2) and
  1266. MatchOpType(taicpu(p),top_ref,top_reg) and
  1267. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1268. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1269. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1270. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1271. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1272. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1273. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1274. { mov mem1, %reg1
  1275. mov %reg1, mem2
  1276. mov mem2, reg2
  1277. to:
  1278. mov mem1, reg2
  1279. mov reg2, mem2}
  1280. begin
  1281. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1282. DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
  1283. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1284. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1285. asml.remove(hp2);
  1286. hp2.free;
  1287. end
  1288. {$ifdef i386}
  1289. { this is enabled for i386 only, as the rules to create the reg sets below
  1290. are too complicated for x86-64, so this makes this code too error prone
  1291. on x86-64
  1292. }
  1293. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1294. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1295. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1296. { mov mem1, reg1 mov mem1, reg1
  1297. mov reg1, mem2 mov reg1, mem2
  1298. mov mem2, reg2 mov mem2, reg1
  1299. to: to:
  1300. mov mem1, reg1 mov mem1, reg1
  1301. mov mem1, reg2 mov reg1, mem2
  1302. mov reg1, mem2
  1303. or (if mem1 depends on reg1
  1304. and/or if mem2 depends on reg2)
  1305. to:
  1306. mov mem1, reg1
  1307. mov reg1, mem2
  1308. mov reg1, reg2
  1309. }
  1310. begin
  1311. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1312. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1313. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1314. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1315. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1316. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1317. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1318. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1319. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1320. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1321. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1322. end
  1323. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1324. begin
  1325. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1326. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1327. end
  1328. else
  1329. begin
  1330. asml.remove(hp2);
  1331. hp2.free;
  1332. end
  1333. {$endif i386}
  1334. ;
  1335. ReleaseUsedRegs(TmpUsedRegs);
  1336. end;
  1337. end
  1338. (* { movl [mem1],reg1
  1339. movl [mem1],reg2
  1340. to
  1341. movl [mem1],reg1
  1342. movl reg1,reg2
  1343. }
  1344. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1345. (taicpu(p).oper[1]^.typ = top_reg) and
  1346. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1347. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1348. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1349. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1350. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1351. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1352. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1353. else*)
  1354. { movl const1,[mem1]
  1355. movl [mem1],reg1
  1356. to
  1357. movl const1,reg1
  1358. movl reg1,[mem1]
  1359. }
  1360. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  1361. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1362. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1363. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1364. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1365. begin
  1366. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1367. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1368. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1369. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1370. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1371. DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
  1372. end
  1373. end
  { mov into a register, a foldable arithmetic instruction on that register
    and a mov back to the original operand: see the example further down }
  1374. else if (taicpu(p).oper[1]^.typ = top_reg) and
  1375. GetNextIntruction_p and
  1376. (hp1.typ = ait_instruction) and
  1377. GetNextInstruction(hp1, hp2) and
  1378. MatchInstruction(hp2,A_MOV,[]) and
  1379. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1380. (taicpu(hp2).oper[0]^.typ=top_reg) and
  1381. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1382. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1383. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
  1384. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1385. ) then
  1386. { change movsX/movzX reg/ref, reg2
  1387. add/sub/or/... reg3/$const, reg2
  1388. mov reg2 reg/ref
  1389. to add/sub/or/... reg3/$const, reg/ref }
  1390. begin
  1391. CopyUsedRegs(TmpUsedRegs);
  1392. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1393. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1394. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1395. begin
  1396. { by example:
  1397. movswl %si,%eax movswl %si,%eax p
  1398. decl %eax addl %edx,%eax hp1
  1399. movw %ax,%si movw %ax,%si hp2
  1400. ->
  1401. movswl %si,%eax movswl %si,%eax p
  1402. decw %eax addw %edx,%eax hp1
  1403. movw %ax,%si movw %ax,%si hp2
  1404. }
  1405. DebugMsg('Peephole Optimization MovOpMov2Op ('+
  1406. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  1407. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  1408. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  1409. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1410. {
  1411. ->
  1412. movswl %si,%eax movswl %si,%eax p
  1413. decw %si addw %dx,%si hp1
  1414. movw %ax,%si movw %ax,%si hp2
  1415. }
  1416. case taicpu(hp1).ops of
  1417. 1:
  1418. begin
  1419. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1420. if taicpu(hp1).oper[0]^.typ=top_reg then
  1421. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1422. end;
  1423. 2:
  1424. begin
  1425. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1426. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1427. (taicpu(hp1).opcode<>A_SHL) and
  1428. (taicpu(hp1).opcode<>A_SHR) and
  1429. (taicpu(hp1).opcode<>A_SAR) then
  1430. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1431. end;
  1432. else
  1433. internalerror(2008042701);
  1434. end;
  1435. {
  1436. ->
  1437. decw %si addw %dx,%si p
  1438. }
  1439. asml.remove(p);
  1440. asml.remove(hp2);
  1441. p.Free;
  1442. hp2.Free;
  1443. p := hp1;
  1444. end;
  1445. ReleaseUsedRegs(TmpUsedRegs);
  1446. end
  1447. else if GetNextIntruction_p and
  1448. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1449. GetNextInstruction(hp1, hp2) and
  1450. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1451. MatchOperand(Taicpu(p).oper[0]^,0) and
  1452. (Taicpu(p).oper[1]^.typ = top_reg) and
  1453. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1454. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1455. { mov reg1,0
  1456. bts reg1,operand1 --> mov reg1,operand2
  1457. or reg1,operand2 bts reg1,operand1}
  1458. begin
  1459. Taicpu(hp2).opcode:=A_MOV;
  1460. asml.remove(hp1);
  1461. insertllitem(hp2,hp2.next,hp1);
  1462. asml.remove(p);
  1463. p.free;
  1464. p:=hp1;
  1465. end
  1466. else if GetNextIntruction_p and
  1467. MatchInstruction(hp1,A_LEA,[S_L]) and
  1468. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1469. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1470. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1471. ) or
  1472. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1473. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1474. )
  1475. ) then
  1476. { mov reg1,ref
  1477. lea reg2,[reg1,reg2]
  1478. to
  1479. add reg2,ref}
  1480. begin
  1481. CopyUsedRegs(TmpUsedRegs);
  1482. { reg1 may not be used afterwards }
  1483. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1484. begin
  1485. Taicpu(hp1).opcode:=A_ADD;
  1486. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1487. DebugMsg('Peephole MovLea2Add done',hp1);
  1488. asml.remove(p);
  1489. p.free;
  1490. p:=hp1;
  1491. end;
  1492. ReleaseUsedRegs(TmpUsedRegs);
  1493. end;
  1494. end;
  { Pass 1 peephole optimization for a pair of identical move-type
    instructions (same opcode and operand size) that copy a value back and
    forth:

        movXX    reg1, mem1/reg2
        movXX    mem1/reg2, reg1

    The second instruction is always removed. If the destination of the first
    instruction is a register that is not used after the second one, the
    first instruction is removed as well and p is advanced to the instruction
    following the pair.

    Returns true when something was removed. The debug message for the removal
    of p is issued while p is still part of the list, never after it has been
    freed. }
  function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
    var
      hp1 : tai;
    begin
      Result:=false;
      if GetNextInstruction(p,hp1) and
        MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) then
        begin
          if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
             (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
            {  movXX reg1, mem1     or     movXX mem1, reg1
               movXX mem2, reg2            movXX reg2, mem2}
            begin
              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
                {  movXX reg1, mem1     or     movXX mem1, reg1
                   movXX mem2, reg1            movXX reg2, mem1}
                begin
                  if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                    begin
                      { Removes the second statement from
                          movXX reg1, mem1/reg2
                          movXX mem1/reg2, reg1
                      }
                      if taicpu(p).oper[0]^.typ=top_reg then
                        AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                      { Removes the second statement from
                          movXX mem1/reg1, reg2
                          movXX reg2, mem1/reg1
                      }
                      if (taicpu(p).oper[1]^.typ=top_reg) and
                        not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                        begin
                          { report first: p must not be touched once it is freed }
                          DebugMsg('PeepHole Optimization,MovXXMovXX2Nop 1',p);
                          asml.remove(p);
                          p.free;
                          GetNextInstruction(hp1,p);
                        end
                      else
                        DebugMsg('PeepHole Optimization,MovXXMovXX2MoVXX 1',p);
                      asml.remove(hp1);
                      hp1.free;
                      Result:=true;
                      exit;
                    end
                end;
            end;
        end;
    end;
  1543. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  1544. var
  1545. TmpUsedRegs : TAllUsedRegs;
  1546. hp1,hp2: tai;
  1547. begin
  1548. Result:=false;
  1549. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1550. GetNextInstruction(p, hp1) and
  1551. MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
  1552. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  1553. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
  1554. or
  1555. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
  1556. ) and
  1557. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  1558. { mov reg1, reg2
  1559. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
  1560. begin
  1561. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  1562. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  1563. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  1564. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  1565. asml.remove(p);
  1566. p.free;
  1567. p := hp1;
  1568. Result:=true;
  1569. exit;
  1570. end
  1571. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1572. GetNextInstruction(p,hp1) and
  1573. (hp1.typ = ait_instruction) and
  1574. { while the GetNextInstruction(hp1,hp2) call could be factored out,
  1575. doing it separately in both branches allows to do the cheap checks
  1576. with low probability earlier }
  1577. ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1578. GetNextInstruction(hp1,hp2) and
  1579. MatchInstruction(hp2,A_MOV,[])
  1580. ) or
  1581. ((taicpu(hp1).opcode=A_LEA) and
  1582. GetNextInstruction(hp1,hp2) and
  1583. MatchInstruction(hp2,A_MOV,[]) and
  1584. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  1585. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
  1586. ) or
  1587. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
  1588. taicpu(p).oper[1]^.reg) and
  1589. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
  1590. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
  1591. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
  1592. ) and
  1593. ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
  1594. )
  1595. ) and
  1596. MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
  1597. (taicpu(hp2).oper[1]^.typ = top_ref) then
  1598. begin
  1599. CopyUsedRegs(TmpUsedRegs);
  1600. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1601. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  1602. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
  1603. { change mov (ref), reg
  1604. add/sub/or/... reg2/$const, reg
  1605. mov reg, (ref)
  1606. # release reg
  1607. to add/sub/or/... reg2/$const, (ref) }
  1608. begin
  1609. case taicpu(hp1).opcode of
  1610. A_INC,A_DEC,A_NOT,A_NEG :
  1611. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1612. A_LEA :
  1613. begin
  1614. taicpu(hp1).opcode:=A_ADD;
  1615. if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
  1616. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  1617. else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
  1618. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
  1619. else
  1620. taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
  1621. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1622. DebugMsg('Peephole FoldLea done',hp1);
  1623. end
  1624. else
  1625. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  1626. end;
  1627. asml.remove(p);
  1628. asml.remove(hp2);
  1629. p.free;
  1630. hp2.free;
  1631. p := hp1
  1632. end;
  1633. ReleaseUsedRegs(TmpUsedRegs);
  1634. end;
  1635. end;
  1636. function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  1637. var
  1638. TmpUsedRegs : TAllUsedRegs;
  1639. hp1 : tai;
  1640. begin
  1641. Result:=false;
  1642. if (taicpu(p).ops >= 2) and
  1643. ((taicpu(p).oper[0]^.typ = top_const) or
  1644. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  1645. (taicpu(p).oper[1]^.typ = top_reg) and
  1646. ((taicpu(p).ops = 2) or
  1647. ((taicpu(p).oper[2]^.typ = top_reg) and
  1648. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  1649. GetLastInstruction(p,hp1) and
  1650. MatchInstruction(hp1,A_MOV,[]) and
  1651. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  1652. ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
  1653. ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
  1654. begin
  1655. CopyUsedRegs(TmpUsedRegs);
  1656. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
  1657. { change
  1658. mov reg1,reg2
  1659. imul y,reg2 to imul y,reg1,reg2 }
  1660. begin
  1661. taicpu(p).ops := 3;
  1662. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  1663. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  1664. DebugMsg('Peephole MovImul2Imul done',p);
  1665. asml.remove(hp1);
  1666. hp1.free;
  1667. result:=true;
  1668. end;
  1669. ReleaseUsedRegs(TmpUsedRegs);
  1670. end;
  1671. end;
  1672. function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  1673. var
  1674. hp1 : tai;
  1675. begin
  1676. {
  1677. change
  1678. jmp .L1
  1679. ...
  1680. .L1:
  1681. ret
  1682. into
  1683. ret
  1684. }
  1685. result:=false;
  1686. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  1687. (taicpu(p).oper[0]^.ref^.index=NR_NO) then
  1688. begin
  1689. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  1690. if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
  1691. MatchInstruction(hp1,A_RET,[S_NO]) then
  1692. begin
  1693. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  1694. taicpu(p).opcode:=A_RET;
  1695. taicpu(p).is_jmp:=false;
  1696. taicpu(p).ops:=taicpu(hp1).ops;
  1697. case taicpu(hp1).ops of
  1698. 0:
  1699. taicpu(p).clearop(0);
  1700. 1:
  1701. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  1702. else
  1703. internalerror(2016041301);
  1704. end;
  1705. result:=true;
  1706. end;
  1707. end;
  1708. end;
  1709. function CanBeCMOV(p : tai) : boolean;
  1710. begin
  1711. CanBeCMOV:=assigned(p) and
  1712. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  1713. { we can't use cmov ref,reg because
  1714. ref could be nil and cmov still throws an exception
  1715. if ref=nil but the mov isn't done (FK)
  1716. or ((taicpu(p).oper[0]^.typ = top_ref) and
  1717. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  1718. }
  1719. MatchOpType(taicpu(p),top_reg,top_reg);
  1720. end;
  1721. function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  1722. var
  1723. hp1,hp2,hp3: tai;
  1724. carryadd_opcode : TAsmOp;
  1725. l : Longint;
  1726. condition : TAsmCond;
  1727. begin
  1728. { jb @@1 cmc
  1729. inc/dec operand --> adc/sbb operand,0
  1730. @@1:
  1731. ... and ...
  1732. jnb @@1
  1733. inc/dec operand --> adc/sbb operand,0
  1734. @@1: }
  1735. result:=false;
  1736. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  1737. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  1738. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  1739. begin
  1740. carryadd_opcode:=A_NONE;
  1741. if Taicpu(p).condition in [C_NAE,C_B] then
  1742. begin
  1743. if Taicpu(hp1).opcode=A_INC then
  1744. carryadd_opcode:=A_ADC;
  1745. if Taicpu(hp1).opcode=A_DEC then
  1746. carryadd_opcode:=A_SBB;
  1747. if carryadd_opcode<>A_NONE then
  1748. begin
  1749. Taicpu(p).clearop(0);
  1750. Taicpu(p).ops:=0;
  1751. Taicpu(p).is_jmp:=false;
  1752. Taicpu(p).opcode:=A_CMC;
  1753. Taicpu(p).condition:=C_NONE;
  1754. Taicpu(hp1).ops:=2;
  1755. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1756. Taicpu(hp1).loadconst(0,0);
  1757. Taicpu(hp1).opcode:=carryadd_opcode;
  1758. result:=true;
  1759. exit;
  1760. end;
  1761. end;
  1762. if Taicpu(p).condition in [C_AE,C_NB] then
  1763. begin
  1764. if Taicpu(hp1).opcode=A_INC then
  1765. carryadd_opcode:=A_ADC;
  1766. if Taicpu(hp1).opcode=A_DEC then
  1767. carryadd_opcode:=A_SBB;
  1768. if carryadd_opcode<>A_NONE then
  1769. begin
  1770. asml.remove(p);
  1771. p.free;
  1772. Taicpu(hp1).ops:=2;
  1773. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1774. Taicpu(hp1).loadconst(0,0);
  1775. Taicpu(hp1).opcode:=carryadd_opcode;
  1776. p:=hp1;
  1777. result:=true;
  1778. exit;
  1779. end;
  1780. end;
  1781. end;
  1782. {$ifndef i8086}
  1783. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  1784. begin
  1785. { check for
  1786. jCC xxx
  1787. <several movs>
  1788. xxx:
  1789. }
  1790. l:=0;
  1791. GetNextInstruction(p, hp1);
  1792. while assigned(hp1) and
  1793. CanBeCMOV(hp1) and
  1794. { stop on labels }
  1795. not(hp1.typ=ait_label) do
  1796. begin
  1797. inc(l);
  1798. GetNextInstruction(hp1,hp1);
  1799. end;
  1800. if assigned(hp1) then
  1801. begin
  1802. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1803. begin
  1804. if (l<=4) and (l>0) then
  1805. begin
  1806. condition:=inverse_cond(taicpu(p).condition);
  1807. hp2:=p;
  1808. GetNextInstruction(p,hp1);
  1809. p:=hp1;
  1810. repeat
  1811. taicpu(hp1).opcode:=A_CMOVcc;
  1812. taicpu(hp1).condition:=condition;
  1813. GetNextInstruction(hp1,hp1);
  1814. until not(assigned(hp1)) or
  1815. not(CanBeCMOV(hp1));
  1816. { wait with removing else GetNextInstruction could
  1817. ignore the label if it was the only usage in the
  1818. jump moved away }
  1819. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1820. { if the label refs. reach zero, remove any alignment before the label }
  1821. if (hp1.typ=ait_align) and (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
  1822. begin
  1823. asml.Remove(hp1);
  1824. hp1.Free;
  1825. end;
  1826. asml.remove(hp2);
  1827. hp2.free;
  1828. result:=true;
  1829. exit;
  1830. end;
  1831. end
  1832. else
  1833. begin
  1834. { check further for
  1835. jCC xxx
  1836. <several movs 1>
  1837. jmp yyy
  1838. xxx:
  1839. <several movs 2>
  1840. yyy:
  1841. }
  1842. { hp2 points to jmp yyy }
  1843. hp2:=hp1;
  1844. { skip hp1 to xxx }
  1845. GetNextInstruction(hp1, hp1);
  1846. if assigned(hp2) and
  1847. assigned(hp1) and
  1848. (l<=3) and
  1849. (hp2.typ=ait_instruction) and
  1850. (taicpu(hp2).is_jmp) and
  1851. (taicpu(hp2).condition=C_None) and
  1852. { real label and jump, no further references to the
  1853. label are allowed }
  1854. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
  1855. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1856. begin
  1857. l:=0;
  1858. { skip hp1 to <several moves 2> }
  1859. GetNextInstruction(hp1, hp1);
  1860. while assigned(hp1) and
  1861. CanBeCMOV(hp1) do
  1862. begin
  1863. inc(l);
  1864. GetNextInstruction(hp1, hp1);
  1865. end;
  1866. { hp1 points to yyy: }
  1867. if assigned(hp1) and
  1868. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  1869. begin
  1870. condition:=inverse_cond(taicpu(p).condition);
  1871. GetNextInstruction(p,hp1);
  1872. hp3:=p;
  1873. p:=hp1;
  1874. repeat
  1875. taicpu(hp1).opcode:=A_CMOVcc;
  1876. taicpu(hp1).condition:=condition;
  1877. GetNextInstruction(hp1,hp1);
  1878. until not(assigned(hp1)) or
  1879. not(CanBeCMOV(hp1));
  1880. { hp2 is still at jmp yyy }
  1881. GetNextInstruction(hp2,hp1);
  1882. { hp2 is now at xxx: }
  1883. condition:=inverse_cond(condition);
  1884. GetNextInstruction(hp1,hp1);
  1885. { hp1 is now at <several movs 2> }
  1886. repeat
  1887. taicpu(hp1).opcode:=A_CMOVcc;
  1888. taicpu(hp1).condition:=condition;
  1889. GetNextInstruction(hp1,hp1);
  1890. until not(assigned(hp1)) or
  1891. not(CanBeCMOV(hp1));
  1892. {
  1893. asml.remove(hp1.next)
  1894. hp1.next.free;
  1895. asml.remove(hp1);
  1896. hp1.free;
  1897. }
  1898. { remove jCC }
  1899. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1900. asml.remove(hp3);
  1901. hp3.free;
  1902. { remove jmp }
  1903. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1904. asml.remove(hp2);
  1905. hp2.free;
  1906. result:=true;
  1907. exit;
  1908. end;
  1909. end;
  1910. end;
  1911. end;
  1912. end;
  1913. {$endif i8086}
  1914. end;
  1915. function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  1916. var
  1917. hp1,hp2: tai;
  1918. begin
  1919. result:=false;
  1920. if (taicpu(p).oper[1]^.typ = top_reg) and
  1921. GetNextInstruction(p,hp1) and
  1922. (hp1.typ = ait_instruction) and
  1923. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1924. GetNextInstruction(hp1,hp2) and
  1925. MatchInstruction(hp2,A_MOV,[]) and
  1926. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1927. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1928. {$ifdef i386}
  1929. { not all registers have byte size sub registers on i386 }
  1930. ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
  1931. {$endif i386}
  1932. (((taicpu(hp1).ops=2) and
  1933. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1934. ((taicpu(hp1).ops=1) and
  1935. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1936. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1937. begin
  1938. { change movsX/movzX reg/ref, reg2
  1939. add/sub/or/... reg3/$const, reg2
  1940. mov reg2 reg/ref
  1941. to add/sub/or/... reg3/$const, reg/ref }
  1942. { by example:
  1943. movswl %si,%eax movswl %si,%eax p
  1944. decl %eax addl %edx,%eax hp1
  1945. movw %ax,%si movw %ax,%si hp2
  1946. ->
  1947. movswl %si,%eax movswl %si,%eax p
  1948. decw %eax addw %edx,%eax hp1
  1949. movw %ax,%si movw %ax,%si hp2
  1950. }
  1951. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1952. {
  1953. ->
  1954. movswl %si,%eax movswl %si,%eax p
  1955. decw %si addw %dx,%si hp1
  1956. movw %ax,%si movw %ax,%si hp2
  1957. }
  1958. case taicpu(hp1).ops of
  1959. 1:
  1960. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1961. 2:
  1962. begin
  1963. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1964. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1965. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1966. end;
  1967. else
  1968. internalerror(2008042701);
  1969. end;
  1970. {
  1971. ->
  1972. decw %si addw %dx,%si p
  1973. }
  1974. DebugMsg('PeepHole Optimization,var3',p);
  1975. asml.remove(p);
  1976. asml.remove(hp2);
  1977. p.free;
  1978. hp2.free;
  1979. p:=hp1;
  1980. end
  1981. { removes superfluous And's after movzx's }
  1982. else if taicpu(p).opcode=A_MOVZX then
  1983. begin
  1984. if (taicpu(p).oper[1]^.typ = top_reg) and
  1985. GetNextInstruction(p, hp1) and
  1986. (tai(hp1).typ = ait_instruction) and
  1987. (taicpu(hp1).opcode = A_AND) and
  1988. (taicpu(hp1).oper[0]^.typ = top_const) and
  1989. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1990. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1991. begin
  1992. case taicpu(p).opsize Of
  1993. S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
  1994. if (taicpu(hp1).oper[0]^.val = $ff) then
  1995. begin
  1996. DebugMsg('PeepHole Optimization,var4',p);
  1997. asml.remove(hp1);
  1998. hp1.free;
  1999. end;
  2000. S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
  2001. if (taicpu(hp1).oper[0]^.val = $ffff) then
  2002. begin
  2003. DebugMsg('PeepHole Optimization,var5',p);
  2004. asml.remove(hp1);
  2005. hp1.free;
  2006. end;
  2007. {$ifdef x86_64}
  2008. S_LQ:
  2009. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  2010. begin
  2011. if (cs_asm_source in current_settings.globalswitches) then
  2012. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
  2013. asml.remove(hp1);
  2014. hp1.Free;
  2015. end;
  2016. {$endif x86_64}
  2017. end;
  2018. end;
  2019. { changes some movzx constructs to faster synonims (all examples
  2020. are given with eax/ax, but are also valid for other registers)}
  2021. if (taicpu(p).oper[1]^.typ = top_reg) then
  2022. if (taicpu(p).oper[0]^.typ = top_reg) then
  2023. case taicpu(p).opsize of
  2024. S_BW:
  2025. begin
  2026. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2027. not(cs_opt_size in current_settings.optimizerswitches) then
  2028. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  2029. begin
  2030. taicpu(p).opcode := A_AND;
  2031. taicpu(p).changeopsize(S_W);
  2032. taicpu(p).loadConst(0,$ff);
  2033. DebugMsg('PeepHole Optimization,var7',p);
  2034. end
  2035. else if GetNextInstruction(p, hp1) and
  2036. (tai(hp1).typ = ait_instruction) and
  2037. (taicpu(hp1).opcode = A_AND) and
  2038. (taicpu(hp1).oper[0]^.typ = top_const) and
  2039. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2040. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2041. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  2042. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  2043. begin
  2044. DebugMsg('PeepHole Optimization,var8',p);
  2045. taicpu(p).opcode := A_MOV;
  2046. taicpu(p).changeopsize(S_W);
  2047. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  2048. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2049. end;
  2050. end;
  2051. S_BL:
  2052. begin
  2053. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2054. not(cs_opt_size in current_settings.optimizerswitches) then
  2055. { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
  2056. begin
  2057. taicpu(p).opcode := A_AND;
  2058. taicpu(p).changeopsize(S_L);
  2059. taicpu(p).loadConst(0,$ff)
  2060. end
  2061. else if GetNextInstruction(p, hp1) and
  2062. (tai(hp1).typ = ait_instruction) and
  2063. (taicpu(hp1).opcode = A_AND) and
  2064. (taicpu(hp1).oper[0]^.typ = top_const) and
  2065. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2066. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2067. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  2068. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  2069. begin
  2070. DebugMsg('PeepHole Optimization,var10',p);
  2071. taicpu(p).opcode := A_MOV;
  2072. taicpu(p).changeopsize(S_L);
  2073. { do not use R_SUBWHOLE
  2074. as movl %rdx,%eax
  2075. is invalid in assembler PM }
  2076. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2077. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2078. end
  2079. end;
  2080. {$ifndef i8086}
  2081. S_WL:
  2082. begin
  2083. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2084. not(cs_opt_size in current_settings.optimizerswitches) then
  2085. { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
  2086. begin
  2087. DebugMsg('PeepHole Optimization,var11',p);
  2088. taicpu(p).opcode := A_AND;
  2089. taicpu(p).changeopsize(S_L);
  2090. taicpu(p).loadConst(0,$ffff);
  2091. end
  2092. else if GetNextInstruction(p, hp1) and
  2093. (tai(hp1).typ = ait_instruction) and
  2094. (taicpu(hp1).opcode = A_AND) and
  2095. (taicpu(hp1).oper[0]^.typ = top_const) and
  2096. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2097. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2098. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  2099. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  2100. begin
  2101. DebugMsg('PeepHole Optimization,var12',p);
  2102. taicpu(p).opcode := A_MOV;
  2103. taicpu(p).changeopsize(S_L);
  2104. { do not use R_SUBWHOLE
  2105. as movl %rdx,%eax
  2106. is invalid in assembler PM }
  2107. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2108. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2109. end;
  2110. end;
  2111. {$endif i8086}
  2112. end
  2113. else if (taicpu(p).oper[0]^.typ = top_ref) then
  2114. begin
  2115. if GetNextInstruction(p, hp1) and
  2116. (tai(hp1).typ = ait_instruction) and
  2117. (taicpu(hp1).opcode = A_AND) and
  2118. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2119. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2120. begin
  2121. taicpu(p).opcode := A_MOV;
  2122. case taicpu(p).opsize Of
  2123. S_BL:
  2124. begin
  2125. DebugMsg('PeepHole Optimization,var13',p);
  2126. taicpu(p).changeopsize(S_L);
  2127. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2128. end;
  2129. S_WL:
  2130. begin
  2131. DebugMsg('PeepHole Optimization,var14',p);
  2132. taicpu(p).changeopsize(S_L);
  2133. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2134. end;
  2135. S_BW:
  2136. begin
  2137. DebugMsg('PeepHole Optimization,var15',p);
  2138. taicpu(p).changeopsize(S_W);
  2139. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2140. end;
  2141. {$ifdef x86_64}
  2142. S_BQ:
  2143. begin
  2144. DebugMsg('PeepHole Optimization,var16',p);
  2145. taicpu(p).changeopsize(S_Q);
  2146. taicpu(hp1).loadConst(
  2147. 0, taicpu(hp1).oper[0]^.val and $ff);
  2148. end;
  2149. S_WQ:
  2150. begin
  2151. DebugMsg('PeepHole Optimization,var17',p);
  2152. taicpu(p).changeopsize(S_Q);
  2153. taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
  2154. end;
  2155. S_LQ:
  2156. begin
  2157. DebugMsg('PeepHole Optimization,var18',p);
  2158. taicpu(p).changeopsize(S_Q);
  2159. taicpu(hp1).loadConst(
  2160. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  2161. end;
  2162. {$endif x86_64}
  2163. else
  2164. Internalerror(2017050704)
  2165. end;
  2166. end;
  2167. end;
  2168. end;
  2169. end;
  2170. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  2171. var
  2172. hp1 : tai;
  2173. begin
  2174. Result:=false;
  2175. if not(GetNextInstruction(p, hp1)) then
  2176. exit;
  2177. if MatchOpType(taicpu(p),top_const,top_reg) and
  2178. MatchInstruction(hp1,A_AND,[]) and
  2179. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2180. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2181. { the second register must contain the first one, so compare their subreg types }
  2182. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2183. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  2184. { change
  2185. and const1, reg
  2186. and const2, reg
  2187. to
  2188. and (const1 and const2), reg
  2189. }
  2190. begin
  2191. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  2192. DebugMsg('Peephole AndAnd2And done',hp1);
  2193. asml.remove(p);
  2194. p.Free;
  2195. p:=hp1;
  2196. Result:=true;
  2197. exit;
  2198. end
  2199. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2200. MatchInstruction(hp1,A_MOVZX,[]) and
  2201. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2202. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2203. (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2204. (((taicpu(p).opsize=S_W) and
  2205. (taicpu(hp1).opsize=S_BW)) or
  2206. ((taicpu(p).opsize=S_L) and
  2207. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2208. {$ifdef x86_64}
  2209. or
  2210. ((taicpu(p).opsize=S_Q) and
  2211. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2212. {$endif x86_64}
  2213. ) then
  2214. begin
  2215. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2216. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  2217. ) or
  2218. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2219. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  2220. {$ifdef x86_64}
  2221. or
  2222. (((taicpu(hp1).opsize)=S_LQ) and
  2223. ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
  2224. )
  2225. {$endif x86_64}
  2226. then
  2227. begin
  2228. DebugMsg('Peephole AndMovzToAnd done',p);
  2229. asml.remove(hp1);
  2230. hp1.free;
  2231. end;
  2232. end
  2233. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2234. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  2235. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2236. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2237. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2238. (((taicpu(p).opsize=S_W) and
  2239. (taicpu(hp1).opsize=S_BW)) or
  2240. ((taicpu(p).opsize=S_L) and
  2241. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2242. {$ifdef x86_64}
  2243. or
  2244. ((taicpu(p).opsize=S_Q) and
  2245. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2246. {$endif x86_64}
  2247. ) then
  2248. begin
  2249. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2250. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  2251. ) or
  2252. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2253. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  2254. {$ifdef x86_64}
  2255. or
  2256. (((taicpu(hp1).opsize)=S_LQ) and
  2257. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  2258. )
  2259. {$endif x86_64}
  2260. then
  2261. begin
  2262. DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
  2263. asml.remove(hp1);
  2264. hp1.free;
  2265. end;
  2266. end
  2267. else if (taicpu(p).oper[1]^.typ = top_reg) and
  2268. (hp1.typ = ait_instruction) and
  2269. (taicpu(hp1).is_jmp) and
  2270. (taicpu(hp1).opcode<>A_JMP) and
  2271. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  2272. { change
  2273. and x, reg
  2274. jxx
  2275. to
  2276. test x, reg
  2277. jxx
  2278. if reg is deallocated before the
  2279. jump, but only if it's a conditional jump (PFV)
  2280. }
  2281. taicpu(p).opcode := A_TEST;
  2282. end;
  2283. procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
  2284. begin
  2285. if MatchOperand(taicpu(p).oper[0]^,0) and
  2286. (taicpu(p).oper[1]^.typ = Top_Reg) and
  2287. not(RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2288. { change "mov $0, %reg" into "xor %reg, %reg" }
  2289. begin
  2290. taicpu(p).opcode := A_XOR;
  2291. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  2292. end;
  2293. end;
  2294. end.