aoptx86.pas 135 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
7
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
type
  { x86/x86_64 specialisation of the generic assembler peephole optimizer. }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { true if hp loads reg with a value that does not depend on reg's old contents }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    { true if hp reads (part of) reg; answered by RegReadByInstruction }
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    { true if the instruction hp reads (part of) reg, including implicit
      operands and flag reads implied by condition codes }
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
   protected
    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    { checks whether reading the value in reg1 depends on the value of reg2. This
      is very similar to SuperRegisterEquals, except it takes into account that
      R_SUBH and R_SUBL are independent (e.g. reading from AL does not
      depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    { inserts an explanatory comment before p (only in DEBUG_AOPTCPU builds) }
    procedure DebugMsg(const s : string; p : tai);inline;
    class function IsExitCode(p : tai) : boolean;
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
    procedure RemoveLastDeallocForFuncRes(p : tai);
    function DoSubAddOpt(var p : tai) : Boolean;
    { pre-pass optimisations for shift instructions (shr/sar followed by shl) }
    function PrePeepholeOptSxx(var p : tai) : boolean;
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1VMOVAP(var p : tai) : boolean;
    function OptPass1VOP(const p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass1Movx(var p : tai) : boolean;
    function OptPass1MOVAP(var p : tai) : boolean;
    function OptPass1MOVXX(var p : tai) : boolean;
    function OptPass1OP(const p : tai) : boolean;
    function OptPass1LEA(var p : tai) : boolean;
    function OptPass1Sub(var p : tai) : boolean;
    function OptPass1SHLSAL(var p : tai) : boolean;
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;
    function OptPass2Jmp(var p : tai) : boolean;
    function OptPass2Jcc(var p : tai) : boolean;
    function PostPeepholeOptMov(const p : tai) : Boolean;
{$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
    function PostPeepholeOptMovzx(const p : tai) : Boolean;
    function PostPeepholeOptXor(var p : tai) : Boolean;
{$endif}
    function PostPeepholeOptCmp(var p : tai) : Boolean;
    function PostPeepholeOptTestOr(var p : tai) : Boolean;
    function PostPeepholeOptCall(var p : tai) : Boolean;
    function PostPeepholeOptLea(const p : tai) : Boolean;
    procedure OptReferences;
  end;

{ true if instr is an assembler instruction with the given opcode(s) and,
  unless opsize=[] (wildcard), one of the given operand sizes }
function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;

{ true if oper is exactly the given register / constant, or equal to oper2 }
function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;

function RefsEqual(const r1, r2: treference): boolean;

{ NR_INVALID for base/index acts as a wildcard; requires a zero offset }
function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
{ returns true, if ref is a reference using only the registers passed as base and index
  and having an offset }
function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;

const
  { prefix used for all debug comments emitted by this optimizer }
  SPeepholeOptimization: string = 'Peephole Optimization: ';
  86. implementation
  87. uses
  88. cutils,verbose,
  89. globals,
  90. cpuinfo,
  91. procinfo,
  92. aasmbase,
  93. aoptutils,
  94. symconst,symsym,
  95. cgx86,
  96. itcpugas;
  97. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  98. begin
  99. result :=
  100. (instr.typ = ait_instruction) and
  101. (taicpu(instr).opcode = op) and
  102. ((opsize = []) or (taicpu(instr).opsize in opsize));
  103. end;
  104. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  105. begin
  106. result :=
  107. (instr.typ = ait_instruction) and
  108. ((taicpu(instr).opcode = op1) or
  109. (taicpu(instr).opcode = op2)
  110. ) and
  111. ((opsize = []) or (taicpu(instr).opsize in opsize));
  112. end;
  113. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  114. begin
  115. result :=
  116. (instr.typ = ait_instruction) and
  117. ((taicpu(instr).opcode = op1) or
  118. (taicpu(instr).opcode = op2) or
  119. (taicpu(instr).opcode = op3)
  120. ) and
  121. ((opsize = []) or (taicpu(instr).opsize in opsize));
  122. end;
  123. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  124. const opsize : topsizes) : boolean;
  125. var
  126. op : TAsmOp;
  127. begin
  128. result:=false;
  129. for op in ops do
  130. begin
  131. if (instr.typ = ait_instruction) and
  132. (taicpu(instr).opcode = op) and
  133. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  134. begin
  135. result:=true;
  136. exit;
  137. end;
  138. end;
  139. end;
  140. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  141. begin
  142. result := (oper.typ = top_reg) and (oper.reg = reg);
  143. end;
  144. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  145. begin
  146. result := (oper.typ = top_const) and (oper.val = a);
  147. end;
  148. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  149. begin
  150. result := oper1.typ = oper2.typ;
  151. if result then
  152. case oper1.typ of
  153. top_const:
  154. Result:=oper1.val = oper2.val;
  155. top_reg:
  156. Result:=oper1.reg = oper2.reg;
  157. top_ref:
  158. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  159. else
  160. internalerror(2013102801);
  161. end
  162. end;
  163. function RefsEqual(const r1, r2: treference): boolean;
  164. begin
  165. RefsEqual :=
  166. (r1.offset = r2.offset) and
  167. (r1.segment = r2.segment) and (r1.base = r2.base) and
  168. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  169. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  170. (r1.relsymbol = r2.relsymbol);
  171. end;
  172. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  173. begin
  174. Result:=(ref.offset=0) and
  175. (ref.scalefactor in [0,1]) and
  176. (ref.segment=NR_NO) and
  177. (ref.symbol=nil) and
  178. (ref.relsymbol=nil) and
  179. ((base=NR_INVALID) or
  180. (ref.base=base)) and
  181. ((index=NR_INVALID) or
  182. (ref.index=index));
  183. end;
  184. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  185. begin
  186. Result:=(ref.scalefactor in [0,1]) and
  187. (ref.segment=NR_NO) and
  188. (ref.symbol=nil) and
  189. (ref.relsymbol=nil) and
  190. ((base=NR_INVALID) or
  191. (ref.base=base)) and
  192. ((index=NR_INVALID) or
  193. (ref.index=index));
  194. end;
  195. function InstrReadsFlags(p: tai): boolean;
  196. var
  197. l: longint;
  198. begin
  199. InstrReadsFlags := true;
  200. case p.typ of
  201. ait_instruction:
  202. if InsProp[taicpu(p).opcode].Ch*
  203. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  204. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  205. Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]<>[] then
  206. exit;
  207. ait_label:
  208. exit;
  209. end;
  210. InstrReadsFlags := false;
  211. end;
function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  begin
    { thin adapter: the generic optimizer callback is answered by the
      x86-specific read analysis below }
    Result:=RegReadByInstruction(reg,hp);
  end;
function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  var
    p: taicpu;
    opcount: longint;
  begin
    { Returns true when the instruction hp reads (part of) register reg.
      Handles implicit operands of MUL/IMUL/DIV/IDIV, registers used in
      address calculations, the reg-to-reg form of SSE MOVSD, and flag
      reads implied by condition codes; everything else is decided from
      the per-opcode change information in insprop. }
    RegReadByInstruction := false;
    if hp.typ <> ait_instruction then
      exit;
    p := taicpu(hp);
    case p.opcode of
      A_CALL:
        { a call may read any register (parameters, ABI) - be conservative }
        regreadbyinstruction := true;
      A_IMUL:
        case p.ops of
          1:
            { the one-operand form implicitly reads the accumulator;
              AH (R_SUBH) is only an input for non-byte sizes }
            regReadByInstruction := RegInOp(reg,p.oper[0]^) or
               (
                ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
                ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
               );
          2,3:
            { the two/three operand forms only read their explicit operands }
            regReadByInstruction :=
              reginop(reg,p.oper[0]^) or
              reginop(reg,p.oper[1]^);
        end;
      A_MUL:
        begin
          { like one-operand IMUL: implicit accumulator read, AH excluded
            for the byte-sized form }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
             (
              ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
              ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
             );
        end;
      A_IDIV,A_DIV:
        begin
          { division reads the dividend from EAX (all sizes) and EDX
            (all sizes except byte) in addition to the explicit divisor }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
             (
               (getregtype(reg)=R_INTREGISTER) and
               (
                 (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
               )
             );
        end;
      else
        begin
          { LEA only computes an address, so a segment register is never
            actually read by it }
          if (p.opcode=A_LEA) and is_segment_reg(reg) then
            begin
              RegReadByInstruction := false;
              exit;
            end;
          { any register appearing inside a memory reference is read for
            the address calculation, regardless of the operand's role }
          for opcount := 0 to p.ops-1 do
            if (p.oper[opCount]^.typ = top_ref) and
               RegInRef(reg,p.oper[opcount]^.ref^) then
              begin
                RegReadByInstruction := true;
                exit
              end;
          { special handling for SSE MOVSD }
          if (p.opcode=A_MOVSD) and (p.ops>0) then
            begin
              if p.ops<>2 then
                internalerror(2017042702);
              { for the register-to-register form, the destination operand
                (oper[1]) counts as read as well, not only the source }
              regReadByInstruction := reginop(reg,p.oper[0]^) or
                (
                 (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
                );
              exit;
            end;
          { generic path: consult the instruction change information }
          with insprop[p.opcode] do
            begin
              if getregtype(reg)=R_INTREGISTER then
                begin
                  { implicit reads/modifies of specific integer registers }
                  case getsupreg(reg) of
                    RS_EAX:
                      if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ECX:
                      if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDX:
                      if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBX:
                      if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESP:
                      if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBP:
                      if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESI:
                      if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDI:
                      if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                  end;
                end;
              if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
                begin
                  { a conditional instruction only reads the individual
                    flags that its condition code actually tests (only
                    checked when asking about a single flag sub-register) }
                  if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
                    begin
                      case p.condition of
                        C_A,C_NBE,       { CF=0 and ZF=0  }
                        C_BE,C_NA:       { CF=1 or ZF=1   }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                        C_AE,C_NB,C_NC,  { CF=0           }
                        C_B,C_NAE,C_C:   { CF=1           }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
                        C_NE,C_NZ,       { ZF=0           }
                        C_E,C_Z:         { ZF=1           }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
                        C_G,C_NLE,       { ZF=0 and SF=OF }
                        C_LE,C_NG:       { ZF=1 or SF<>OF }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_GE,C_NL,       { SF=OF          }
                        C_L,C_NGE:       { SF<>OF         }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_NO,            { OF=0           }
                        C_O:             { OF=1           }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
                        C_NP,C_PO,       { PF=0           }
                        C_P,C_PE:        { PF=1           }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
                        C_NS,            { SF=0           }
                        C_S:             { SF=1           }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
                        else
                          internalerror(2017042701);
                      end;
                      if RegReadByInstruction then
                        exit;
                    end;
                  { otherwise decide per flag sub-register from the
                    change information }
                  case getsubreg(reg) of
                    R_SUBW,R_SUBD,R_SUBQ:
                      { whole flags register: any single-flag read counts }
                      RegReadByInstruction :=
                        [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                         Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                         Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
                    R_SUBFLAGCARRY:
                      RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGPARITY:
                      RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGAUXILIARY:
                      RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGZERO:
                      RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGSIGN:
                      RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGOVERFLOW:
                      RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGINTERRUPT:
                      RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGDIRECTION:
                      RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    else
                      internalerror(2017042601);
                  end;
                  exit;
                end;
              { e.g. "xor %reg,%reg" does not really read %reg }
              if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
                 (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
                 (p.oper[0]^.reg=p.oper[1]^.reg) then
                exit;
              { explicit operands that the change information marks as
                read or modified }
              if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
            end;
        end;
    end;
  end;
{$ifdef DEBUG_AOPTCPU}
  { emits the optimizer message s as an assembler comment just before p }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
{$else DEBUG_AOPTCPU}
  { no-op in non-debug builds; declared inline so calls disappear entirely }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
{$endif DEBUG_AOPTCPU}
function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  begin
    { Returns true when writing reg1 replaces every bit of reg2, i.e. when
      reg2's old value cannot survive the write in any sub-register. }
    if not SuperRegistersEqual(reg1,reg2) then
      exit(false);
    { non-integer registers have no independently writable sub-registers }
    if getregtype(reg1)<>R_INTREGISTER then
      exit(true); {because SuperRegisterEqual is true}
    case getsubreg(reg1) of
      { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
        higher, it preserves the high bits, so the new value depends on
        reg2's previous value. In other words, it is equivalent to doing:
          reg2 := (reg2 and $ffffff00) or byte(reg1); }
      R_SUBL:
        exit(getsubreg(reg2)=R_SUBL);
      { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
        higher, it actually does a:
          reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
      R_SUBH:
        exit(getsubreg(reg2)=R_SUBH);
      { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
        bits of reg2:
          reg2 := (reg2 and $ffff0000) or word(reg1); }
      R_SUBW:
        exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
      { a write to R_SUBD always overwrites every other subregister,
        because it clears the high 32 bits of R_SUBQ on x86_64 }
      R_SUBD,
      R_SUBQ:
        exit(true);
      else
        internalerror(2017042801);
    end;
  end;
  471. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  472. begin
  473. if not SuperRegistersEqual(reg1,reg2) then
  474. exit(false);
  475. if getregtype(reg1)<>R_INTREGISTER then
  476. exit(true); {because SuperRegisterEqual is true}
  477. case getsubreg(reg1) of
  478. R_SUBL:
  479. exit(getsubreg(reg2)<>R_SUBH);
  480. R_SUBH:
  481. exit(getsubreg(reg2)<>R_SUBL);
  482. R_SUBW,
  483. R_SUBD,
  484. R_SUBQ:
  485. exit(true);
  486. else
  487. internalerror(2017042802);
  488. end;
  489. end;
function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  var
    hp1 : tai;
    l : TCGInt;  { bit mask derived from the surviving shift count }
  begin
    result:=false;
    { changes the code sequence
        shr/sar const1, x
        shl     const2, x
      to
      either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
    if GetNextInstruction(p, hp1) and
       MatchInstruction(hp1,A_SHL,[]) and
       (taicpu(p).oper[0]^.typ = top_const) and
       (taicpu(hp1).oper[0]^.typ = top_const) and
       (taicpu(hp1).opsize = taicpu(p).opsize) and
       (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
       OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
      begin
        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
           not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl     const2, %reg
              with const1 > const2 }
            { keep a right shift of const1-const2 and replace the shl by an
              and that clears the const2 low bits (the constants below are
              larger, hence skipped when optimizing for size) }
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            taicpu(hp1).opcode := A_AND;
            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
            case taicpu(p).opsize Of
              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
              S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
              S_Q: taicpu(hp1).loadConst(0,l Xor aint($ffffffffffffffff));
              else
                Internalerror(2017050703)
            end;
          end
        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
          not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl     const2, %reg
              with const1 < const2 }
            { keep a left shift of const2-const1 and turn the first shift
              into an and masking the const1 low bits }
            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
              else
                Internalerror(2017050702)
            end;
          end
        else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
          begin
            { shr/sar const1, %reg
              shl     const2, %reg
              with const1 = const2 }
            { the shifts cancel except for clearing the const1 low bits:
              replace both by a single and, dropping the shl entirely }
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
              else
                Internalerror(2017050701)
            end;
            asml.remove(hp1);
            hp1.free;
          end;
      end;
  end;
  565. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  566. var
  567. p: taicpu;
  568. begin
  569. if not assigned(hp) or
  570. (hp.typ <> ait_instruction) then
  571. begin
  572. Result := false;
  573. exit;
  574. end;
  575. p := taicpu(hp);
  576. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  577. with insprop[p.opcode] do
  578. begin
  579. case getsubreg(reg) of
  580. R_SUBW,R_SUBD,R_SUBQ:
  581. Result:=
  582. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  583. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  584. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  585. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  586. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  587. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  588. R_SUBFLAGCARRY:
  589. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  590. R_SUBFLAGPARITY:
  591. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  592. R_SUBFLAGAUXILIARY:
  593. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  594. R_SUBFLAGZERO:
  595. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  596. R_SUBFLAGSIGN:
  597. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  598. R_SUBFLAGOVERFLOW:
  599. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  600. R_SUBFLAGINTERRUPT:
  601. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  602. R_SUBFLAGDIRECTION:
  603. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  604. else
  605. begin
  606. writeln(getsubreg(reg));
  607. internalerror(2017050501);
  608. end;
  609. end;
  610. exit;
  611. end;
  612. Result :=
  613. (((p.opcode = A_MOV) or
  614. (p.opcode = A_MOVZX) or
  615. (p.opcode = A_MOVSX) or
  616. (p.opcode = A_LEA) or
  617. (p.opcode = A_VMOVSS) or
  618. (p.opcode = A_VMOVSD) or
  619. (p.opcode = A_VMOVAPD) or
  620. (p.opcode = A_VMOVAPS) or
  621. (p.opcode = A_VMOVQ) or
  622. (p.opcode = A_MOVSS) or
  623. (p.opcode = A_MOVSD) or
  624. (p.opcode = A_MOVQ) or
  625. (p.opcode = A_MOVAPD) or
  626. (p.opcode = A_MOVAPS) or
  627. {$ifndef x86_64}
  628. (p.opcode = A_LDS) or
  629. (p.opcode = A_LES) or
  630. {$endif not x86_64}
  631. (p.opcode = A_LFS) or
  632. (p.opcode = A_LGS) or
  633. (p.opcode = A_LSS)) and
  634. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  635. (p.oper[1]^.typ = top_reg) and
  636. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  637. ((p.oper[0]^.typ = top_const) or
  638. ((p.oper[0]^.typ = top_reg) and
  639. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  640. ((p.oper[0]^.typ = top_ref) and
  641. not RegInRef(reg,p.oper[0]^.ref^)))) or
  642. ((p.opcode = A_POP) and
  643. (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
  644. ((p.opcode = A_IMUL) and
  645. (p.ops=3) and
  646. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  647. (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
  648. ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
  649. ((((p.opcode = A_IMUL) or
  650. (p.opcode = A_MUL)) and
  651. (p.ops=1)) and
  652. (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  653. ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
  654. (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  655. ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  656. ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
  657. {$ifdef x86_64}
  658. or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
  659. {$endif x86_64}
  660. )) or
  661. ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  662. ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
  663. {$ifdef x86_64}
  664. ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
  665. {$endif x86_64}
  666. ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  667. {$ifndef x86_64}
  668. ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  669. ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  670. {$endif not x86_64}
  671. ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  672. ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  673. ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  674. {$ifndef x86_64}
  675. ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  676. {$endif not x86_64}
  677. ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  678. ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
  679. ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
  680. ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
  681. {$ifdef x86_64}
  682. ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
  683. {$endif x86_64}
  684. ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  685. (((p.opcode = A_FSTSW) or
  686. (p.opcode = A_FNSTSW)) and
  687. (p.oper[0]^.typ=top_reg) and
  688. Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  689. (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
  690. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  691. (p.oper[0]^.reg=p.oper[1]^.reg) and
  692. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  693. end;
{ Returns true if "p" starts a recognized function epilogue:
    - ret
    - leave; ret
    - mov  %framepointer,%stackpointer; pop %framepointer; ret
    - lea  x(%framepointer),%stackpointer; pop %framepointer; ret
  A leading NOP is skipped first.
  Note: the GetNextInstruction calls inside the boolean expression rely on
  short-circuit evaluation and assign hp2/hp3 as side effects. }
class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  var
    hp2,hp3 : tai;
  begin
    { some x86-64 issue a NOP before the real exit code }
    if MatchInstruction(p,A_NOP,[]) then
      GetNextInstruction(p,p);
    result:=assigned(p) and (p.typ=ait_instruction) and
      ((taicpu(p).opcode = A_RET) or
       { leave; ret }
       ((taicpu(p).opcode=A_LEAVE) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_RET,[S_NO])
       ) or
       { mov/lea restoring the stack pointer from the frame pointer ... }
       ((((taicpu(p).opcode=A_MOV) and
          MatchOpType(taicpu(p),top_reg,top_reg) and
          (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
         ((taicpu(p).opcode=A_LEA) and
          MatchOpType(taicpu(p),top_ref,top_reg) and
          (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
         )
        ) and
        { ... followed by "pop framepointer; ret" }
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
        MatchOpType(taicpu(hp2),top_reg) and
        (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
        GetNextInstruction(hp2,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      );
  end;
  726. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  727. begin
  728. isFoldableArithOp := False;
  729. case hp1.opcode of
  730. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  731. isFoldableArithOp :=
  732. ((taicpu(hp1).oper[0]^.typ = top_const) or
  733. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  734. (taicpu(hp1).oper[0]^.reg <> reg))) and
  735. (taicpu(hp1).oper[1]^.typ = top_reg) and
  736. (taicpu(hp1).oper[1]^.reg = reg);
  737. A_INC,A_DEC,A_NEG,A_NOT:
  738. isFoldableArithOp :=
  739. (taicpu(hp1).oper[0]^.typ = top_reg) and
  740. (taicpu(hp1).oper[0]^.reg = reg);
  741. end;
  742. end;
{ Removes the last "dealloc" marker for the function-result register(s)
  before instruction "p", so the result register stays marked live through
  the exit sequence. Which registers are treated depends on the return
  type of the current procedure. }
procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

  { Walks backwards from p and deletes the first ra_dealloc marker found
    for the given integer super-register; stops at the first instruction
    that actually uses the register. }
  procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
    var
      hp2: tai;
    begin
      hp2 := p;
      repeat
        hp2 := tai(hp2.previous);
        if assigned(hp2) and
          (hp2.typ = ait_regalloc) and
          (tai_regalloc(hp2).ratype=ra_dealloc) and
          (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
          (getsupreg(tai_regalloc(hp2).reg) = supreg) then
          begin
            asml.remove(hp2);
            hp2.free;
            break;
          end;
      { relies on short-circuit evaluation: regInInstruction is only
        called while hp2 is still assigned }
      until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
    end;

  begin
    case current_procinfo.procdef.returndef.typ of
      arraydef,recorddef,pointerdef,
      stringdef,enumdef,procdef,objectdef,errordef,
      filedef,setdef,procvardef,
      classrefdef,forwarddef:
        DoRemoveLastDeallocForFuncRes(RS_EAX);
      orddef:
        if current_procinfo.procdef.returndef.size <> 0 then
          begin
            DoRemoveLastDeallocForFuncRes(RS_EAX);
            { for int64/qword }
            if current_procinfo.procdef.returndef.size = 8 then
              DoRemoveLastDeallocForFuncRes(RS_EDX);
          end;
    end;
  end;
{ Pass-1 optimization for MOVAPS/MOVAPD:
    movapX reg,reg2
    addsX/subsX/mulsX/divsX reg3,reg2
    movapX reg2,reg
  is rewritten to
    addsX/subsX/mulsX/divsX reg3,reg
  when reg2 is not used afterwards.
  Note: "alloc"/"dealloc" locals are currently unused in this view. }
function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
  var
    TmpUsedRegs : TAllUsedRegs;
    hp1,hp2 : tai;
    alloc ,dealloc: tai_regalloc;
  begin
    result:=false;
    if MatchOpType(taicpu(p),top_reg,top_reg) and
      GetNextInstruction(p, hp1) and
      (hp1.typ = ait_instruction) and
      GetNextInstruction(hp1, hp2) and
      { hp2 must be the same movapX opcode copying back into p's source }
      MatchInstruction(hp2,taicpu(p).opcode,[]) and
      OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
      MatchOpType(taicpu(hp2),top_reg,top_reg) and
      MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
      { the middle instruction must be a matching scalar arithmetic op
        (single ops after movaps, double ops after movapd) }
      (((taicpu(p).opcode=A_MOVAPS) and
        ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
         (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
       ((taicpu(p).opcode=A_MOVAPD) and
        ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
         (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
      ) then
      { change
                movapX reg,reg2
                addsX/subsX/... reg3, reg2
                movapX reg2,reg
        to
                addsX/subsX/... reg3,reg
      }
      begin
        CopyUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
        If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
          begin
            DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+
              std_op2str[taicpu(p).opcode]+' '+
              std_op2str[taicpu(hp1).opcode]+' '+
              std_op2str[taicpu(hp2).opcode]+') done',p);
            { we cannot eliminate the first move if
              the operations uses the same register for source and dest }
            if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
              begin
                asml.remove(p);
                p.Free;
              end;
            { retarget the arithmetic op's destination and drop the
              trailing move }
            taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
            asml.remove(hp2);
            hp2.Free;
            p:=hp1;
            result:=true;
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end
  end;
  835. function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  836. var
  837. TmpUsedRegs : TAllUsedRegs;
  838. hp1,hp2 : tai;
  839. begin
  840. result:=false;
  841. if MatchOpType(taicpu(p),top_reg,top_reg) then
  842. begin
  843. { vmova* reg1,reg1
  844. =>
  845. <nop> }
  846. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  847. begin
  848. GetNextInstruction(p,hp1);
  849. asml.Remove(p);
  850. p.Free;
  851. p:=hp1;
  852. result:=true;
  853. end
  854. else if GetNextInstruction(p,hp1) then
  855. begin
  856. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  857. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  858. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  859. begin
  860. { vmova* reg1,reg2
  861. vmova* reg2,reg3
  862. dealloc reg2
  863. =>
  864. vmova* reg1,reg3 }
  865. CopyUsedRegs(TmpUsedRegs);
  866. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  867. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  868. begin
  869. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  870. asml.Remove(hp1);
  871. hp1.Free;
  872. result:=true;
  873. end
  874. { special case:
  875. vmova* reg1,reg2
  876. vmova* reg2,reg1
  877. =>
  878. vmova* reg1,reg2 }
  879. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  880. begin
  881. asml.Remove(hp1);
  882. hp1.Free;
  883. result:=true;
  884. end
  885. end
  886. else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
  887. { we mix single and double opperations here because we assume that the compiler
  888. generates vmovapd only after double operations and vmovaps only after single operations }
  889. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  890. GetNextInstruction(hp1,hp2) and
  891. MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  892. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  893. begin
  894. CopyUsedRegs(TmpUsedRegs);
  895. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  896. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  897. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  898. then
  899. begin
  900. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  901. asml.Remove(p);
  902. p.Free;
  903. asml.Remove(hp2);
  904. hp2.Free;
  905. p:=hp1;
  906. end;
  907. end;
  908. end;
  909. end;
  910. end;
  911. function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
  912. var
  913. TmpUsedRegs : TAllUsedRegs;
  914. hp1 : tai;
  915. begin
  916. result:=false;
  917. { replace
  918. V<Op>X %mreg1,%mreg2,%mreg3
  919. VMovX %mreg3,%mreg4
  920. dealloc %mreg3
  921. by
  922. V<Op>X %mreg1,%mreg2,%mreg4
  923. ?
  924. }
  925. if GetNextInstruction(p,hp1) and
  926. { we mix single and double operations here because we assume that the compiler
  927. generates vmovapd only after double operations and vmovaps only after single operations }
  928. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  929. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  930. (taicpu(hp1).oper[1]^.typ=top_reg) then
  931. begin
  932. CopyUsedRegs(TmpUsedRegs);
  933. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  934. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  935. ) then
  936. begin
  937. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  938. DebugMsg(SPeepholeOptimization + 'VOpVmov2VOp done',p);
  939. asml.Remove(hp1);
  940. hp1.Free;
  941. result:=true;
  942. end;
  943. end;
  944. end;
  945. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  946. var
  947. hp1, hp2: tai;
  948. TmpUsedRegs : TAllUsedRegs;
  949. GetNextInstruction_p: Boolean;
  950. PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
  951. NewSize: topsize;
  952. begin
  953. Result:=false;
  954. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  955. { remove mov reg1,reg1? }
  956. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^)
  957. {$ifdef x86_64}
  958. { Exceptional case:
  959. if for example, "mov %eax,%eax" is followed by a command that then
  960. reads %rax, then mov actually has the effect of zeroing the upper
  961. 32 bits of the register and hence is not a null operation. [Kit]
  962. }
  963. and not (
  964. (taicpu(p).oper[0]^.typ = top_reg) and
  965. (taicpu(hp1).typ = ait_instruction) and
  966. (taicpu(hp1).opsize = S_Q) and
  967. (taicpu(hp1).ops > 0) and
  968. (
  969. (
  970. (taicpu(hp1).oper[0]^.typ = top_reg) and
  971. (getsupreg(taicpu(hp1).oper[0]^.reg) = getsupreg(taicpu(p).oper[0]^.reg))
  972. )
  973. or
  974. (
  975. (taicpu(hp1).opcode in [A_IMUL, A_IDIV]) and
  976. (taicpu(hp1).oper[1]^.typ = top_reg) and
  977. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[0]^.reg))
  978. )
  979. )
  980. )
  981. {$endif x86_64}
  982. then
  983. begin
  984. DebugMsg(SPeepholeOptimization + 'Mov2Nop done',p);
  985. { take care of the register (de)allocs following p }
  986. UpdateUsedRegs(tai(p.next));
  987. asml.remove(p);
  988. p.free;
  989. p:=hp1;
  990. Result:=true;
  991. exit;
  992. end;
  993. if GetNextInstruction_p and
  994. MatchInstruction(hp1,A_AND,[]) and
  995. (taicpu(p).oper[1]^.typ = top_reg) and
  996. MatchOpType(taicpu(hp1),top_const,top_reg) then
  997. begin
  998. if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  999. begin
  1000. case taicpu(p).opsize of
  1001. S_L:
  1002. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1003. begin
  1004. { Optimize out:
  1005. mov x, %reg
  1006. and ffffffffh, %reg
  1007. }
  1008. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
  1009. asml.remove(hp1);
  1010. hp1.free;
  1011. Result:=true;
  1012. exit;
  1013. end;
  1014. S_Q: { TODO: Confirm if this is even possible }
  1015. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  1016. begin
  1017. { Optimize out:
  1018. mov x, %reg
  1019. and ffffffffffffffffh, %reg
  1020. }
  1021. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
  1022. asml.remove(hp1);
  1023. hp1.free;
  1024. Result:=true;
  1025. exit;
  1026. end;
  1027. end;
  1028. end
  1029. else if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
  1030. (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
  1031. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  1032. then
  1033. begin
  1034. if taicpu(p).oper[0]^.typ = top_reg then
  1035. InputVal := '%' + std_regname(taicpu(p).oper[0]^.reg)
  1036. else
  1037. InputVal := 'x';
  1038. MaskNum := tostr(taicpu(hp1).oper[0]^.val);
  1039. case taicpu(p).opsize of
  1040. S_B:
  1041. if (taicpu(hp1).oper[0]^.val = $ff) then
  1042. begin
  1043. { Convert:
  1044. movb x, %regl movb x, %regl
  1045. andw ffh, %regw andl ffh, %regd
  1046. To:
  1047. movzbw x, %regd movzbl x, %regd
  1048. (Identical registers, just different sizes)
  1049. }
  1050. RegName1 := std_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
  1051. RegName2 := std_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
  1052. case taicpu(hp1).opsize of
  1053. S_W: NewSize := S_BW;
  1054. S_L: NewSize := S_BL;
  1055. {$ifdef x86_64}
  1056. S_Q: NewSize := S_BQ;
  1057. {$endif x86_64}
  1058. else
  1059. InternalError(2018011510);
  1060. end;
  1061. end
  1062. else
  1063. NewSize := S_NO;
  1064. S_W:
  1065. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1066. begin
  1067. { Convert:
  1068. movw x, %regw
  1069. andl ffffh, %regd
  1070. To:
  1071. movzwl x, %regd
  1072. (Identical registers, just different sizes)
  1073. }
  1074. RegName1 := std_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
  1075. RegName2 := std_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
  1076. case taicpu(hp1).opsize of
  1077. S_L: NewSize := S_WL;
  1078. {$ifdef x86_64}
  1079. S_Q: NewSize := S_WQ;
  1080. {$endif x86_64}
  1081. else
  1082. InternalError(2018011511);
  1083. end;
  1084. end
  1085. else
  1086. NewSize := S_NO;
  1087. else
  1088. NewSize := S_NO;
  1089. end;
  1090. if NewSize <> S_NO then
  1091. begin
  1092. PreMessage := 'mov' + gas_opsize2str[taicpu(p).opsize] + ' ' + InputVal + ',%' + RegName1;
  1093. { The actual optimization }
  1094. taicpu(p).opcode := A_MOVZX;
  1095. taicpu(p).changeopsize(NewSize);
  1096. taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^;
  1097. { Safeguard if "and" is followed by a conditional command }
  1098. CopyUsedRegs(TmpUsedRegs);
  1099. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1100. if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, tai(hp1.next), TmpUsedRegs)) then
  1101. begin
  1102. { At this point, the "and" command is effectively equivalent to
  1103. "test %reg,%reg". This will be handled separately by the
  1104. Peephole Optimizer. [Kit] }
  1105. DebugMsg(SPeepholeOptimization + PreMessage +
  1106. ' -> movz' + gas_opsize2str[NewSize] + ' ' + InputVal + ',%' + RegName2, p);
  1107. end
  1108. else
  1109. begin
  1110. DebugMsg(SPeepholeOptimization + PreMessage + '; and' + gas_opsize2str[taicpu(hp1).opsize] + ' $' + MaskNum + ',%' + RegName2 +
  1111. ' -> movz' + gas_opsize2str[NewSize] + ' ' + InputVal + ',%' + RegName2, p);
  1112. asml.Remove(hp1);
  1113. hp1.Free;
  1114. end;
  1115. Result := True;
  1116. ReleaseUsedRegs(TmpUsedRegs);
  1117. Exit;
  1118. end;
  1119. end;
  1120. end
  1121. else if GetNextInstruction_p and
  1122. MatchInstruction(hp1,A_MOV,[]) and
  1123. (taicpu(p).oper[1]^.typ = top_reg) and
  1124. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1125. begin
  1126. CopyUsedRegs(TmpUsedRegs);
  1127. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1128. { we have
  1129. mov x, %treg
  1130. mov %treg, y
  1131. }
  1132. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  1133. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1134. { we've got
  1135. mov x, %treg
  1136. mov %treg, y
  1137. with %treg is not used after }
  1138. case taicpu(p).oper[0]^.typ Of
  1139. top_reg:
  1140. begin
  1141. { change
  1142. mov %reg, %treg
  1143. mov %treg, y
  1144. to
  1145. mov %reg, y
  1146. }
  1147. if taicpu(hp1).oper[1]^.typ=top_reg then
  1148. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1149. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1150. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 2 done',p);
  1151. asml.remove(hp1);
  1152. hp1.free;
  1153. ReleaseUsedRegs(TmpUsedRegs);
  1154. Result:=true;
  1155. Exit;
  1156. end;
  1157. top_const:
  1158. begin
  1159. { change
  1160. mov const, %treg
  1161. mov %treg, y
  1162. to
  1163. mov const, y
  1164. }
  1165. if (taicpu(hp1).oper[1]^.typ=top_reg) or
  1166. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  1167. begin
  1168. if taicpu(hp1).oper[1]^.typ=top_reg then
  1169. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1170. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1171. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done',p);
  1172. asml.remove(hp1);
  1173. hp1.free;
  1174. ReleaseUsedRegs(TmpUsedRegs);
  1175. Result:=true;
  1176. Exit;
  1177. end;
  1178. end;
  1179. top_ref:
  1180. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1181. begin
  1182. { change
  1183. mov mem, %treg
  1184. mov %treg, %reg
  1185. to
  1186. mov mem, %reg"
  1187. }
  1188. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1189. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done',p);
  1190. asml.remove(hp1);
  1191. hp1.free;
  1192. ReleaseUsedRegs(TmpUsedRegs);
  1193. Result:=true;
  1194. Exit;
  1195. end;
  1196. end;
  1197. ReleaseUsedRegs(TmpUsedRegs);
  1198. end
  1199. else
  1200. { Change
  1201. mov %reg1, %reg2
  1202. xxx %reg2, ???
  1203. to
  1204. mov %reg1, %reg2
  1205. xxx %reg1, ???
  1206. to avoid a write/read penalty
  1207. }
  1208. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1209. GetNextInstruction(p,hp1) and
  1210. (tai(hp1).typ = ait_instruction) and
  1211. (taicpu(hp1).ops >= 1) and
  1212. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1213. { we have
  1214. mov %reg1, %reg2
  1215. XXX %reg2, ???
  1216. }
  1217. begin
  1218. if ((taicpu(hp1).opcode = A_OR) or
  1219. (taicpu(hp1).opcode = A_AND) or
  1220. (taicpu(hp1).opcode = A_TEST)) and
  1221. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1222. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1223. { we have
  1224. mov %reg1, %reg2
  1225. test/or/and %reg2, %reg2
  1226. }
  1227. begin
  1228. CopyUsedRegs(TmpUsedRegs);
  1229. { reg1 will be used after the first instruction,
  1230. so update the allocation info }
  1231. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1232. if GetNextInstruction(hp1, hp2) and
  1233. (hp2.typ = ait_instruction) and
  1234. taicpu(hp2).is_jmp and
  1235. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1236. { change
  1237. mov %reg1, %reg2
  1238. test/or/and %reg2, %reg2
  1239. jxx
  1240. to
  1241. test %reg1, %reg1
  1242. jxx
  1243. }
  1244. begin
  1245. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1246. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1247. DebugMsg(SPeepholeOptimization + 'MovTestJxx2TestMov done',p);
  1248. asml.remove(p);
  1249. p.free;
  1250. p := hp1;
  1251. ReleaseUsedRegs(TmpUsedRegs);
  1252. Exit;
  1253. end
  1254. else
  1255. { change
  1256. mov %reg1, %reg2
  1257. test/or/and %reg2, %reg2
  1258. to
  1259. mov %reg1, %reg2
  1260. test/or/and %reg1, %reg1
  1261. }
  1262. begin
  1263. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1264. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1265. DebugMsg(SPeepholeOptimization + 'MovTestJxx2MovTestJxx done',p);
  1266. end;
  1267. ReleaseUsedRegs(TmpUsedRegs);
  1268. end
  1269. end
  1270. else
  1271. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1272. x >= RetOffset) as it doesn't do anything (it writes either to a
  1273. parameter or to the temporary storage room for the function
  1274. result)
  1275. }
  1276. if GetNextInstruction_p and
  1277. (tai(hp1).typ = ait_instruction) then
  1278. begin
  1279. if IsExitCode(hp1) and
  1280. MatchOpType(taicpu(p),top_reg,top_ref) and
  1281. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1282. not(assigned(current_procinfo.procdef.funcretsym) and
  1283. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1284. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  1285. begin
  1286. asml.remove(p);
  1287. p.free;
  1288. p:=hp1;
  1289. DebugMsg(SPeepholeOptimization + 'removed deadstore before leave/ret',p);
  1290. RemoveLastDeallocForFuncRes(p);
  1291. exit;
  1292. end
  1293. { change
  1294. mov reg1, mem1
  1295. test/cmp x, mem1
  1296. to
  1297. mov reg1, mem1
  1298. test/cmp x, reg1
  1299. }
  1300. else if MatchOpType(taicpu(p),top_reg,top_ref) and
  1301. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  1302. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1303. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1304. begin
  1305. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1306. DebugMsg(SPeepholeOptimization + 'MovTestCmp2MovTestCmp 1',hp1);
  1307. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1308. end;
  1309. end;
  1310. { Next instruction is also a MOV ? }
  1311. if GetNextInstruction_p and
  1312. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1313. begin
  1314. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1315. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1316. { mov reg1, mem1 or mov mem1, reg1
  1317. mov mem2, reg2 mov reg2, mem2}
  1318. begin
  1319. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1320. { mov reg1, mem1 or mov mem1, reg1
  1321. mov mem2, reg1 mov reg2, mem1}
  1322. begin
  1323. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1324. { Removes the second statement from
  1325. mov reg1, mem1/reg2
  1326. mov mem1/reg2, reg1 }
  1327. begin
  1328. if taicpu(p).oper[0]^.typ=top_reg then
  1329. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1330. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 1',p);
  1331. asml.remove(hp1);
  1332. hp1.free;
  1333. Result:=true;
  1334. exit;
  1335. end
  1336. else
  1337. begin
  1338. CopyUsedRegs(TmpUsedRegs);
  1339. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1340. if (taicpu(p).oper[1]^.typ = top_ref) and
  1341. { mov reg1, mem1
  1342. mov mem2, reg1 }
  1343. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1344. GetNextInstruction(hp1, hp2) and
  1345. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1346. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1347. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1348. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1349. { change to
  1350. mov reg1, mem1 mov reg1, mem1
  1351. mov mem2, reg1 cmp reg1, mem2
  1352. cmp mem1, reg1
  1353. }
  1354. begin
  1355. asml.remove(hp2);
  1356. hp2.free;
  1357. taicpu(hp1).opcode := A_CMP;
  1358. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1359. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1360. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1361. DebugMsg(SPeepholeOptimization + 'MovMovCmp2MovCmp done',hp1);
  1362. end;
  1363. ReleaseUsedRegs(TmpUsedRegs);
  1364. end;
  1365. end
  1366. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1367. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1368. begin
  1369. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1370. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1371. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov1 done',p);
  1372. end
  1373. else
  1374. begin
  1375. CopyUsedRegs(TmpUsedRegs);
  1376. if GetNextInstruction(hp1, hp2) and
  1377. MatchOpType(taicpu(p),top_ref,top_reg) and
  1378. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1379. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1380. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1381. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1382. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1383. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1384. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1385. { mov mem1, %reg1
  1386. mov %reg1, mem2
  1387. mov mem2, reg2
  1388. to:
  1389. mov mem1, reg2
  1390. mov reg2, mem2}
  1391. begin
  1392. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1393. DebugMsg(SPeepholeOptimization + 'MovMovMov2MovMov 1 done',p);
  1394. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1395. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1396. asml.remove(hp2);
  1397. hp2.free;
  1398. end
  1399. {$ifdef i386}
  1400. { this is enabled for i386 only, as the rules to create the reg sets below
  1401. are too complicated for x86-64, so this makes this code too error prone
  1402. on x86-64
  1403. }
  1404. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1405. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1406. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1407. { mov mem1, reg1 mov mem1, reg1
  1408. mov reg1, mem2 mov reg1, mem2
  1409. mov mem2, reg2 mov mem2, reg1
  1410. to: to:
  1411. mov mem1, reg1 mov mem1, reg1
  1412. mov mem1, reg2 mov reg1, mem2
  1413. mov reg1, mem2
  1414. or (if mem1 depends on reg1
  1415. and/or if mem2 depends on reg2)
  1416. to:
  1417. mov mem1, reg1
  1418. mov reg1, mem2
  1419. mov reg1, reg2
  1420. }
  1421. begin
  1422. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1423. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1424. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1425. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1426. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1427. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1428. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1429. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1430. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1431. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1432. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1433. end
  1434. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1435. begin
  1436. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1437. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1438. end
  1439. else
  1440. begin
  1441. asml.remove(hp2);
  1442. hp2.free;
  1443. end
  1444. {$endif i386}
  1445. ;
  1446. ReleaseUsedRegs(TmpUsedRegs);
  1447. end;
  1448. end
  1449. (* { movl [mem1],reg1
  1450. movl [mem1],reg2
  1451. to
  1452. movl [mem1],reg1
  1453. movl reg1,reg2
  1454. }
  1455. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1456. (taicpu(p).oper[1]^.typ = top_reg) and
  1457. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1458. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1459. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1460. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1461. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1462. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1463. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1464. else*)
  1465. { movl const1,[mem1]
  1466. movl [mem1],reg1
  1467. to
  1468. movl const1,reg1
  1469. movl reg1,[mem1]
  1470. }
  1471. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  1472. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1473. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1474. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1475. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1476. begin
  1477. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1478. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1479. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1480. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1481. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1482. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 1',p);
  1483. end
  1484. {
  1485. mov* x,reg1
  1486. mov* y,reg1
  1487. to
  1488. mov* y,reg1
  1489. }
  1490. else if (taicpu(p).oper[1]^.typ=top_reg) and
  1491. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  1492. not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^)) then
  1493. begin
  1494. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 4 done',p);
  1495. { take care of the register (de)allocs following p }
  1496. UpdateUsedRegs(tai(p.next));
  1497. asml.remove(p);
  1498. p.free;
  1499. p:=hp1;
  1500. Result:=true;
  1501. exit;
  1502. end;
  1503. end
  1504. else if (taicpu(p).oper[1]^.typ = top_reg) and
  1505. GetNextInstruction_p and
  1506. (hp1.typ = ait_instruction) and
  1507. GetNextInstruction(hp1, hp2) and
  1508. MatchInstruction(hp2,A_MOV,[]) and
  1509. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1510. (taicpu(hp2).oper[0]^.typ=top_reg) and
  1511. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1512. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1513. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
  1514. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1515. ) then
  1516. { change movsX/movzX reg/ref, reg2
  1517. add/sub/or/... reg3/$const, reg2
  1518. mov reg2 reg/ref
  1519. to add/sub/or/... reg3/$const, reg/ref }
  1520. begin
  1521. CopyUsedRegs(TmpUsedRegs);
  1522. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1523. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1524. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1525. begin
  1526. { by example:
  1527. movswl %si,%eax movswl %si,%eax p
  1528. decl %eax addl %edx,%eax hp1
  1529. movw %ax,%si movw %ax,%si hp2
  1530. ->
  1531. movswl %si,%eax movswl %si,%eax p
  1532. decw %eax addw %edx,%eax hp1
  1533. movw %ax,%si movw %ax,%si hp2
  1534. }
  1535. DebugMsg(SPeepholeOptimization + 'MovOpMov2Op ('+
  1536. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  1537. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  1538. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  1539. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1540. {
  1541. ->
  1542. movswl %si,%eax movswl %si,%eax p
  1543. decw %si addw %dx,%si hp1
  1544. movw %ax,%si movw %ax,%si hp2
  1545. }
  1546. case taicpu(hp1).ops of
  1547. 1:
  1548. begin
  1549. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1550. if taicpu(hp1).oper[0]^.typ=top_reg then
  1551. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1552. end;
  1553. 2:
  1554. begin
  1555. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1556. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1557. (taicpu(hp1).opcode<>A_SHL) and
  1558. (taicpu(hp1).opcode<>A_SHR) and
  1559. (taicpu(hp1).opcode<>A_SAR) then
  1560. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1561. end;
  1562. else
  1563. internalerror(2008042701);
  1564. end;
  1565. {
  1566. ->
  1567. decw %si addw %dx,%si p
  1568. }
  1569. asml.remove(p);
  1570. asml.remove(hp2);
  1571. p.Free;
  1572. hp2.Free;
  1573. p := hp1;
  1574. end;
  1575. ReleaseUsedRegs(TmpUsedRegs);
  1576. end
  1577. else if GetNextInstruction_p and
  1578. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1579. GetNextInstruction(hp1, hp2) and
  1580. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1581. MatchOperand(Taicpu(p).oper[0]^,0) and
  1582. (Taicpu(p).oper[1]^.typ = top_reg) and
  1583. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1584. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1585. { mov reg1,0
  1586. bts reg1,operand1 --> mov reg1,operand2
  1587. or reg1,operand2 bts reg1,operand1}
  1588. begin
  1589. Taicpu(hp2).opcode:=A_MOV;
  1590. asml.remove(hp1);
  1591. insertllitem(hp2,hp2.next,hp1);
  1592. asml.remove(p);
  1593. p.free;
  1594. p:=hp1;
  1595. end
  1596. else if GetNextInstruction_p and
  1597. MatchInstruction(hp1,A_LEA,[S_L]) and
  1598. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1599. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1600. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1601. ) or
  1602. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1603. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1604. )
  1605. ) then
  1606. { mov reg1,ref
  1607. lea reg2,[reg1,reg2]
  1608. to
  1609. add reg2,ref}
  1610. begin
  1611. CopyUsedRegs(TmpUsedRegs);
  1612. { reg1 may not be used afterwards }
  1613. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1614. begin
  1615. Taicpu(hp1).opcode:=A_ADD;
  1616. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1617. DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
  1618. asml.remove(p);
  1619. p.free;
  1620. p:=hp1;
  1621. end;
  1622. ReleaseUsedRegs(TmpUsedRegs);
  1623. end;
  1624. end;
    function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
      var
        hp1 : tai;
      begin
        Result:=false;
        { Pass-1 peephole for the two-operand "movXX" family (whatever
          variant p carries): if p is immediately followed by the same
          opcode/size moving the same operand pair back again, the second
          move is redundant, and when the destination register of p dies
          there, both moves can be dropped. }
        if taicpu(p).ops <> 2 then
          exit;
        if GetNextInstruction(p,hp1) and
          MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
          (taicpu(hp1).ops = 2) then
          begin
            { operand kinds must be mirrored: (reg,mem)/(mem,reg) or vice versa }
            if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
              (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
              { movXX reg1, mem1     or     movXX mem1, reg1
                movXX mem2, reg2            movXX reg2, mem2}
              begin
                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
                  { movXX reg1, mem1     or     movXX mem1, reg1
                    movXX mem2, reg1            movXX reg2, mem1}
                  begin
                    if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                      begin
                        { Removes the second statement from
                          movXX reg1, mem1/reg2
                          movXX mem1/reg2, reg1
                          keep reg1 allocated across the pair so the
                          register allocator does not reuse it in between }
                        if taicpu(p).oper[0]^.typ=top_reg then
                          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                        { Removes BOTH statements from
                          movXX mem1/reg1, reg2
                          movXX reg2, mem1/reg1
                          when reg2 is not used afterwards }
                        if (taicpu(p).oper[1]^.typ=top_reg) and
                          not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                          begin
                            { p is freed here; reposition p on the
                              instruction after hp1 before logging }
                            asml.remove(p);
                            p.free;
                            GetNextInstruction(hp1,p);
                            DebugMsg(SPeepholeOptimization + 'MovXXMovXX2Nop 1 done',p);
                          end
                        else
                          DebugMsg(SPeepholeOptimization + 'MovXXMovXX2MoVXX 1 done',p);
                        { in either case the second (inverse) move goes away }
                        asml.remove(hp1);
                        hp1.free;
                        Result:=true;
                        exit;
                      end
                  end;
              end;
          end;
      end;
  1676. function TX86AsmOptimizer.OptPass1OP(const p : tai) : boolean;
  1677. var
  1678. TmpUsedRegs : TAllUsedRegs;
  1679. hp1 : tai;
  1680. begin
  1681. result:=false;
  1682. { replace
  1683. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  1684. MovX %mreg2,%mreg1
  1685. dealloc %mreg2
  1686. by
  1687. <Op>X %mreg2,%mreg1
  1688. ?
  1689. }
  1690. if GetNextInstruction(p,hp1) and
  1691. { we mix single and double opperations here because we assume that the compiler
  1692. generates vmovapd only after double operations and vmovaps only after single operations }
  1693. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  1694. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1695. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  1696. (taicpu(p).oper[0]^.typ=top_reg) then
  1697. begin
  1698. CopyUsedRegs(TmpUsedRegs);
  1699. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1700. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1701. begin
  1702. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  1703. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1704. DebugMsg(SPeepholeOptimization + 'OpMov2Op done',p);
  1705. asml.Remove(hp1);
  1706. hp1.Free;
  1707. result:=true;
  1708. end;
  1709. ReleaseUsedRegs(TmpUsedRegs);
  1710. end;
  1711. end;
    function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
      var
        hp1 : tai;
        l : ASizeInt;
        TmpUsedRegs : TAllUsedRegs;
      begin
        Result:=false;
        { Pass-1 peephole on LEA: strip useless segment prefixes, turn
          degenerate leas into mov/nop/add/sub/inc/dec, and retarget a lea
          whose result is immediately copied to another register. }
        { removes seg register prefixes from LEA operations, as they
          don't do anything }
        taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
        { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
        if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
          (taicpu(p).oper[0]^.ref^.index = NR_NO) and
          { do not mess with leas acessing the stack pointer }
          (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
          (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
          begin
            if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
              (taicpu(p).oper[0]^.ref^.offset = 0) then
              begin
                { plain base, zero offset, different destination:
                  lea (%reg1),%reg2 -> mov %reg1,%reg2 }
                hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
                  taicpu(p).oper[1]^.reg);
                InsertLLItem(p.previous,p.next, hp1);
                DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
                p.free;
                p:=hp1;
                Result:=true;
                exit;
              end
            else if (taicpu(p).oper[0]^.ref^.offset = 0) then
              begin
                { base = destination, zero offset: the lea does nothing }
                hp1:=taicpu(p.Next);
                DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
                asml.remove(p);
                p.free;
                p:=hp1;
                Result:=true;
                exit;
              end
            { continue to use lea to adjust the stack pointer,
              it is the recommended way, but only if not optimizing for size
              NOTE(review): the outer guard already excludes a stack-pointer
              destination, so the first disjunct below is always true here --
              verify whether the cs_opt_size special case is still intended }
            else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
              (cs_opt_size in current_settings.optimizerswitches) then
              with taicpu(p).oper[0]^.ref^ do
                if (base = taicpu(p).oper[1]^.reg) then
                  begin
                    { lea off(%reg1),%reg1 is just an inc/dec/add/sub on %reg1 }
                    l:=offset;
                    if (l=1) and UseIncDec then
                      begin
                        taicpu(p).opcode:=A_INC;
                        taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                        taicpu(p).ops:=1;
                        DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
                      end
                    else if (l=-1) and UseIncDec then
                      begin
                        taicpu(p).opcode:=A_DEC;
                        taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                        taicpu(p).ops:=1;
                        DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
                      end
                    else
                      begin
                        { -2147483648 cannot be negated in 32 bit, so it
                          must stay an add of the negative constant }
                        if (l<0) and (l<>-2147483648) then
                          begin
                            taicpu(p).opcode:=A_SUB;
                            taicpu(p).loadConst(0,-l);
                            DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
                          end
                        else
                          begin
                            taicpu(p).opcode:=A_ADD;
                            taicpu(p).loadConst(0,l);
                            DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
                          end;
                      end;
                    Result:=true;
                    exit;
                  end;
          end;
        { lea ...,reg1 ; mov reg1,reg2 -> lea ...,reg2 when reg1 dies at the mov }
        if GetNextInstruction(p,hp1) and
          MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
          MatchOpType(Taicpu(hp1),top_reg,top_reg) and
          (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
              begin
                taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                DebugMsg(SPeepholeOptimization + 'LeaMov2Lea done',p);
                asml.Remove(hp1);
                hp1.Free;
                result:=true;
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end;
      end;
    function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
      var
        hp1 : tai;
      begin
        { Merge a preceding dec/sub/add on the same register into the
          "sub const,reg" at p (assumes p has (const,reg) operands --
          guaranteed by the caller, OptPass1Sub).  Returns True only when
          the constants cancel out completely, p itself is removed and the
          var parameter p has been repositioned. }
        DoSubAddOpt := False;
        if GetLastInstruction(p, hp1) and
          (hp1.typ = ait_instruction) and
          (taicpu(hp1).opsize = taicpu(p).opsize) then
          case taicpu(hp1).opcode Of
            A_DEC:
              { dec reg; sub const,reg -> sub const+1,reg }
              if (taicpu(hp1).oper[0]^.typ = top_reg) and
                MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                begin
                  taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
                  asml.remove(hp1);
                  hp1.free;
                end;
            A_SUB:
              { sub const1,reg; sub const2,reg -> sub const1+const2,reg }
              if MatchOpType(taicpu(hp1),top_const,top_reg) and
                MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
                begin
                  taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
                  asml.remove(hp1);
                  hp1.free;
                end;
            A_ADD:
              { add const1,reg; sub const2,reg -> sub const2-const1,reg }
              if MatchOpType(taicpu(hp1),top_const,top_reg) and
                MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
                begin
                  taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                  asml.remove(hp1);
                  hp1.free;
                  { constants cancelled: drop the sub as well and move p
                    back to the previous instruction if there is one }
                  if (taicpu(p).oper[0]^.val = 0) then
                    begin
                      hp1 := tai(p.next);
                      asml.remove(p);
                      p.free;
                      if not GetLastInstruction(hp1, p) then
                        p := hp1;
                      DoSubAddOpt := True;
                    end
                end;
          end;
      end;
  1855. function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
  1856. var
  1857. hp1 : tai;
  1858. begin
  1859. Result:=false;
  1860. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1861. { * change "sub/add const1, reg" or "dec reg" followed by
  1862. "sub const2, reg" to one "sub ..., reg" }
  1863. if MatchOpType(taicpu(p),top_const,top_reg) then
  1864. begin
  1865. {$ifdef i386}
  1866. if (taicpu(p).oper[0]^.val = 2) and
  1867. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1868. { Don't do the sub/push optimization if the sub }
  1869. { comes from setting up the stack frame (JM) }
  1870. (not(GetLastInstruction(p,hp1)) or
  1871. not(MatchInstruction(hp1,A_MOV,[S_L]) and
  1872. MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
  1873. MatchOperand(taicpu(hp1).oper[0]^,NR_EBP))) then
  1874. begin
  1875. hp1 := tai(p.next);
  1876. while Assigned(hp1) and
  1877. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1878. not RegReadByInstruction(NR_ESP,hp1) and
  1879. not RegModifiedByInstruction(NR_ESP,hp1) do
  1880. hp1 := tai(hp1.next);
  1881. if Assigned(hp1) and
  1882. MatchInstruction(hp1,A_PUSH,[S_W]) then
  1883. begin
  1884. taicpu(hp1).changeopsize(S_L);
  1885. if taicpu(hp1).oper[0]^.typ=top_reg then
  1886. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1887. hp1 := tai(p.next);
  1888. asml.remove(p);
  1889. p.free;
  1890. p := hp1;
  1891. Result:=true;
  1892. exit;
  1893. end;
  1894. end;
  1895. {$endif i386}
  1896. if DoSubAddOpt(p) then
  1897. Result:=true;
  1898. end;
  1899. end;
    function TX86AsmOptimizer.OptPass1SHLSAL(var p : tai) : boolean;
      var
        TmpBool1,TmpBool2 : Boolean;
        tmpref : treference;
        hp1,hp2: tai;
      begin
        Result:=false;
        { Pass-1 peephole on shl/sal: absorb following add/sub/inc/dec
          instructions on the shifted register into a single lea, and on
          pre-PentiumII 32 bit targets rewrite small shifts as add/lea. }
        if MatchOpType(taicpu(p),top_const,top_reg) and
          (taicpu(p).opsize in [S_L{$ifdef x86_64},S_Q{$endif x86_64}]) and
          (taicpu(p).oper[0]^.val <= 3) then
          { Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement }
          begin
            { should we check the next instruction? }
            TmpBool1 := True;
            { have we found an add/sub which could be
              integrated in the lea? }
            TmpBool2 := False;
            { scale factor 2/4/8 encodes the shift by 1/2/3 }
            reference_reset(tmpref,2,[]);
            TmpRef.index := taicpu(p).oper[1]^.reg;
            TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
            { consume following add/sub/inc/dec on the same register; the
              successor of each candidate must not read the flags, because
              lea (unlike add/sub/inc/dec) leaves them untouched }
            while TmpBool1 and
              GetNextInstruction(p, hp1) and
              (tai(hp1).typ = ait_instruction) and
              ((((taicpu(hp1).opcode = A_ADD) or
                (taicpu(hp1).opcode = A_SUB)) and
                (taicpu(hp1).oper[1]^.typ = Top_Reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
              (((taicpu(hp1).opcode = A_INC) or
                (taicpu(hp1).opcode = A_DEC)) and
                (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
              (not GetNextInstruction(hp1,hp2) or
                not instrReadsFlags(hp2)) Do
              begin
                TmpBool1 := False;
                if (taicpu(hp1).oper[0]^.typ = Top_Const) then
                  { constant add/sub folds into the lea offset }
                  begin
                    TmpBool1 := True;
                    TmpBool2 := True;
                    case taicpu(hp1).opcode of
                      A_ADD:
                        inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                      A_SUB:
                        dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
                    end;
                    asml.remove(hp1);
                    hp1.free;
                  end
                else
                  { a register add folds into the (still free) base;
                    inc/dec fold into the offset }
                  if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
                    (((taicpu(hp1).opcode = A_ADD) and
                      (TmpRef.base = NR_NO)) or
                    (taicpu(hp1).opcode = A_INC) or
                    (taicpu(hp1).opcode = A_DEC)) then
                    begin
                      TmpBool1 := True;
                      TmpBool2 := True;
                      case taicpu(hp1).opcode of
                        A_ADD:
                          TmpRef.base := taicpu(hp1).oper[0]^.reg;
                        A_INC:
                          inc(TmpRef.offset);
                        A_DEC:
                          dec(TmpRef.offset);
                      end;
                      asml.remove(hp1);
                      hp1.free;
                    end;
              end;
            if TmpBool2
{$ifndef x86_64}
              or
              { on pre-PII 32 bit targets a small shift is rewritten even
                when nothing was folded, unless optimizing for size }
              ((current_settings.optimizecputype < cpu_Pentium2) and
              (taicpu(p).oper[0]^.val <= 3) and
              not(cs_opt_size in current_settings.optimizerswitches))
{$endif x86_64}
              then
              begin
                { only reachable with TmpBool2=False via the pre-PII path:
                  "shl $1,%reg" becomes "add %reg,%reg" }
                if not(TmpBool2) and
                    (taicpu(p).oper[0]^.val = 1) then
                  begin
                    hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                      taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
                  end
                else
                  hp1 := taicpu.op_ref_reg(A_LEA, taicpu(p).opsize, TmpRef,
                    taicpu(p).oper[1]^.reg);
                InsertLLItem(p.previous, p.next, hp1);
                p.free;
                p := hp1;
              end;
          end
{$ifndef x86_64}
        else if (current_settings.optimizecputype < cpu_Pentium2) and
          MatchOpType(taicpu(p),top_const,top_reg) then
          begin
            { changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
              but faster on a 486, and pairable in both U and V pipes on the Pentium
              (unlike shl, which is only pairable in the U pipe) }
            if taicpu(p).oper[0]^.val=1 then
              begin
                hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                InsertLLItem(p.previous, p.next, hp1);
                p.free;
                p := hp1;
              end
            { changes "shl $2, %reg" to "lea (,%reg,4), %reg"
              "shl $3, %reg" to "lea (,%reg,8), %reg" }
            else if (taicpu(p).opsize = S_L) and
              (taicpu(p).oper[0]^.val<= 3) then
              begin
                reference_reset(tmpref,2,[]);
                TmpRef.index := taicpu(p).oper[1]^.reg;
                TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
                hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
                InsertLLItem(p.previous, p.next, hp1);
                p.free;
                p := hp1;
              end;
          end
{$endif x86_64}
        ;
      end;
    function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
      var
        TmpUsedRegs : TAllUsedRegs;
        hp1,hp2: tai;
      begin
        Result:=false;
        { Pass-2 peephole on mov: (1) forward a reg-reg copy into the
          address of an immediately following mov/movzx/movsx that
          overwrites the copy's destination; (2) fold a load/arith-op/store
          round trip through a dying register into a memory-operand op. }
        if MatchOpType(taicpu(p),top_reg,top_reg) and
          GetNextInstruction(p, hp1) and
          MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
          MatchOpType(taicpu(hp1),top_ref,top_reg) and
          ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
          or
          (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
          ) and
          { the load overwrites (a subregister of) the copy's destination,
            so the copy itself becomes dead after substitution }
          (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
          { mov reg1, reg2
            mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
          begin
            if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
              taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
            if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
              taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
            DebugMsg(SPeepholeOptimization + 'MovMovXX2MoVXX 1 done',p);
            asml.remove(p);
            p.free;
            p := hp1;
            Result:=true;
            exit;
          end
        else if (taicpu(p).oper[0]^.typ = top_ref) and
          GetNextInstruction(p,hp1) and
          (hp1.typ = ait_instruction) and
          { while the GetNextInstruction(hp1,hp2) call could be factored out,
            doing it separately in both branches allows to do the cheap checks
            with low probability earlier }
          ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_MOV,[])
          ) or
          { a lea on the loaded register counts as foldable too, provided
            exactly one of base/index is that register (possibly scaled
            only by an offset) }
          ((taicpu(hp1).opcode=A_LEA) and
            GetNextInstruction(hp1,hp2) and
            MatchInstruction(hp2,A_MOV,[]) and
            ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
              (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
            ) or
            (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
              taicpu(p).oper[1]^.reg) and
              (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
            (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
            (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
            ) and
            ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
          )
          ) and
          { the op's result register must be what the final mov stores }
          MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
          (taicpu(hp2).oper[1]^.typ = top_ref) then
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
            { store target must be the original load source, and the
              intermediate register must die at the store }
            if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
              not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
              { change   mov (ref), reg
                        add/sub/or/... reg2/$const, reg
                        mov reg, (ref)
                        # release reg
                to       add/sub/or/... reg2/$const, (ref)    }
              begin
                case taicpu(hp1).opcode of
                  A_INC,A_DEC,A_NOT,A_NEG :
                    taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                  A_LEA :
                    begin
                      { turn the lea into an add of whichever component
                        (index, base or offset) is not the register itself }
                      taicpu(hp1).opcode:=A_ADD;
                      if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                      else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                        taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                      else
                        taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                      taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                      DebugMsg(SPeepholeOptimization + 'FoldLea done',hp1);
                    end
                  else
                    taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                end;
                asml.remove(p);
                asml.remove(hp2);
                p.free;
                hp2.free;
                p := hp1
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end;
      end;
  2118. function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  2119. var
  2120. TmpUsedRegs : TAllUsedRegs;
  2121. hp1 : tai;
  2122. begin
  2123. Result:=false;
  2124. if (taicpu(p).ops >= 2) and
  2125. ((taicpu(p).oper[0]^.typ = top_const) or
  2126. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  2127. (taicpu(p).oper[1]^.typ = top_reg) and
  2128. ((taicpu(p).ops = 2) or
  2129. ((taicpu(p).oper[2]^.typ = top_reg) and
  2130. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  2131. GetLastInstruction(p,hp1) and
  2132. MatchInstruction(hp1,A_MOV,[]) and
  2133. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  2134. ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
  2135. ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
  2136. begin
  2137. CopyUsedRegs(TmpUsedRegs);
  2138. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
  2139. { change
  2140. mov reg1,reg2
  2141. imul y,reg2 to imul y,reg1,reg2 }
  2142. begin
  2143. taicpu(p).ops := 3;
  2144. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  2145. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  2146. DebugMsg(SPeepholeOptimization + 'MovImul2Imul done',p);
  2147. asml.remove(hp1);
  2148. hp1.free;
  2149. result:=true;
  2150. end;
  2151. ReleaseUsedRegs(TmpUsedRegs);
  2152. end;
  2153. end;
    function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
      var
        hp1 : tai;
      begin
        {
          change
                 jmp .L1
                 ...
             .L1:
                 ret
          into
                 ret
        }
        result:=false;
        { only plain direct jumps to a label (no register/indexed targets) }
        if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
          (taicpu(p).oper[0]^.ref^.index=NR_NO) then
          begin
            hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
            { unconditional jump whose target label is (possibly after
              further labels) immediately followed by a ret }
            if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
              MatchInstruction(hp1,A_RET,[S_NO]) then
              begin
                { rewrite the jmp in place as a copy of that ret and drop
                  one reference from the no-longer-targeted label }
                tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
                taicpu(p).opcode:=A_RET;
                taicpu(p).is_jmp:=false;
                taicpu(p).ops:=taicpu(hp1).ops;
                case taicpu(hp1).ops of
                  0:
                    taicpu(p).clearop(0);
                  1:
                    { ret with a stack-adjust immediate }
                    taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
                  else
                    internalerror(2016041301);
                end;
                result:=true;
              end;
          end;
      end;
  2191. function CanBeCMOV(p : tai) : boolean;
  2192. begin
  2193. CanBeCMOV:=assigned(p) and
  2194. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  2195. { we can't use cmov ref,reg because
  2196. ref could be nil and cmov still throws an exception
  2197. if ref=nil but the mov isn't done (FK)
  2198. or ((taicpu(p).oper[0]^.typ = top_ref) and
  2199. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  2200. }
  2201. MatchOpType(taicpu(p),top_reg,top_reg);
  2202. end;
  2203. function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  2204. var
  2205. hp1,hp2,hp3: tai;
  2206. carryadd_opcode : TAsmOp;
  2207. l : Longint;
  2208. condition : TAsmCond;
  2209. begin
  2210. { jb @@1 cmc
  2211. inc/dec operand --> adc/sbb operand,0
  2212. @@1:
  2213. ... and ...
  2214. jnb @@1
  2215. inc/dec operand --> adc/sbb operand,0
  2216. @@1: }
  2217. result:=false;
  2218. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  2219. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  2220. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  2221. begin
  2222. carryadd_opcode:=A_NONE;
  2223. if Taicpu(p).condition in [C_NAE,C_B] then
  2224. begin
  2225. if Taicpu(hp1).opcode=A_INC then
  2226. carryadd_opcode:=A_ADC;
  2227. if Taicpu(hp1).opcode=A_DEC then
  2228. carryadd_opcode:=A_SBB;
  2229. if carryadd_opcode<>A_NONE then
  2230. begin
  2231. Taicpu(p).clearop(0);
  2232. Taicpu(p).ops:=0;
  2233. Taicpu(p).is_jmp:=false;
  2234. Taicpu(p).opcode:=A_CMC;
  2235. Taicpu(p).condition:=C_NONE;
  2236. Taicpu(hp1).ops:=2;
  2237. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  2238. Taicpu(hp1).loadconst(0,0);
  2239. Taicpu(hp1).opcode:=carryadd_opcode;
  2240. result:=true;
  2241. exit;
  2242. end;
  2243. end;
  2244. if Taicpu(p).condition in [C_AE,C_NB] then
  2245. begin
  2246. if Taicpu(hp1).opcode=A_INC then
  2247. carryadd_opcode:=A_ADC;
  2248. if Taicpu(hp1).opcode=A_DEC then
  2249. carryadd_opcode:=A_SBB;
  2250. if carryadd_opcode<>A_NONE then
  2251. begin
  2252. asml.remove(p);
  2253. p.free;
  2254. Taicpu(hp1).ops:=2;
  2255. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  2256. Taicpu(hp1).loadconst(0,0);
  2257. Taicpu(hp1).opcode:=carryadd_opcode;
  2258. p:=hp1;
  2259. result:=true;
  2260. exit;
  2261. end;
  2262. end;
  2263. end;
  2264. {$ifndef i8086}
  2265. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  2266. begin
  2267. { check for
  2268. jCC xxx
  2269. <several movs>
  2270. xxx:
  2271. }
  2272. l:=0;
  2273. GetNextInstruction(p, hp1);
  2274. while assigned(hp1) and
  2275. CanBeCMOV(hp1) and
  2276. { stop on labels }
  2277. not(hp1.typ=ait_label) do
  2278. begin
  2279. inc(l);
  2280. GetNextInstruction(hp1,hp1);
  2281. end;
  2282. if assigned(hp1) then
  2283. begin
  2284. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2285. begin
  2286. if (l<=4) and (l>0) then
  2287. begin
  2288. condition:=inverse_cond(taicpu(p).condition);
  2289. hp2:=p;
  2290. GetNextInstruction(p,hp1);
  2291. p:=hp1;
  2292. repeat
  2293. taicpu(hp1).opcode:=A_CMOVcc;
  2294. taicpu(hp1).condition:=condition;
  2295. GetNextInstruction(hp1,hp1);
  2296. until not(assigned(hp1)) or
  2297. not(CanBeCMOV(hp1));
  2298. { wait with removing else GetNextInstruction could
  2299. ignore the label if it was the only usage in the
  2300. jump moved away }
  2301. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2302. { if the label refs. reach zero, remove any alignment before the label }
  2303. if (hp1.typ=ait_align) and (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
  2304. begin
  2305. asml.Remove(hp1);
  2306. hp1.Free;
  2307. end;
  2308. asml.remove(hp2);
  2309. hp2.free;
  2310. result:=true;
  2311. exit;
  2312. end;
  2313. end
  2314. else
  2315. begin
  2316. { check further for
  2317. jCC xxx
  2318. <several movs 1>
  2319. jmp yyy
  2320. xxx:
  2321. <several movs 2>
  2322. yyy:
  2323. }
  2324. { hp2 points to jmp yyy }
  2325. hp2:=hp1;
  2326. { skip hp1 to xxx }
  2327. GetNextInstruction(hp1, hp1);
  2328. if assigned(hp2) and
  2329. assigned(hp1) and
  2330. (l<=3) and
  2331. (hp2.typ=ait_instruction) and
  2332. (taicpu(hp2).is_jmp) and
  2333. (taicpu(hp2).condition=C_None) and
  2334. { real label and jump, no further references to the
  2335. label are allowed }
  2336. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
  2337. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2338. begin
  2339. l:=0;
  2340. { skip hp1 to <several moves 2> }
  2341. GetNextInstruction(hp1, hp1);
  2342. while assigned(hp1) and
  2343. CanBeCMOV(hp1) do
  2344. begin
  2345. inc(l);
  2346. GetNextInstruction(hp1, hp1);
  2347. end;
  2348. { hp1 points to yyy: }
  2349. if assigned(hp1) and
  2350. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  2351. begin
  2352. condition:=inverse_cond(taicpu(p).condition);
  2353. GetNextInstruction(p,hp1);
  2354. hp3:=p;
  2355. p:=hp1;
  2356. repeat
  2357. taicpu(hp1).opcode:=A_CMOVcc;
  2358. taicpu(hp1).condition:=condition;
  2359. GetNextInstruction(hp1,hp1);
  2360. until not(assigned(hp1)) or
  2361. not(CanBeCMOV(hp1));
  2362. { hp2 is still at jmp yyy }
  2363. GetNextInstruction(hp2,hp1);
  2364. { hp2 is now at xxx: }
  2365. condition:=inverse_cond(condition);
  2366. GetNextInstruction(hp1,hp1);
  2367. { hp1 is now at <several movs 2> }
  2368. repeat
  2369. taicpu(hp1).opcode:=A_CMOVcc;
  2370. taicpu(hp1).condition:=condition;
  2371. GetNextInstruction(hp1,hp1);
  2372. until not(assigned(hp1)) or
  2373. not(CanBeCMOV(hp1));
  2374. {
  2375. asml.remove(hp1.next)
  2376. hp1.next.free;
  2377. asml.remove(hp1);
  2378. hp1.free;
  2379. }
  2380. { remove jCC }
  2381. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  2382. asml.remove(hp3);
  2383. hp3.free;
  2384. { remove jmp }
  2385. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2386. asml.remove(hp2);
  2387. hp2.free;
  2388. result:=true;
  2389. exit;
  2390. end;
  2391. end;
  2392. end;
  2393. end;
  2394. end;
  2395. {$endif i8086}
  2396. end;
{ Optimises sign/zero-extension instructions (MOVSX/MOVZX, plus MOVSXD on
  x86_64).  Two independent transformations are attempted:
  1. Fold  movx reg/ref,reg2 ; arith ...,reg2 ; mov reg2,reg/ref  into a
     single arithmetic operation performed directly on the original
     location (only when reg2 dies after the final mov).
  2. For MOVZX only: drop AND masks made redundant by the zero extension,
     and rewrite some movzx forms into faster equivalents.
  NOTE(review): unlike most OptPass1* routines, result is never set to
  true here even when the instruction stream is changed - confirm whether
  callers rely on the return value before depending on it. }
function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  var
    hp1,hp2: tai;
  begin
    result:=false;
    { transformation 1: movx / foldable-arith / mov-back sequence }
    if (taicpu(p).oper[1]^.typ = top_reg) and
      GetNextInstruction(p,hp1) and
      (hp1.typ = ait_instruction) and
      IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
      GetNextInstruction(hp1,hp2) and
      MatchInstruction(hp2,A_MOV,[]) and
      (taicpu(hp2).oper[0]^.typ = top_reg) and
      OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
{$ifdef i386}
      { not all registers have byte size sub registers on i386 }
      ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
{$endif i386}
      { the arith op's destination must be the same superregister the
        final mov reads back }
      (((taicpu(hp1).ops=2) and
        (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
       ((taicpu(hp1).ops=1) and
        (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
      not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
      begin
        { change movsX/movzX reg/ref, reg2
                 add/sub/or/... reg3/$const, reg2
                 mov reg2 reg/ref
          to     add/sub/or/... reg3/$const, reg/ref }
        { by example:
            movswl %si,%eax     movswl %si,%eax      p
            decl %eax           addl %edx,%eax       hp1
            movw %ax,%si        movw %ax,%si         hp2
          ->
            movswl %si,%eax     movswl %si,%eax      p
            decw %eax           addw %edx,%eax       hp1
            movw %ax,%si        movw %ax,%si         hp2
        }
        taicpu(hp1).changeopsize(taicpu(hp2).opsize);
        {
          ->
            movswl %si,%eax     movswl %si,%eax      p
            decw %si            addw %dx,%si         hp1
            movw %ax,%si        movw %ax,%si         hp2
        }
        case taicpu(hp1).ops of
          1:
            taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
          2:
            begin
              taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
              { shrink the source register to the final operand size }
              if (taicpu(hp1).oper[0]^.typ = top_reg) then
                setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
            end;
          else
            internalerror(2008042701);
        end;
        {
          ->
            decw %si            addw %dx,%si         p
        }
        DebugMsg(SPeepholeOptimization + 'var3',p);
        asml.remove(p);
        asml.remove(hp2);
        p.free;
        hp2.free;
        p:=hp1;
      end
    else if taicpu(p).opcode=A_MOVZX then
      begin
        { removes superfluous And's after movzx's }
        if (taicpu(p).oper[1]^.typ = top_reg) and
          GetNextInstruction(p, hp1) and
          (tai(hp1).typ = ait_instruction) and
          (taicpu(hp1).opcode = A_AND) and
          (taicpu(hp1).oper[0]^.typ = top_const) and
          (taicpu(hp1).oper[1]^.typ = top_reg) and
          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            { the AND is redundant when its mask keeps exactly the bits
              the zero extension already guarantees to be significant }
            case taicpu(p).opsize Of
              S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                if (taicpu(hp1).oper[0]^.val = $ff) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'var4',p);
                    asml.remove(hp1);
                    hp1.free;
                  end;
              S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                if (taicpu(hp1).oper[0]^.val = $ffff) then
                  begin
                    DebugMsg(SPeepholeOptimization + 'var5',p);
                    asml.remove(hp1);
                    hp1.free;
                  end;
{$ifdef x86_64}
              S_LQ:
                if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                  begin
                    if (cs_asm_source in current_settings.globalswitches) then
                      asml.insertbefore(tai_comment.create(strpnew(SPeepholeOptimization + 'var6')),p);
                    asml.remove(hp1);
                    hp1.Free;
                  end;
{$endif x86_64}
            end;
          end;
        { changes some movzx constructs to faster synonyms (all examples
          are given with eax/ax, but are also valid for other registers)}
        if (taicpu(p).oper[1]^.typ = top_reg) then
          if (taicpu(p).oper[0]^.typ = top_reg) then
            case taicpu(p).opsize of
              S_BW:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    not(cs_opt_size in current_settings.optimizerswitches) then
                    {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                    begin
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_W);
                      taicpu(p).loadConst(0,$ff);
                      DebugMsg(SPeepholeOptimization + 'var7',p);
                    end
                  else if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                      to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var8',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_W);
                      setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                    end;
                end;
              S_BL:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    not(cs_opt_size in current_settings.optimizerswitches) then
                    { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                    begin
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_L);
                      taicpu(p).loadConst(0,$ff)
                    end
                  else if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                      to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var10',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_L);
                      { do not use R_SUBWHOLE
                        as movl %rdx,%eax
                        is invalid in assembler PM }
                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                    end
                end;
{$ifndef i8086}
              S_WL:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    not(cs_opt_size in current_settings.optimizerswitches) then
                    { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                    begin
                      DebugMsg(SPeepholeOptimization + 'var11',p);
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_L);
                      taicpu(p).loadConst(0,$ffff);
                    end
                  else if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                      to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                    begin
                      DebugMsg(SPeepholeOptimization + 'var12',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_L);
                      { do not use R_SUBWHOLE
                        as movl %rdx,%eax
                        is invalid in assembler PM }
                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                    end;
                end;
{$endif i8086}
            end
          else if (taicpu(p).oper[0]^.typ = top_ref) then
            begin
              { movzx from memory followed by a masking AND: turn the
                movzx into a plain mov and narrow the mask instead }
              if GetNextInstruction(p, hp1) and
                (tai(hp1).typ = ait_instruction) and
                (taicpu(hp1).opcode = A_AND) and
                MatchOpType(taicpu(hp1),top_const,top_reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                begin
                  taicpu(p).opcode := A_MOV;
                  case taicpu(p).opsize Of
                    S_BL:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var13',p);
                        taicpu(p).changeopsize(S_L);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                      end;
                    S_WL:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var14',p);
                        taicpu(p).changeopsize(S_L);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                      end;
                    S_BW:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var15',p);
                        taicpu(p).changeopsize(S_W);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                      end;
{$ifdef x86_64}
                    S_BQ:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var16',p);
                        taicpu(p).changeopsize(S_Q);
                        taicpu(hp1).loadConst(
                          0, taicpu(hp1).oper[0]^.val and $ff);
                      end;
                    S_WQ:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var17',p);
                        taicpu(p).changeopsize(S_Q);
                        taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
                      end;
                    S_LQ:
                      begin
                        DebugMsg(SPeepholeOptimization + 'var18',p);
                        taicpu(p).changeopsize(S_Q);
                        taicpu(hp1).loadConst(
                          0, taicpu(hp1).oper[0]^.val and $ffffffff);
                      end;
{$endif x86_64}
                    else
                      Internalerror(2017050704)
                  end;
                end;
            end;
      end;
  end;
  2652. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  2653. var
  2654. hp1 : tai;
  2655. RegName1, RegName2: string;
  2656. MaskLength : Cardinal;
  2657. begin
  2658. Result:=false;
  2659. if not(GetNextInstruction(p, hp1)) then
  2660. exit;
  2661. if MatchOpType(taicpu(p),top_const,top_reg) and
  2662. MatchInstruction(hp1,A_AND,[]) and
  2663. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2664. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2665. { the second register must contain the first one, so compare their subreg types }
  2666. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2667. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  2668. { change
  2669. and const1, reg
  2670. and const2, reg
  2671. to
  2672. and (const1 and const2), reg
  2673. }
  2674. begin
  2675. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  2676. DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
  2677. asml.remove(p);
  2678. p.Free;
  2679. p:=hp1;
  2680. Result:=true;
  2681. exit;
  2682. end
  2683. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2684. MatchInstruction(hp1,A_MOVZX,[]) and
  2685. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2686. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2687. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2688. (((taicpu(p).opsize=S_W) and
  2689. (taicpu(hp1).opsize=S_BW)) or
  2690. ((taicpu(p).opsize=S_L) and
  2691. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2692. {$ifdef x86_64}
  2693. or
  2694. ((taicpu(p).opsize=S_Q) and
  2695. (taicpu(hp1).opsize in [S_BQ,S_WQ]))
  2696. {$endif x86_64}
  2697. ) then
  2698. begin
  2699. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2700. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  2701. ) or
  2702. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2703. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  2704. then
  2705. begin
  2706. { Unlike MOVSX, MOVZX doesn't actually have a version that zero-extends a
  2707. 32-bit register to a 64-bit register, or even a version called MOVZXD, so
  2708. code that tests for the presence of AND 0xffffffff followed by MOVZX is
  2709. wasted, and is indictive of a compiler bug if it were triggered. [Kit]
  2710. NOTE: To zero-extend from 32 bits to 64 bits, simply use the standard MOV.
  2711. }
  2712. DebugMsg(SPeepholeOptimization + 'AndMovzToAnd done',p);
  2713. asml.remove(hp1);
  2714. hp1.free;
  2715. end;
  2716. end
  2717. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2718. MatchInstruction(hp1,A_SHL,[]) and
  2719. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2720. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
  2721. begin
  2722. { get length of potential and mask }
  2723. MaskLength:=SizeOf(taicpu(p).oper[0]^.val)*8-BsrQWord(taicpu(p).oper[0]^.val)-1;
  2724. { really a mask? }
  2725. if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
  2726. { unmasked part shifted out? }
  2727. ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
  2728. begin
  2729. DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
  2730. { take care of the register (de)allocs following p }
  2731. UpdateUsedRegs(tai(p.next));
  2732. asml.remove(p);
  2733. p.free;
  2734. p:=hp1;
  2735. Result:=true;
  2736. exit;
  2737. end;
  2738. end
  2739. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2740. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  2741. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2742. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2743. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2744. (((taicpu(p).opsize=S_W) and
  2745. (taicpu(hp1).opsize=S_BW)) or
  2746. ((taicpu(p).opsize=S_L) and
  2747. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2748. {$ifdef x86_64}
  2749. or
  2750. ((taicpu(p).opsize=S_Q) and
  2751. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2752. {$endif x86_64}
  2753. ) then
  2754. begin
  2755. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2756. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  2757. ) or
  2758. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2759. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  2760. {$ifdef x86_64}
  2761. or
  2762. (((taicpu(hp1).opsize)=S_LQ) and
  2763. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  2764. )
  2765. {$endif x86_64}
  2766. then
  2767. begin
  2768. DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
  2769. asml.remove(hp1);
  2770. hp1.free;
  2771. end;
  2772. end
  2773. else if (taicpu(p).oper[1]^.typ = top_reg) and
  2774. (hp1.typ = ait_instruction) and
  2775. (taicpu(hp1).is_jmp) and
  2776. (taicpu(hp1).opcode<>A_JMP) and
  2777. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  2778. { change
  2779. and x, reg
  2780. jxx
  2781. to
  2782. test x, reg
  2783. jxx
  2784. if reg is deallocated before the
  2785. jump, but only if it's a conditional jump (PFV)
  2786. }
  2787. taicpu(p).opcode := A_TEST;
  2788. end;
{ Rewrites an LEA whose destination register is already one of the two
  address components into an ADD:
    lea (%reg,idx),%reg  -> add idx,%reg
    lea (base,%reg),%reg -> add base,%reg
  Only performed when the CPU flags are dead, because ADD clobbers the
  flags while LEA does not.  Returns true when the rewrite happened. }
function TX86AsmOptimizer.PostPeepholeOptLea(const p : tai) : Boolean;
  begin
    Result:=false;
    if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
      MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
      (taicpu(p).oper[0]^.ref^.index<>NR_NO) then
      begin
        { order matters: operand 0 still holds the reference, so both of
          its components must be read before loadreg(0,...) replaces it }
        taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.base);
        taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.index);
        taicpu(p).opcode:=A_ADD;
        DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',p);
        result:=true;
      end
    else if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) and
      MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) and
      (taicpu(p).oper[0]^.ref^.base<>NR_NO) then
      begin
        { same ordering constraint as above }
        taicpu(p).loadreg(1,taicpu(p).oper[0]^.ref^.index);
        taicpu(p).loadreg(0,taicpu(p).oper[0]^.ref^.base);
        taicpu(p).opcode:=A_ADD;
        DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',p);
        result:=true;
      end;
  end;
{ Rewrites "mov $const,%reg" into cheaper encodings:
    mov $0,%reg          -> xor %reg,%reg   (only when the flags are dead,
                                             since XOR scrambles them)
    movq $imm32,%reg64   -> movl $imm32,%reg32
  The second form relies on the x86_64 rule that writing a 32-bit register
  implicitly zeroes the upper 32 bits, so dropping to the 32-bit form is
  value-preserving and saves the REX.W prefix / shorter immediate.
  Returns true when a rewrite happened. }
function TX86AsmOptimizer.PostPeepholeOptMov(const p : tai) : Boolean;
  var
    { only used to assemble the debug message before/after the rewrite }
    Value, RegName: string;
  begin
    Result:=false;
    if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.typ = top_const) then
      begin
        case taicpu(p).oper[0]^.val of
          0:
            { Don't make this optimisation if the CPU flags are required, since XOR scrambles them }
            if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
              begin
                { change "mov $0,%reg" into "xor %reg,%reg" }
                taicpu(p).opcode := A_XOR;
                taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                Result := True;
              end;
          $1..$FFFFFFFF:
            begin
              { Code size reduction by J. Gareth "Kit" Moreton }
              { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
              case taicpu(p).opsize of
                { S_Q can only occur on x86_64, so this branch is inert elsewhere }
                S_Q:
                  begin
                    RegName := std_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
                    Value := tostr(taicpu(p).oper[0]^.val);
                    { The actual optimization }
                    setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
                    taicpu(p).changeopsize(S_L);
                    DebugMsg(SPeepholeOptimization + 'movq $' + Value + ',%' + RegName + ' -> movl $' + Value + ',%' + std_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
                    Result := True;
                  end;
              end;
            end;
        end;
      end;
  end;
  2850. function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
  2851. begin
  2852. Result:=false;
  2853. { change "cmp $0, %reg" to "test %reg, %reg" }
  2854. if MatchOpType(taicpu(p),top_const,top_reg) and
  2855. (taicpu(p).oper[0]^.val = 0) then
  2856. begin
  2857. taicpu(p).opcode := A_TEST;
  2858. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  2859. Result:=true;
  2860. end;
  2861. end;
{ Removes a TEST/OR that merely re-derives flags the previous instruction
  already produced, i.e. the middle line of
      and/or/xor/add/sub/... $x, %y
      test/or %y, %y  |  test $-1, %y     (removed)
      setCC/jCC/cmovCC
  For ADD/SUB and shifts the removal is restricted to Z/NZ/E/NE
  conditions, because carry/overflow-based conditions would observe flag
  bits the arithmetic sets differently from TEST.  INC/DEC are rewritten
  to ADD/SUB $1 first, since INC/DEC leave the carry flag untouched.
  As a fallback, "test $-1,%reg" becomes "test %reg,%reg" (drops the
  immediate byte(s)).  Returns true when p was removed. }
function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
  var
    IsTestConstX : Boolean;
    hp1,hp2 : tai;
  begin
    Result:=false;
    { removes the line marked with (x) from the sequence
      and/or/xor/add/sub/... $x, %y
      test/or %y, %y | test $-1, %y (x)
      j(n)z _Label
      as the first instruction already adjusts the ZF
      %y operand may also be a reference }
    IsTestConstX:=(taicpu(p).opcode=A_TEST) and
      MatchOperand(taicpu(p).oper[0]^,-1);
    if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
      GetLastInstruction(p, hp1) and
      (tai(hp1).typ = ait_instruction) and
      GetNextInstruction(p,hp2) and
      MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
      case taicpu(hp1).opcode Of
        A_ADD, A_SUB, A_OR, A_XOR, A_AND:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
               ((taicpu(hp1).opcode <> A_ADD) and
                (taicpu(hp1).opcode <> A_SUB))) then
              begin
                { drop the test/or: advance p past it and unlink it }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_SHL, A_SAL, A_SHR, A_SAR:
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
              { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
              { therefore, it's only safe to do this optimization for     }
              { shifts by a (nonzero) constant                            }
              (taicpu(hp1).oper[0]^.typ = top_const) and
              (taicpu(hp1).oper[0]^.val <> 0) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end;
        A_DEC, A_INC, A_NEG:
          begin
            if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
              { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
              { and in case of carry for A(E)/B(E)/C/NC }
              (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
              begin
                case taicpu(hp1).opcode Of
                  A_DEC, A_INC:
                    { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
                    begin
                      case taicpu(hp1).opcode Of
                        A_DEC: taicpu(hp1).opcode := A_SUB;
                        A_INC: taicpu(hp1).opcode := A_ADD;
                      end;
                      taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                      taicpu(hp1).loadConst(0,1);
                      taicpu(hp1).ops:=2;
                    end
                end;
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                p := tai(hp1);
                Result:=true;
              end;
          end
        else
          { change "test $-1,%reg" into "test %reg,%reg" }
          if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
            taicpu(p).loadoper(0,taicpu(p).oper[1]^);
      end { case }
    { change "test $-1,%reg" into "test %reg,%reg" }
    else if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
      taicpu(p).loadoper(0,taicpu(p).oper[1]^);
  end;
{ Optimises the instruction following a CALL:
    call f + jmp g  -> push g + jmp f   (pre-PentiumPro 32-bit targets
                                         only, and never with PIC: the
                                         pushed address becomes the return
                                         target, emulating the call)
    call f + ret    -> jmp f            (opt level 4 only; valid because
                                         the callee's ret then returns
                                         straight to our caller, but it
                                         destroys stack back traces)
  Returns true when a rewrite happened. }
function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
  var
    hp1 : tai;
    hp2 : taicpu;
  begin
    Result:=false;
{$ifndef x86_64}
    { don't do this on modern CPUs, this really hurts them due to
      broken call/ret pairing }
    if (current_settings.optimizecputype < cpu_Pentium2) and
      not(cs_create_pic in current_settings.moduleswitches) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_JMP,[S_NO]) and
      MatchOpType(taicpu(hp1),top_ref) and
      (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
      begin
        { insert "push <jmp target>" before the call, then turn the call
          into a jmp and delete the original jmp }
        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
        InsertLLItem(p.previous, p, hp2);
        taicpu(p).opcode := A_JMP;
        taicpu(p).is_jmp := true;
        asml.remove(hp1);
        hp1.free;
        Result:=true;
      end
    else
{$endif x86_64}
      { replace
          call procname
          ret
        by
          jmp procname
        this should never hurt except when pic is used, not sure
        how to handle it then
        but do it only on level 4 because it destroys stack back traces
      }
      if (cs_opt_level4 in current_settings.optimizerswitches) and
        not(cs_create_pic in current_settings.moduleswitches) and
        GetNextInstruction(p, hp1) and
        MatchInstruction(hp1,A_RET,[S_NO]) and
        (taicpu(hp1).ops=0) then
        begin
          taicpu(p).opcode := A_JMP;
          taicpu(p).is_jmp := true;
          asml.remove(hp1);
          hp1.free;
          Result:=true;
        end;
  end;
  3001. {$ifdef x86_64}
{ Code size reduction by J. Gareth "Kit" Moreton }
{ Converts MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively when the
  destination is one of the eight non-extended registers, which removes
  the REX prefix; the 32-bit write implicitly zeroes the upper 32 bits,
  so the result is identical.
  NOTE(review): Result is never set to True even when the instruction is
  rewritten - confirm whether callers rely on the return value. }
function TX86AsmOptimizer.PostPeepholeOptMovzx(const p : tai) : Boolean;
  var
    { debug text assembled before the operands are rewritten }
    PreMessage: string;
  begin
    Result := False;
    { Code size reduction by J. Gareth "Kit" Moreton }
    { Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix }
    if (taicpu(p).opsize in [S_BQ, S_WQ]) and
      (getsupreg(taicpu(p).oper[1]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP])
    then
      begin
        { Has 64-bit register name and opcode suffix }
        PreMessage := 'movz' + gas_opsize2str[taicpu(p).opsize] + ' x,%' + std_regname(taicpu(p).oper[1]^.reg) + ' -> movz';
        { The actual optimization }
        setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
        if taicpu(p).opsize = S_BQ then
          taicpu(p).changeopsize(S_BL)
        else
          taicpu(p).changeopsize(S_WL);
        DebugMsg(SPeepholeOptimization + PreMessage +
          gas_opsize2str[taicpu(p).opsize] + ' x,%' + std_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
      end;
  end;
{ Code size reduction by J. Gareth "Kit" Moreton }
{ Changes "xorq %reg,%reg" to "xorl %reg,%reg" for %rax, %rcx, %rdx,
  %rbx, %rsi, %rdi, %rbp and %rsp, as this removes the REX prefix;
  zeroing the 32-bit register implicitly clears the upper 32 bits, so
  the result is identical.
  NOTE(review): Result is never set to True even when the instruction is
  rewritten - confirm whether callers rely on the return value. }
function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
  var
    { debug text assembled before/after the operands are rewritten }
    PreMessage, RegName: string;
  begin
    Result := False;
    { only self-xor (reg,reg zeroing idiom) qualifies }
    if not OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
      Exit;
    if taicpu(p).oper[0]^.typ <> top_reg then
      { Should be impossible if both operands were equal, since one of XOR's operands must be a register }
      InternalError(2018011500);
    case taicpu(p).opsize of
      S_Q:
        if (getsupreg(taicpu(p).oper[0]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
          begin
            RegName := std_regname(taicpu(p).oper[0]^.reg); { 64-bit register name }
            PreMessage := 'xorq %' + RegName + ',%' + RegName + ' -> xorl %';
            { The actual optimization }
            setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
            setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
            taicpu(p).changeopsize(S_L);
            RegName := std_regname(taicpu(p).oper[0]^.reg); { 32-bit register name }
            DebugMsg(SPeepholeOptimization + PreMessage + RegName + ',%' + RegName + ' (removes REX prefix)', p);
          end;
    end;
  end;
  3053. {$endif}
  3054. procedure TX86AsmOptimizer.OptReferences;
  3055. var
  3056. p: tai;
  3057. i: Integer;
  3058. begin
  3059. p := BlockStart;
  3060. while (p <> BlockEnd) Do
  3061. begin
  3062. if p.typ=ait_instruction then
  3063. begin
  3064. for i:=0 to taicpu(p).ops-1 do
  3065. if taicpu(p).oper[i]^.typ=top_ref then
  3066. optimize_ref(taicpu(p).oper[i]^.ref^,false);
  3067. end;
  3068. p:=tai(p.next);
  3069. end;
  3070. end;
  3071. end.