aoptx86.pas
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
  27. type
  28. TX86AsmOptimizer = class(TAsmOptimizer)
  29. function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
  30. function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
  31. function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  32. protected
  33. { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
  34. function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  35. { checks whether reading the value in reg1 depends on the value of reg2. This
  36. is very similar to SuperRegistersEqual, except it takes into account that
  37. R_SUBH and R_SUBL are independent (e.g. reading from AL does not
  38. depend on the value in AH). }
  39. function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  40. procedure DebugMsg(const s : string; p : tai);inline;
  41. procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
  42. class function IsExitCode(p : tai) : boolean;
  43. class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
  44. procedure RemoveLastDeallocForFuncRes(p : tai);
  45. function DoSubAddOpt(var p : tai) : Boolean;
  46. function PrePeepholeOptSxx(var p : tai) : boolean;
  47. function OptPass1AND(var p : tai) : boolean;
  48. function OptPass1VMOVAP(var p : tai) : boolean;
  49. function OptPass1VOP(const p : tai) : boolean;
  50. function OptPass1MOV(var p : tai) : boolean;
  51. function OptPass1Movx(var p : tai) : boolean;
  52. function OptPass1MOVAP(var p : tai) : boolean;
  53. function OptPass1MOVXX(var p : tai) : boolean;
  54. function OptPass1OP(const p : tai) : boolean;
  55. function OptPass1LEA(var p : tai) : boolean;
  56. function OptPass1Sub(var p : tai) : boolean;
  57. function OptPass2MOV(var p : tai) : boolean;
  58. function OptPass2Imul(var p : tai) : boolean;
  59. function OptPass2Jmp(var p : tai) : boolean;
  60. function OptPass2Jcc(var p : tai) : boolean;
  61. function PostPeepholeOptMov(const p : tai) : Boolean;
  62. {$ifdef x86_64} { These post-peephole optimisations only affect 64-bit registers. [Kit] }
  63. function PostPeepholeOptMovzx(const p : tai) : Boolean;
  64. function PostPeepholeOptXor(var p : tai) : Boolean;
  65. {$endif}
  66. function PostPeepholeOptCmp(var p : tai) : Boolean;
  67. function PostPeepholeOptTestOr(var p : tai) : Boolean;
  68. function PostPeepholeOptCall(var p : tai) : Boolean;
  69. procedure OptReferences;
  70. end;
  71. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  72. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  73. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  74. function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  75. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  76. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  77. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  78. function RefsEqual(const r1, r2: treference): boolean;
  79. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  80. { returns true if ref is a reference using only the registers passed as base and index
  81. and possibly having an offset }
  82. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  83. const
  84. SPeepholeOptimization: string = 'Peephole Optimization: ';
  85. implementation
  86. uses
  87. cutils,verbose,
  88. globals,
  89. cpuinfo,
  90. procinfo,
  91. aasmbase,
  92. aoptutils,
  93. symconst,symsym,
  94. cgx86,
  95. itcpugas;
  96. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  97. begin
  98. result :=
  99. (instr.typ = ait_instruction) and
  100. (taicpu(instr).opcode = op) and
  101. ((opsize = []) or (taicpu(instr).opsize in opsize));
  102. end;
  103. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  104. begin
  105. result :=
  106. (instr.typ = ait_instruction) and
  107. ((taicpu(instr).opcode = op1) or
  108. (taicpu(instr).opcode = op2)
  109. ) and
  110. ((opsize = []) or (taicpu(instr).opsize in opsize));
  111. end;
  112. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  113. begin
  114. result :=
  115. (instr.typ = ait_instruction) and
  116. ((taicpu(instr).opcode = op1) or
  117. (taicpu(instr).opcode = op2) or
  118. (taicpu(instr).opcode = op3)
  119. ) and
  120. ((opsize = []) or (taicpu(instr).opsize in opsize));
  121. end;
  122. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  123. const opsize : topsizes) : boolean;
  124. var
  125. op : TAsmOp;
  126. begin
  127. result:=false;
  128. for op in ops do
  129. begin
  130. if (instr.typ = ait_instruction) and
  131. (taicpu(instr).opcode = op) and
  132. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  133. begin
  134. result:=true;
  135. exit;
  136. end;
  137. end;
  138. end;
  139. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  140. begin
  141. result := (oper.typ = top_reg) and (oper.reg = reg);
  142. end;
  143. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  144. begin
  145. result := (oper.typ = top_const) and (oper.val = a);
  146. end;
  147. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  148. begin
  149. result := oper1.typ = oper2.typ;
  150. if result then
  151. case oper1.typ of
  152. top_const:
  153. Result:=oper1.val = oper2.val;
  154. top_reg:
  155. Result:=oper1.reg = oper2.reg;
  156. top_ref:
  157. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  158. else
  159. internalerror(2013102801);
  160. end
  161. end;
  162. function RefsEqual(const r1, r2: treference): boolean;
  163. begin
  164. RefsEqual :=
  165. (r1.offset = r2.offset) and
  166. (r1.segment = r2.segment) and (r1.base = r2.base) and
  167. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  168. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  169. (r1.relsymbol = r2.relsymbol);
  170. end;
  171. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  172. begin
  173. Result:=(ref.offset=0) and
  174. (ref.scalefactor in [0,1]) and
  175. (ref.segment=NR_NO) and
  176. (ref.symbol=nil) and
  177. (ref.relsymbol=nil) and
  178. ((base=NR_INVALID) or
  179. (ref.base=base)) and
  180. ((index=NR_INVALID) or
  181. (ref.index=index));
  182. end;
  183. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  184. begin
  185. Result:=(ref.scalefactor in [0,1]) and
  186. (ref.segment=NR_NO) and
  187. (ref.symbol=nil) and
  188. (ref.relsymbol=nil) and
  189. ((base=NR_INVALID) or
  190. (ref.base=base)) and
  191. ((index=NR_INVALID) or
  192. (ref.index=index));
  193. end;
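  { Illustrative usage of the matching helpers above (a sketch, not code taken
    from this unit):
      if MatchInstruction(hp1,A_MOV,[S_L]) and
         (taicpu(hp1).oper[0]^.typ=top_ref) and
         MatchReference(taicpu(hp1).oper[0]^.ref^,NR_EAX,NR_EDX) then
        ...
    matches a 32-bit "movl (%eax,%edx),..." whose source reference carries no
    offset, symbol or segment override; passing NR_INVALID for base or index
    accepts any register in that position. }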
  194. function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  195. begin
  196. Result:=RegReadByInstruction(reg,hp);
  197. end;
  198. function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  199. var
  200. p: taicpu;
  201. opcount: longint;
  202. begin
  203. RegReadByInstruction := false;
  204. if hp.typ <> ait_instruction then
  205. exit;
  206. p := taicpu(hp);
  207. case p.opcode of
  208. A_CALL:
  209. regreadbyinstruction := true;
  210. A_IMUL:
  211. case p.ops of
  212. 1:
  213. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  214. (
  215. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  216. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  217. );
  218. 2,3:
  219. regReadByInstruction :=
  220. reginop(reg,p.oper[0]^) or
  221. reginop(reg,p.oper[1]^);
  222. end;
  223. A_MUL:
  224. begin
  225. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  226. (
  227. ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
  228. ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
  229. );
  230. end;
  231. A_IDIV,A_DIV:
  232. begin
  233. regReadByInstruction := RegInOp(reg,p.oper[0]^) or
  234. (
  235. (getregtype(reg)=R_INTREGISTER) and
  236. (
  237. (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
  238. )
  239. );
  240. end;
  241. else
  242. begin
  243. if (p.opcode=A_LEA) and is_segment_reg(reg) then
  244. begin
  245. RegReadByInstruction := false;
  246. exit;
  247. end;
  248. for opcount := 0 to p.ops-1 do
  249. if (p.oper[opCount]^.typ = top_ref) and
  250. RegInRef(reg,p.oper[opcount]^.ref^) then
  251. begin
  252. RegReadByInstruction := true;
  253. exit
  254. end;
  255. { special handling for SSE MOVSD }
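  { MOVSD is ambiguous: with no operands it is the doubleword string move,
    with two operands it is the SSE2 scalar-double move. Only the SSE form
    has register operands to examine, hence the p.ops>0 test below. }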
  256. if (p.opcode=A_MOVSD) and (p.ops>0) then
  257. begin
  258. if p.ops<>2 then
  259. internalerror(2017042702);
  260. regReadByInstruction := reginop(reg,p.oper[0]^) or
  261. (
  262. (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
  263. );
  264. exit;
  265. end;
  266. with insprop[p.opcode] do
  267. begin
  268. if getregtype(reg)=R_INTREGISTER then
  269. begin
  270. case getsupreg(reg) of
  271. RS_EAX:
  272. if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
  273. begin
  274. RegReadByInstruction := true;
  275. exit
  276. end;
  277. RS_ECX:
  278. if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
  279. begin
  280. RegReadByInstruction := true;
  281. exit
  282. end;
  283. RS_EDX:
  284. if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
  285. begin
  286. RegReadByInstruction := true;
  287. exit
  288. end;
  289. RS_EBX:
  290. if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
  291. begin
  292. RegReadByInstruction := true;
  293. exit
  294. end;
  295. RS_ESP:
  296. if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
  297. begin
  298. RegReadByInstruction := true;
  299. exit
  300. end;
  301. RS_EBP:
  302. if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
  303. begin
  304. RegReadByInstruction := true;
  305. exit
  306. end;
  307. RS_ESI:
  308. if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
  309. begin
  310. RegReadByInstruction := true;
  311. exit
  312. end;
  313. RS_EDI:
  314. if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
  315. begin
  316. RegReadByInstruction := true;
  317. exit
  318. end;
  319. end;
  320. end;
  321. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  322. begin
  323. if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
  324. begin
  325. case p.condition of
  326. C_A,C_NBE, { CF=0 and ZF=0 }
  327. C_BE,C_NA: { CF=1 or ZF=1 }
  328. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
  329. C_AE,C_NB,C_NC, { CF=0 }
  330. C_B,C_NAE,C_C: { CF=1 }
  331. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
  332. C_NE,C_NZ, { ZF=0 }
  333. C_E,C_Z: { ZF=1 }
  334. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
  335. C_G,C_NLE, { ZF=0 and SF=OF }
  336. C_LE,C_NG: { ZF=1 or SF<>OF }
  337. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  338. C_GE,C_NL, { SF=OF }
  339. C_L,C_NGE: { SF<>OF }
  340. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
  341. C_NO, { OF=0 }
  342. C_O: { OF=1 }
  343. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
  344. C_NP,C_PO, { PF=0 }
  345. C_P,C_PE: { PF=1 }
  346. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
  347. C_NS, { SF=0 }
  348. C_S: { SF=1 }
  349. RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
  350. else
  351. internalerror(2017042701);
  352. end;
  353. if RegReadByInstruction then
  354. exit;
  355. end;
  356. case getsubreg(reg) of
  357. R_SUBW,R_SUBD,R_SUBQ:
  358. RegReadByInstruction :=
  359. [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
  360. Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
  361. Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
  362. R_SUBFLAGCARRY:
  363. RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  364. R_SUBFLAGPARITY:
  365. RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  366. R_SUBFLAGAUXILIARY:
  367. RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  368. R_SUBFLAGZERO:
  369. RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  370. R_SUBFLAGSIGN:
  371. RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  372. R_SUBFLAGOVERFLOW:
  373. RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  374. R_SUBFLAGINTERRUPT:
  375. RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
  376. R_SUBFLAGDIRECTION:
  377. RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
  378. else
  379. internalerror(2017042601);
  380. end;
  381. exit;
  382. end;
  383. if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
  384. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  385. (p.oper[0]^.reg=p.oper[1]^.reg) then
  386. exit;
  387. if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
  388. begin
  389. RegReadByInstruction := true;
  390. exit
  391. end;
  392. if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
  393. begin
  394. RegReadByInstruction := true;
  395. exit
  396. end;
  397. if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
  398. begin
  399. RegReadByInstruction := true;
  400. exit
  401. end;
  402. if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
  403. begin
  404. RegReadByInstruction := true;
  405. exit
  406. end;
  407. end;
  408. end;
  409. end;
  410. end;
  411. {$ifdef DEBUG_AOPTCPU}
  412. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  413. begin
  414. asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  415. end;
  416. {$else DEBUG_AOPTCPU}
  417. procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  418. begin
  419. end;
  420. {$endif DEBUG_AOPTCPU}
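  { For example (illustrative): a write to AX overwrites AL, AH and AX
    entirely, but not EAX, whose upper 16 bits survive; a write to EAX or RAX
    overwrites every other subregister, since on x86_64 writing EAX also
    clears the upper 32 bits of RAX. }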
  421. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  422. begin
  423. if not SuperRegistersEqual(reg1,reg2) then
  424. exit(false);
  425. if getregtype(reg1)<>R_INTREGISTER then
  26. exit(true); {because SuperRegistersEqual is true}
  427. case getsubreg(reg1) of
  428. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  429. higher, it preserves the high bits, so the new value depends on
  430. reg2's previous value. In other words, it is equivalent to doing:
  431. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  432. R_SUBL:
  433. exit(getsubreg(reg2)=R_SUBL);
  434. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  435. higher, it actually does a:
  436. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  437. R_SUBH:
  438. exit(getsubreg(reg2)=R_SUBH);
  439. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  440. bits of reg2:
  441. reg2 := (reg2 and $ffff0000) or word(reg1); }
  442. R_SUBW:
  443. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  444. { a write to R_SUBD always overwrites every other subregister,
  445. because it clears the high 32 bits of R_SUBQ on x86_64 }
  446. R_SUBD,
  447. R_SUBQ:
  448. exit(true);
  449. else
  450. internalerror(2017042801);
  451. end;
  452. end;
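  { For example (illustrative): reading AL depends on an earlier write to AL,
    AX, EAX or RAX, but not on a write to AH; reading AX, EAX or RAX depends
    on a write to any part of the register. }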
  453. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  454. begin
  455. if not SuperRegistersEqual(reg1,reg2) then
  456. exit(false);
  457. if getregtype(reg1)<>R_INTREGISTER then
  58. exit(true); {because SuperRegistersEqual is true}
  459. case getsubreg(reg1) of
  460. R_SUBL:
  461. exit(getsubreg(reg2)<>R_SUBH);
  462. R_SUBH:
  463. exit(getsubreg(reg2)<>R_SUBL);
  464. R_SUBW,
  465. R_SUBD,
  466. R_SUBQ:
  467. exit(true);
  468. else
  469. internalerror(2017042802);
  470. end;
  471. end;
  472. function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  473. var
  474. hp1 : tai;
  475. l : TCGInt;
  476. begin
  477. result:=false;
  478. { changes the code sequence
  479. shr/sar const1, x
  480. shl const2, x
  481. to
  482. either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
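  { Worked example with byte-sized operands (illustrative):
      shrb $3,%al                shrb $1,%al
      shlb $2,%al        =>      andb $0xFC,%al     (const1 > const2)
      shrb $2,%al
      shlb $2,%al        =>      andb $0xFC,%al     (const1 = const2) }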
  483. if GetNextInstruction(p, hp1) and
  484. MatchInstruction(hp1,A_SHL,[]) and
  485. (taicpu(p).oper[0]^.typ = top_const) and
  486. (taicpu(hp1).oper[0]^.typ = top_const) and
  487. (taicpu(hp1).opsize = taicpu(p).opsize) and
  488. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
  489. OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
  490. begin
  491. if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
  492. not(cs_opt_size in current_settings.optimizerswitches) then
  493. begin
  494. { shr/sar const1, %reg
  495. shl const2, %reg
  496. with const1 > const2 }
  497. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  498. taicpu(hp1).opcode := A_AND;
  499. l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
  500. case taicpu(p).opsize Of
  501. S_B: taicpu(hp1).loadConst(0,l Xor $ff);
  502. S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
  503. S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
  504. S_Q: taicpu(hp1).loadConst(0,l Xor aint($ffffffffffffffff));
  505. else
  506. Internalerror(2017050703)
  507. end;
  508. end
  509. else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
  510. not(cs_opt_size in current_settings.optimizerswitches) then
  511. begin
  512. { shr/sar const1, %reg
  513. shl const2, %reg
  514. with const1 < const2 }
  515. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
  516. taicpu(p).opcode := A_AND;
  517. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  518. case taicpu(p).opsize Of
  519. S_B: taicpu(p).loadConst(0,l Xor $ff);
  520. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  521. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  522. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  523. else
  524. Internalerror(2017050702)
  525. end;
  526. end
  527. else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
  528. begin
  529. { shr/sar const1, %reg
  530. shl const2, %reg
  531. with const1 = const2 }
  532. taicpu(p).opcode := A_AND;
  533. l := (1 shl (taicpu(p).oper[0]^.val))-1;
  534. case taicpu(p).opsize Of
  535. S_B: taicpu(p).loadConst(0,l Xor $ff);
  536. S_W: taicpu(p).loadConst(0,l Xor $ffff);
  537. S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
  538. S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
  539. else
  540. Internalerror(2017050701)
  541. end;
  542. asml.remove(hp1);
  543. hp1.free;
  544. end;
  545. end;
  546. end;
  547. { allocates register reg between (and including) instructions p1 and p2
  548. the type of p1 and p2 must not be in SkipInstr
  549. note that this routine is called both from the peephole optimizer
  550. (where optinfo is not yet initialised) and from the cse (where it is) }
  551. procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  552. var
  553. hp, start: tai;
  554. removedsomething,
  555. firstRemovedWasAlloc,
  556. lastRemovedWasDealloc: boolean;
  557. begin
  558. {$ifdef EXTDEBUG}
  559. { if assigned(p1.optinfo) and
  560. (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
  561. internalerror(2004101010); }
  562. {$endif EXTDEBUG}
  563. start := p1;
  564. if (reg = NR_ESP) or
  565. (reg = current_procinfo.framepointer) or
  566. not(assigned(p1)) then
  567. { this happens with registers which are loaded implicitly, outside the }
  568. { current block (e.g. esi with self) }
  569. exit;
  570. { make sure we allocate it for this instruction }
  571. getnextinstruction(p2,p2);
  572. lastRemovedWasDealloc := false;
  573. removedSomething := false;
  574. firstRemovedWasAlloc := false;
  575. {$ifdef allocregdebug}
  576. hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  577. ' from here...'));
  578. insertllitem(asml,p1.previous,p1,hp);
  579. hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
  580. ' till here...'));
  581. insertllitem(asml,p2,p2.next,hp);
  582. {$endif allocregdebug}
  583. { do it the safe way: always allocate the full super register,
  584. as we do no register re-allocation in the peephole optimizer,
  585. this does not hurt
  586. }
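  { e.g. (illustrative) a request to keep AL or AX allocated is widened here
    to an allocation of the whole EAX/RAX super register }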
  587. case getregtype(reg) of
  588. R_MMREGISTER:
  589. reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
  590. R_INTREGISTER:
  591. reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
  592. end;
  593. if not(RegInUsedRegs(reg,initialusedregs)) then
  594. begin
  595. hp := tai_regalloc.alloc(reg,nil);
  596. insertllItem(p1.previous,p1,hp);
  597. IncludeRegInUsedRegs(reg,initialusedregs);
  598. end;
  599. while assigned(p1) and
  600. (p1 <> p2) do
  601. begin
  602. if assigned(p1.optinfo) then
  603. internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
  604. p1 := tai(p1.next);
  605. repeat
  606. while assigned(p1) and
  607. (p1.typ in (SkipInstr-[ait_regalloc])) Do
  608. p1 := tai(p1.next);
  609. { remove all allocation/deallocation info about the register in between }
  610. if assigned(p1) and
  611. (p1.typ = ait_regalloc) then
  612. begin
  613. { same super register, different sub register? }
  614. if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
  615. begin
  616. if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
  617. internalerror(2016101501);
  618. tai_regalloc(p1).reg:=reg;
  619. end;
  620. if tai_regalloc(p1).reg=reg then
  621. begin
  622. if not removedSomething then
  623. begin
  624. firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
  625. removedSomething := true;
  626. end;
  627. lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
  628. hp := tai(p1.Next);
  629. asml.Remove(p1);
  630. p1.free;
  631. p1 := hp;
  632. end
  633. else
  634. p1 := tai(p1.next);
  635. end;
  636. until not(assigned(p1)) or
  637. not(p1.typ in SkipInstr);
  638. end;
  639. if assigned(p1) then
  640. begin
  641. if firstRemovedWasAlloc then
  642. begin
  643. hp := tai_regalloc.Alloc(reg,nil);
  644. insertLLItem(start.previous,start,hp);
  645. end;
  646. if lastRemovedWasDealloc then
  647. begin
  648. hp := tai_regalloc.DeAlloc(reg,nil);
  649. insertLLItem(p1.previous,p1,hp);
  650. end;
  651. end;
  652. end;
  653. function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  654. var
  655. p: taicpu;
  656. begin
  657. if not assigned(hp) or
  658. (hp.typ <> ait_instruction) then
  659. begin
  660. Result := false;
  661. exit;
  662. end;
  663. p := taicpu(hp);
  664. if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
  665. with insprop[p.opcode] do
  666. begin
  667. case getsubreg(reg) of
  668. R_SUBW,R_SUBD,R_SUBQ:
  669. Result:=
  670. RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
  671. RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
  672. RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
  673. RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
  674. RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
  675. RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
  676. R_SUBFLAGCARRY:
  677. Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
  678. R_SUBFLAGPARITY:
  679. Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
  680. R_SUBFLAGAUXILIARY:
  681. Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
  682. R_SUBFLAGZERO:
  683. Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
  684. R_SUBFLAGSIGN:
  685. Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
  686. R_SUBFLAGOVERFLOW:
  687. Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
  688. R_SUBFLAGINTERRUPT:
  689. Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
  690. R_SUBFLAGDIRECTION:
  691. Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
  692. else
  693. internalerror(2017050501);
  694. end;
  695. exit;
  696. end;
  697. Result :=
  698. (((p.opcode = A_MOV) or
  699. (p.opcode = A_MOVZX) or
  700. (p.opcode = A_MOVSX) or
  701. (p.opcode = A_LEA) or
  702. (p.opcode = A_VMOVSS) or
  703. (p.opcode = A_VMOVSD) or
  704. (p.opcode = A_VMOVAPD) or
  705. (p.opcode = A_VMOVAPS) or
  706. (p.opcode = A_VMOVQ) or
  707. (p.opcode = A_MOVSS) or
  708. (p.opcode = A_MOVSD) or
  709. (p.opcode = A_MOVQ) or
  710. (p.opcode = A_MOVAPD) or
  711. (p.opcode = A_MOVAPS) or
  712. {$ifndef x86_64}
  713. (p.opcode = A_LDS) or
  714. (p.opcode = A_LES) or
  715. {$endif not x86_64}
  716. (p.opcode = A_LFS) or
  717. (p.opcode = A_LGS) or
  718. (p.opcode = A_LSS)) and
  719. (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
  720. (p.oper[1]^.typ = top_reg) and
  721. (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
  722. ((p.oper[0]^.typ = top_const) or
  723. ((p.oper[0]^.typ = top_reg) and
  724. not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  725. ((p.oper[0]^.typ = top_ref) and
  726. not RegInRef(reg,p.oper[0]^.ref^)))) or
  727. ((p.opcode = A_POP) and
  728. (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
  729. ((p.opcode = A_IMUL) and
  730. (p.ops=3) and
  731. (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
  732. (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
  733. ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
  734. ((((p.opcode = A_IMUL) or
  735. (p.opcode = A_MUL)) and
  736. (p.ops=1)) and
  737. (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
  738. ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
  739. (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  740. ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  741. ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
  742. {$ifdef x86_64}
  743. or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
  744. {$endif x86_64}
  745. )) or
  746. ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
  747. ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
  748. {$ifdef x86_64}
  749. ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
  750. {$endif x86_64}
  751. ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
  752. {$ifndef x86_64}
  753. ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  754. ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  755. {$endif not x86_64}
  756. ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  757. ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  758. ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
  759. {$ifndef x86_64}
  760. ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  761. {$endif not x86_64}
  762. ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
  763. ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
  764. ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
  765. ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
  766. {$ifdef x86_64}
  767. ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
  768. {$endif x86_64}
  769. ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  770. (((p.opcode = A_FSTSW) or
  771. (p.opcode = A_FNSTSW)) and
  772. (p.oper[0]^.typ=top_reg) and
  773. Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
  774. (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
  775. (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
  776. (p.oper[0]^.reg=p.oper[1]^.reg) and
  777. Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  778. end;
  779. class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  780. var
  781. hp2,hp3 : tai;
  782. begin
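  { Exit sequences recognised below (summary, AT&T syntax):
      ret
      leave; ret
      mov %ebp,%esp (or lea x(%ebp),%esp); pop %ebp; ret
    where %ebp stands for the current frame pointer; a leading NOP emitted by
    some x86-64 targets is skipped first. }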
  783. { some x86-64 targets issue a NOP before the real exit code }
  784. if MatchInstruction(p,A_NOP,[]) then
  785. GetNextInstruction(p,p);
  786. result:=assigned(p) and (p.typ=ait_instruction) and
  787. ((taicpu(p).opcode = A_RET) or
  788. ((taicpu(p).opcode=A_LEAVE) and
  789. GetNextInstruction(p,hp2) and
  790. MatchInstruction(hp2,A_RET,[S_NO])
  791. ) or
  792. ((((taicpu(p).opcode=A_MOV) and
  793. MatchOpType(taicpu(p),top_reg,top_reg) and
  794. (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
  795. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
  796. ((taicpu(p).opcode=A_LEA) and
  797. MatchOpType(taicpu(p),top_ref,top_reg) and
  798. (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
  799. (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
  800. )
  801. ) and
  802. GetNextInstruction(p,hp2) and
  803. MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
  804. MatchOpType(taicpu(hp2),top_reg) and
  805. (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
  806. GetNextInstruction(hp2,hp3) and
  807. MatchInstruction(hp3,A_RET,[S_NO])
  808. )
  809. );
  810. end;
  811. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  812. begin
  813. isFoldableArithOp := False;
  814. case hp1.opcode of
  815. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  816. isFoldableArithOp :=
  817. ((taicpu(hp1).oper[0]^.typ = top_const) or
  818. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  819. (taicpu(hp1).oper[0]^.reg <> reg))) and
  820. (taicpu(hp1).oper[1]^.typ = top_reg) and
  821. (taicpu(hp1).oper[1]^.reg = reg);
  822. A_INC,A_DEC,A_NEG,A_NOT:
  823. isFoldableArithOp :=
  824. (taicpu(hp1).oper[0]^.typ = top_reg) and
  825. (taicpu(hp1).oper[0]^.reg = reg);
  826. end;
  827. end;
  828. procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
  829. procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
  830. var
  831. hp2: tai;
  832. begin
  833. hp2 := p;
  834. repeat
  835. hp2 := tai(hp2.previous);
  836. if assigned(hp2) and
  837. (hp2.typ = ait_regalloc) and
  838. (tai_regalloc(hp2).ratype=ra_dealloc) and
  839. (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
  840. (getsupreg(tai_regalloc(hp2).reg) = supreg) then
  841. begin
  842. asml.remove(hp2);
  843. hp2.free;
  844. break;
  845. end;
  846. until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
  847. end;
  848. begin
  849. case current_procinfo.procdef.returndef.typ of
  850. arraydef,recorddef,pointerdef,
  851. stringdef,enumdef,procdef,objectdef,errordef,
  852. filedef,setdef,procvardef,
  853. classrefdef,forwarddef:
  854. DoRemoveLastDeallocForFuncRes(RS_EAX);
  855. orddef:
  856. if current_procinfo.procdef.returndef.size <> 0 then
  857. begin
  858. DoRemoveLastDeallocForFuncRes(RS_EAX);
  859. { for int64/qword }
  860. if current_procinfo.procdef.returndef.size = 8 then
  861. DoRemoveLastDeallocForFuncRes(RS_EDX);
  862. end;
  863. end;
  864. end;
  865. function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
  866. var
  867. TmpUsedRegs : TAllUsedRegs;
  868. hp1,hp2 : tai;
  869. alloc ,dealloc: tai_regalloc;
  870. begin
  871. result:=false;
  872. if MatchOpType(taicpu(p),top_reg,top_reg) and
  873. GetNextInstruction(p, hp1) and
  874. (hp1.typ = ait_instruction) and
  875. GetNextInstruction(hp1, hp2) and
  876. MatchInstruction(hp2,taicpu(p).opcode,[]) and
  877. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  878. MatchOpType(taicpu(hp2),top_reg,top_reg) and
  879. MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  880. (((taicpu(p).opcode=A_MOVAPS) and
  881. ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
  882. (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
  883. ((taicpu(p).opcode=A_MOVAPD) and
  884. ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
  885. (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
  886. ) then
  887. { change
  888. movapX reg,reg2
  889. addsX/subsX/... reg3, reg2
  890. movapX reg2,reg
  891. to
  892. addsX/subsX/... reg3,reg
  893. }
  894. begin
  895. CopyUsedRegs(TmpUsedRegs);
  896. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  897. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  898. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  899. begin
  900. DebugMsg(SPeepholeOptimization + 'MovapXOpMovapX2Op ('+
  901. std_op2str[taicpu(p).opcode]+' '+
  902. std_op2str[taicpu(hp1).opcode]+' '+
  903. std_op2str[taicpu(hp2).opcode]+') done',p);
  904. { we cannot eliminate the first move if
  905. the operation uses the same register for source and dest }
  906. if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
  907. begin
  908. asml.remove(p);
  909. p.Free;
  910. end;
  911. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  912. asml.remove(hp2);
  913. hp2.Free;
  914. p:=hp1;
  915. result:=true;
  916. end;
  917. ReleaseUsedRegs(TmpUsedRegs);
  918. end
  919. end;
  920. function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  921. var
  922. TmpUsedRegs : TAllUsedRegs;
  923. hp1,hp2 : tai;
  924. begin
  925. result:=false;
  926. if MatchOpType(taicpu(p),top_reg,top_reg) then
  927. begin
  928. { vmova* reg1,reg1
  929. =>
  930. <nop> }
  931. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  932. begin
  933. GetNextInstruction(p,hp1);
  934. asml.Remove(p);
  935. p.Free;
  936. p:=hp1;
  937. result:=true;
  938. end
  939. else if GetNextInstruction(p,hp1) then
  940. begin
  941. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  942. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  943. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  944. begin
  945. { vmova* reg1,reg2
  946. vmova* reg2,reg3
  947. dealloc reg2
  948. =>
  949. vmova* reg1,reg3 }
  950. CopyUsedRegs(TmpUsedRegs);
  951. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  952. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  953. begin
  954. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  955. asml.Remove(hp1);
  956. hp1.Free;
  957. result:=true;
  958. end
  959. { special case:
  960. vmova* reg1,reg2
  961. vmova* reg2,reg1
  962. =>
  963. vmova* reg1,reg2 }
  964. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  965. begin
  966. asml.Remove(hp1);
  967. hp1.Free;
  968. result:=true;
  969. end
  970. end
  971. else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
  972. { we mix single and double operations here because we assume that the compiler
  973. generates vmovapd only after double operations and vmovaps only after single operations }
  974. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  975. GetNextInstruction(hp1,hp2) and
  976. MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  977. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  978. begin
  979. CopyUsedRegs(TmpUsedRegs);
  980. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  981. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  982. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  983. then
  984. begin
  985. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  986. asml.Remove(p);
  987. p.Free;
  988. asml.Remove(hp2);
  989. hp2.Free;
  990. p:=hp1;
  991. end;
  992. end;
  993. end;
  994. end;
  995. end;
  996. function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
  997. var
  998. TmpUsedRegs : TAllUsedRegs;
  999. hp1 : tai;
  1000. begin
  1001. result:=false;
  1002. { replace
  1003. V<Op>X %mreg1,%mreg2,%mreg3
  1004. VMovX %mreg3,%mreg4
  1005. dealloc %mreg3
  1006. by
  1007. V<Op>X %mreg1,%mreg2,%mreg4
  1008. ?
  1009. }
  1010. if GetNextInstruction(p,hp1) and
  1011. { we mix single and double operations here because we assume that the compiler
  1012. generates vmovapd only after double operations and vmovaps only after single operations }
  1013. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  1014. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  1015. (taicpu(hp1).oper[1]^.typ=top_reg) then
  1016. begin
  1017. CopyUsedRegs(TmpUsedRegs);
  1018. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1019. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  1020. ) then
  1021. begin
  1022. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  1023. DebugMsg(SPeepholeOptimization + 'VOpVmov2VOp done',p);
  1024. asml.Remove(hp1);
  1025. hp1.Free;
  1026. result:=true;
  1027. end;
  1028. end;
  1029. end;
  1030. function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  1031. var
  1032. hp1, hp2: tai;
  1033. TmpUsedRegs : TAllUsedRegs;
  1034. GetNextInstruction_p: Boolean;
  1035. PreMessage, RegName1, RegName2, InputVal, MaskNum: string;
  1036. NewSize: topsize;
  1037. begin
  1038. Result:=false;
  1039. GetNextInstruction_p:=GetNextInstruction(p, hp1);
  1040. { remove mov reg1,reg1? }
  1041. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^)
  1042. {$ifdef x86_64}
  1043. { Exceptional case:
  1044. if for example, "mov %eax,%eax" is followed by a command that then
  1045. reads %rax, then mov actually has the effect of zeroing the upper
  1046. 32 bits of the register and hence is not a null operation. [Kit]
  1047. }
  1048. and not (
  1049. (taicpu(p).oper[0]^.typ = top_reg) and
  1050. (taicpu(hp1).typ = ait_instruction) and
  1051. (taicpu(hp1).opsize = S_Q) and
  1052. (taicpu(hp1).ops > 0) and
  1053. (
  1054. (
  1055. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1056. (getsupreg(taicpu(hp1).oper[0]^.reg) = getsupreg(taicpu(p).oper[0]^.reg))
  1057. )
  1058. or
  1059. (
  1060. (taicpu(hp1).opcode in [A_IMUL, A_IDIV]) and
  1061. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1062. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[0]^.reg))
  1063. )
  1064. )
  1065. )
  1066. {$endif x86_64}
  1067. then
  1068. begin
  1069. DebugMsg(SPeepholeOptimization + 'Mov2Nop done',p);
  1070. { take care of the register (de)allocs following p }
  1071. UpdateUsedRegs(tai(p.next));
  1072. asml.remove(p);
  1073. p.free;
  1074. p:=hp1;
  1075. Result:=true;
  1076. exit;
  1077. end;
  1078. if GetNextInstruction_p and
  1079. MatchInstruction(hp1,A_AND,[]) and
  1080. (taicpu(p).oper[1]^.typ = top_reg) and
  1081. MatchOpType(taicpu(hp1),top_const,top_reg) then
  1082. begin
  1083. if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
  1084. begin
  1085. case taicpu(p).opsize of
  1086. S_L:
  1087. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1088. begin
  1089. { Optimize out:
  1090. mov x, %reg
  1091. and ffffffffh, %reg
  1092. }
  1093. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p);
  1094. asml.remove(hp1);
  1095. hp1.free;
  1096. Result:=true;
  1097. exit;
  1098. end;
  1099. S_Q: { TODO: Confirm if this is even possible }
  1100. if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then
  1101. begin
  1102. { Optimize out:
  1103. mov x, %reg
  1104. and ffffffffffffffffh, %reg
  1105. }
  1106. DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p);
  1107. asml.remove(hp1);
  1108. hp1.free;
  1109. Result:=true;
  1110. exit;
  1111. end;
  1112. end;
  1113. end
  1114. else if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and
  1115. (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) }
  1116. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg))
  1117. then
  1118. begin
  1119. if taicpu(p).oper[0]^.typ = top_reg then
  1120. InputVal := '%' + std_regname(taicpu(p).oper[0]^.reg)
  1121. else
  1122. InputVal := 'x';
  1123. MaskNum := tostr(taicpu(hp1).oper[0]^.val);
  1124. case taicpu(p).opsize of
  1125. S_B:
  1126. if (taicpu(hp1).oper[0]^.val = $ff) then
  1127. begin
  1128. { Convert:
  1129. movb x, %regl movb x, %regl
  1130. andw ffh, %regw andl ffh, %regd
  1131. To:
  1132. movzbw x, %regw movzbl x, %regd
  1133. (Identical registers, just different sizes)
  1134. }
  1135. RegName1 := std_regname(taicpu(p).oper[1]^.reg); { 8-bit register name }
  1136. RegName2 := std_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name }
  1137. case taicpu(hp1).opsize of
  1138. S_W: NewSize := S_BW;
  1139. S_L: NewSize := S_BL;
  1140. {$ifdef x86_64}
  1141. S_Q: NewSize := S_BQ;
  1142. {$endif x86_64}
  1143. else
  1144. InternalError(2018011510);
  1145. end;
  1146. end
  1147. else
  1148. NewSize := S_NO;
  1149. S_W:
  1150. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1151. begin
  1152. { Convert:
  1153. movw x, %regw
  1154. andl ffffh, %regd
  1155. To:
  1156. movzwl x, %regd
  1157. (Identical registers, just different sizes)
  1158. }
  1159. RegName1 := std_regname(taicpu(p).oper[1]^.reg); { 16-bit register name }
  1160. RegName2 := std_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name }
  1161. case taicpu(hp1).opsize of
  1162. S_L: NewSize := S_WL;
  1163. {$ifdef x86_64}
  1164. S_Q: NewSize := S_WQ;
  1165. {$endif x86_64}
  1166. else
  1167. InternalError(2018011511);
  1168. end;
  1169. end
  1170. else
  1171. NewSize := S_NO;
  1172. else
  1173. NewSize := S_NO;
  1174. end;
  1175. if NewSize <> S_NO then
  1176. begin
  1177. PreMessage := 'mov' + gas_opsize2str[taicpu(p).opsize] + ' ' + InputVal + ',%' + RegName1;
  1178. { The actual optimization }
  1179. taicpu(p).opcode := A_MOVZX;
  1180. taicpu(p).changeopsize(NewSize);
  1181. taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^;
  1182. { Safeguard if "and" is followed by a conditional command }
  1183. CopyUsedRegs(TmpUsedRegs);
  1184. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  1185. if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, tai(hp1.next), TmpUsedRegs)) then
  1186. begin
  1187. { At this point, the "and" command is effectively equivalent to
  1188. "test %reg,%reg". This will be handled separately by the
  1189. Peephole Optimizer. [Kit] }
  1190. DebugMsg(SPeepholeOptimization + PreMessage +
  1191. ' -> movz' + gas_opsize2str[NewSize] + ' ' + InputVal + ',%' + RegName2, p);
  1192. end
  1193. else
  1194. begin
  1195. DebugMsg(SPeepholeOptimization + PreMessage + '; and' + gas_opsize2str[taicpu(hp1).opsize] + ' $' + MaskNum + ',%' + RegName2 +
  1196. ' -> movz' + gas_opsize2str[NewSize] + ' ' + InputVal + ',%' + RegName2, p);
  1197. asml.Remove(hp1);
  1198. hp1.Free;
  1199. end;
  1200. Result := True;
  1201. ReleaseUsedRegs(TmpUsedRegs);
  1202. Exit;
  1203. end;
  1204. end;
  1205. end
  1206. else if GetNextInstruction_p and
  1207. MatchInstruction(hp1,A_MOV,[]) and
  1208. (taicpu(p).oper[1]^.typ = top_reg) and
  1209. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1210. begin
  1211. CopyUsedRegs(TmpUsedRegs);
  1212. UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
  1213. { we have
  1214. mov x, %treg
  1215. mov %treg, y
  1216. }
  1217. if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
  1218. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1219. { we've got
  1220. mov x, %treg
  1221. mov %treg, y
1222. where %treg is not used afterwards }
  1223. case taicpu(p).oper[0]^.typ Of
  1224. top_reg:
  1225. begin
  1226. { change
  1227. mov %reg, %treg
  1228. mov %treg, y
  1229. to
  1230. mov %reg, y
  1231. }
  1232. if taicpu(hp1).oper[1]^.typ=top_reg then
  1233. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1234. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1235. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 2 done',p);
  1236. asml.remove(hp1);
  1237. hp1.free;
  1238. ReleaseUsedRegs(TmpUsedRegs);
  1239. Result:=true;
  1240. Exit;
  1241. end;
  1242. top_const:
  1243. begin
  1244. { change
  1245. mov const, %treg
  1246. mov %treg, y
  1247. to
  1248. mov const, y
  1249. }
  1250. if (taicpu(hp1).oper[1]^.typ=top_reg) or
  1251. ((taicpu(p).oper[0]^.val>=low(longint)) and (taicpu(p).oper[0]^.val<=high(longint))) then
  1252. begin
  1253. if taicpu(hp1).oper[1]^.typ=top_reg then
  1254. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1255. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  1256. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 5 done',p);
  1257. asml.remove(hp1);
  1258. hp1.free;
  1259. ReleaseUsedRegs(TmpUsedRegs);
  1260. Result:=true;
  1261. Exit;
  1262. end;
  1263. end;
  1264. top_ref:
  1265. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  1266. begin
  1267. { change
  1268. mov mem, %treg
  1269. mov %treg, %reg
  1270. to
1271. mov mem, %reg
  1272. }
  1273. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1274. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 3 done',p);
  1275. asml.remove(hp1);
  1276. hp1.free;
  1277. ReleaseUsedRegs(TmpUsedRegs);
  1278. Result:=true;
  1279. Exit;
  1280. end;
  1281. end;
  1282. ReleaseUsedRegs(TmpUsedRegs);
  1283. end
  1284. else
  1285. { Change
  1286. mov %reg1, %reg2
  1287. xxx %reg2, ???
  1288. to
  1289. mov %reg1, %reg2
  1290. xxx %reg1, ???
  1291. to avoid a write/read penalty
  1292. }
  1293. if MatchOpType(taicpu(p),top_reg,top_reg) and
  1294. GetNextInstruction(p,hp1) and
  1295. (tai(hp1).typ = ait_instruction) and
  1296. (taicpu(hp1).ops >= 1) and
  1297. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  1298. { we have
  1299. mov %reg1, %reg2
  1300. XXX %reg2, ???
  1301. }
  1302. begin
  1303. if ((taicpu(hp1).opcode = A_OR) or
  1304. (taicpu(hp1).opcode = A_AND) or
  1305. (taicpu(hp1).opcode = A_TEST)) and
  1306. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1307. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  1308. { we have
  1309. mov %reg1, %reg2
  1310. test/or/and %reg2, %reg2
  1311. }
  1312. begin
  1313. CopyUsedRegs(TmpUsedRegs);
  1314. { reg1 will be used after the first instruction,
  1315. so update the allocation info }
  1316. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1317. if GetNextInstruction(hp1, hp2) and
  1318. (hp2.typ = ait_instruction) and
  1319. taicpu(hp2).is_jmp and
  1320. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  1321. { change
  1322. mov %reg1, %reg2
  1323. test/or/and %reg2, %reg2
  1324. jxx
  1325. to
  1326. test %reg1, %reg1
  1327. jxx
  1328. }
  1329. begin
  1330. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1331. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1332. DebugMsg(SPeepholeOptimization + 'MovTestJxx2TestMov done',p);
  1333. asml.remove(p);
  1334. p.free;
  1335. p := hp1;
  1336. ReleaseUsedRegs(TmpUsedRegs);
  1337. Exit;
  1338. end
  1339. else
  1340. { change
  1341. mov %reg1, %reg2
  1342. test/or/and %reg2, %reg2
  1343. to
  1344. mov %reg1, %reg2
  1345. test/or/and %reg1, %reg1
  1346. }
  1347. begin
  1348. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1349. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1350. DebugMsg(SPeepholeOptimization + 'MovTestJxx2MovTestJxx done',p);
  1351. end;
  1352. ReleaseUsedRegs(TmpUsedRegs);
  1353. end
  1354. end
  1355. else
  1356. { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1357. x >= RetOffset) as it doesn't do anything (it writes either to a
  1358. parameter or to the temporary storage room for the function
  1359. result)
  1360. }
  1361. if GetNextInstruction_p and
  1362. (tai(hp1).typ = ait_instruction) then
  1363. begin
  1364. if IsExitCode(hp1) and
  1365. MatchOpType(taicpu(p),top_reg,top_ref) and
  1366. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1367. not(assigned(current_procinfo.procdef.funcretsym) and
  1368. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1369. (taicpu(p).oper[1]^.ref^.index = NR_NO) then
  1370. begin
  1371. asml.remove(p);
  1372. p.free;
  1373. p:=hp1;
  1374. DebugMsg(SPeepholeOptimization + 'removed deadstore before leave/ret',p);
  1375. RemoveLastDeallocForFuncRes(p);
  1376. exit;
  1377. end
  1378. { change
  1379. mov reg1, mem1
  1380. test/cmp x, mem1
  1381. to
  1382. mov reg1, mem1
  1383. test/cmp x, reg1
  1384. }
  1385. else if MatchOpType(taicpu(p),top_reg,top_ref) and
  1386. MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
  1387. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1388. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1389. begin
  1390. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1391. DebugMsg(SPeepholeOptimization + 'MovTestCmp2MovTestCmp 1',hp1);
  1392. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1393. end;
  1394. end;
  1395. { Next instruction is also a MOV ? }
  1396. if GetNextInstruction_p and
  1397. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
  1398. begin
  1399. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1400. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1401. { mov reg1, mem1 or mov mem1, reg1
  1402. mov mem2, reg2 mov reg2, mem2}
  1403. begin
  1404. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1405. { mov reg1, mem1 or mov mem1, reg1
  1406. mov mem2, reg1 mov reg2, mem1}
  1407. begin
  1408. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1409. { Removes the second statement from
  1410. mov reg1, mem1/reg2
  1411. mov mem1/reg2, reg1 }
  1412. begin
  1413. if taicpu(p).oper[0]^.typ=top_reg then
  1414. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1415. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 1',p);
  1416. asml.remove(hp1);
  1417. hp1.free;
  1418. Result:=true;
  1419. exit;
  1420. end
  1421. else
  1422. begin
  1423. CopyUsedRegs(TmpUsedRegs);
  1424. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1425. if (taicpu(p).oper[1]^.typ = top_ref) and
  1426. { mov reg1, mem1
  1427. mov mem2, reg1 }
  1428. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1429. GetNextInstruction(hp1, hp2) and
  1430. MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
  1431. OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
  1432. OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
  1433. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1434. { change to
  1435. mov reg1, mem1 mov reg1, mem1
  1436. mov mem2, reg1 cmp reg1, mem2
  1437. cmp mem1, reg1
  1438. }
  1439. begin
  1440. asml.remove(hp2);
  1441. hp2.free;
  1442. taicpu(hp1).opcode := A_CMP;
  1443. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1444. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1445. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1446. DebugMsg(SPeepholeOptimization + 'MovMovCmp2MovCmp done',hp1);
  1447. end;
  1448. ReleaseUsedRegs(TmpUsedRegs);
  1449. end;
  1450. end
  1451. else if (taicpu(p).oper[1]^.typ=top_ref) and
  1452. OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1453. begin
  1454. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
  1455. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1456. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov1 done',p);
  1457. end
  1458. else
  1459. begin
  1460. CopyUsedRegs(TmpUsedRegs);
  1461. if GetNextInstruction(hp1, hp2) and
  1462. MatchOpType(taicpu(p),top_ref,top_reg) and
  1463. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1464. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1465. MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
  1466. MatchOpType(taicpu(hp2),top_ref,top_reg) and
  1467. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1468. if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
  1469. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1470. { mov mem1, %reg1
  1471. mov %reg1, mem2
  1472. mov mem2, reg2
  1473. to:
  1474. mov mem1, reg2
  1475. mov reg2, mem2}
  1476. begin
  1477. AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1478. DebugMsg(SPeepholeOptimization + 'MovMovMov2MovMov 1 done',p);
  1479. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1480. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1481. asml.remove(hp2);
  1482. hp2.free;
  1483. end
  1484. {$ifdef i386}
1485. { this is enabled for i386 only, as the rules to create the register sets below
1486. are too complicated for x86-64, which would make this code too error-prone
1487. on x86-64
  1488. }
  1489. else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1490. not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
  1491. not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
  1492. { mov mem1, reg1 mov mem1, reg1
  1493. mov reg1, mem2 mov reg1, mem2
  1494. mov mem2, reg2 mov mem2, reg1
  1495. to: to:
  1496. mov mem1, reg1 mov mem1, reg1
  1497. mov mem1, reg2 mov reg1, mem2
  1498. mov reg1, mem2
  1499. or (if mem1 depends on reg1
  1500. and/or if mem2 depends on reg2)
  1501. to:
  1502. mov mem1, reg1
  1503. mov reg1, mem2
  1504. mov reg1, reg2
  1505. }
  1506. begin
  1507. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1508. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1509. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1510. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1511. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1512. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1513. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1514. AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1515. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1516. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1517. AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1518. end
  1519. else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1520. begin
  1521. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1522. AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1523. end
  1524. else
  1525. begin
  1526. asml.remove(hp2);
  1527. hp2.free;
  1528. end
  1529. {$endif i386}
  1530. ;
  1531. ReleaseUsedRegs(TmpUsedRegs);
  1532. end;
  1533. end
  1534. (* { movl [mem1],reg1
  1535. movl [mem1],reg2
  1536. to
  1537. movl [mem1],reg1
  1538. movl reg1,reg2
  1539. }
  1540. else if (taicpu(p).oper[0]^.typ = top_ref) and
  1541. (taicpu(p).oper[1]^.typ = top_reg) and
  1542. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1543. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1544. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1545. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1546. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1547. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1548. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1549. else*)
  1550. { movl const1,[mem1]
  1551. movl [mem1],reg1
  1552. to
  1553. movl const1,reg1
  1554. movl reg1,[mem1]
  1555. }
  1556. else if MatchOpType(Taicpu(p),top_const,top_ref) and
  1557. MatchOpType(Taicpu(hp1),top_ref,top_reg) and
  1558. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1559. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1560. not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
  1561. begin
  1562. AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1563. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1564. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1565. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1566. taicpu(hp1).fileinfo := taicpu(p).fileinfo;
  1567. DebugMsg(SPeepholeOptimization + 'MovMov2MovMov 1',p);
  1568. end
  1569. {
  1570. mov* x,reg1
  1571. mov* y,reg1
  1572. to
  1573. mov* y,reg1
  1574. }
  1575. else if (taicpu(p).oper[1]^.typ=top_reg) and
  1576. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  1577. not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[0]^)) then
  1578. begin
  1579. DebugMsg(SPeepholeOptimization + 'MovMov2Mov 4 done',p);
  1580. { take care of the register (de)allocs following p }
  1581. UpdateUsedRegs(tai(p.next));
  1582. asml.remove(p);
  1583. p.free;
  1584. p:=hp1;
  1585. Result:=true;
  1586. exit;
  1587. end;
  1588. end
  1589. else if (taicpu(p).oper[1]^.typ = top_reg) and
  1590. GetNextInstruction_p and
  1591. (hp1.typ = ait_instruction) and
  1592. GetNextInstruction(hp1, hp2) and
  1593. MatchInstruction(hp2,A_MOV,[]) and
  1594. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  1595. (taicpu(hp2).oper[0]^.typ=top_reg) and
  1596. (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
  1597. (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
  1598. ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and (taicpu(hp2).opsize=S_L) and
  1599. IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
  1600. ) then
  1601. { change movsX/movzX reg/ref, reg2
  1602. add/sub/or/... reg3/$const, reg2
1603. mov reg2, reg/ref
  1604. to add/sub/or/... reg3/$const, reg/ref }
  1605. begin
  1606. CopyUsedRegs(TmpUsedRegs);
  1607. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1608. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1609. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  1610. begin
  1611. { by example:
  1612. movswl %si,%eax movswl %si,%eax p
  1613. decl %eax addl %edx,%eax hp1
  1614. movw %ax,%si movw %ax,%si hp2
  1615. ->
  1616. movswl %si,%eax movswl %si,%eax p
  1617. decw %eax addw %edx,%eax hp1
  1618. movw %ax,%si movw %ax,%si hp2
  1619. }
  1620. DebugMsg(SPeepholeOptimization + 'MovOpMov2Op ('+
  1621. std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
  1622. std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
  1623. std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
  1624. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1625. {
  1626. ->
  1627. movswl %si,%eax movswl %si,%eax p
  1628. decw %si addw %dx,%si hp1
  1629. movw %ax,%si movw %ax,%si hp2
  1630. }
  1631. case taicpu(hp1).ops of
  1632. 1:
  1633. begin
  1634. taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
  1635. if taicpu(hp1).oper[0]^.typ=top_reg then
  1636. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1637. end;
  1638. 2:
  1639. begin
  1640. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  1641. if (taicpu(hp1).oper[0]^.typ=top_reg) and
  1642. (taicpu(hp1).opcode<>A_SHL) and
  1643. (taicpu(hp1).opcode<>A_SHR) and
  1644. (taicpu(hp1).opcode<>A_SAR) then
  1645. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1646. end;
  1647. else
  1648. internalerror(2008042701);
  1649. end;
  1650. {
  1651. ->
  1652. decw %si addw %dx,%si p
  1653. }
  1654. asml.remove(p);
  1655. asml.remove(hp2);
  1656. p.Free;
  1657. hp2.Free;
  1658. p := hp1;
  1659. end;
  1660. ReleaseUsedRegs(TmpUsedRegs);
  1661. end
  1662. else if GetNextInstruction_p and
  1663. MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
  1664. GetNextInstruction(hp1, hp2) and
  1665. MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
  1666. MatchOperand(Taicpu(p).oper[0]^,0) and
  1667. (Taicpu(p).oper[1]^.typ = top_reg) and
  1668. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
  1669. MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
  1670. { mov reg1,0
  1671. bts reg1,operand1 --> mov reg1,operand2
  1672. or reg1,operand2 bts reg1,operand1}
  1673. begin
  1674. Taicpu(hp2).opcode:=A_MOV;
  1675. asml.remove(hp1);
  1676. insertllitem(hp2,hp2.next,hp1);
  1677. asml.remove(p);
  1678. p.free;
  1679. p:=hp1;
  1680. end
  1681. else if GetNextInstruction_p and
  1682. MatchInstruction(hp1,A_LEA,[S_L]) and
  1683. MatchOpType(Taicpu(p),top_ref,top_reg) and
  1684. ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
  1685. (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
  1686. ) or
  1687. (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
  1688. (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
  1689. )
  1690. ) then
  1691. { mov reg1,ref
  1692. lea reg2,[reg1,reg2]
  1693. to
  1694. add reg2,ref}
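{ For example (registers chosen arbitrarily, and assuming %eax is not
  used afterwards):
      movl 4(%ebx),%eax
      leal (%eax,%edx),%edx     -->   addl 4(%ebx),%edx
}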
  1695. begin
  1696. CopyUsedRegs(TmpUsedRegs);
  1697. { reg1 may not be used afterwards }
  1698. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  1699. begin
  1700. Taicpu(hp1).opcode:=A_ADD;
  1701. Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
  1702. DebugMsg(SPeepholeOptimization + 'MovLea2Add done',hp1);
  1703. asml.remove(p);
  1704. p.free;
  1705. p:=hp1;
  1706. end;
  1707. ReleaseUsedRegs(TmpUsedRegs);
  1708. end;
  1709. end;
  1710. function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  1711. var
  1712. hp1 : tai;
  1713. begin
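{ Looks for a pair of identical movXX instructions (e.g. SSE moves) where
  the second one just copies the value straight back, for instance
      movss %xmm0,(%eax)
      movss (%eax),%xmm0
  and removes the redundant second instruction (or, when the register is
  not used afterwards, both of them). }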
  1714. Result:=false;
  1715. if taicpu(p).ops <> 2 then
  1716. exit;
  1717. if GetNextInstruction(p,hp1) and
  1718. MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
  1719. (taicpu(hp1).ops = 2) then
  1720. begin
  1721. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1722. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1723. { movXX reg1, mem1 or movXX mem1, reg1
  1724. movXX mem2, reg2 movXX reg2, mem2}
  1725. begin
  1726. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1727. { movXX reg1, mem1 or movXX mem1, reg1
  1728. movXX mem2, reg1 movXX reg2, mem1}
  1729. begin
  1730. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1731. begin
  1732. { Removes the second statement from
  1733. movXX reg1, mem1/reg2
  1734. movXX mem1/reg2, reg1
  1735. }
  1736. if taicpu(p).oper[0]^.typ=top_reg then
  1737. AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1738. { Removes the second statement from
  1739. movXX mem1/reg1, reg2
  1740. movXX reg2, mem1/reg1
  1741. }
  1742. if (taicpu(p).oper[1]^.typ=top_reg) and
  1743. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
  1744. begin
  1745. asml.remove(p);
  1746. p.free;
  1747. GetNextInstruction(hp1,p);
  1748. DebugMsg(SPeepholeOptimization + 'MovXXMovXX2Nop 1 done',p);
  1749. end
  1750. else
  1751. DebugMsg(SPeepholeOptimization + 'MovXXMovXX2MoVXX 1 done',p);
  1752. asml.remove(hp1);
  1753. hp1.free;
  1754. Result:=true;
  1755. exit;
  1756. end
  1757. end;
  1758. end;
  1759. end;
  1760. end;
  1761. function TX86AsmOptimizer.OptPass1OP(const p : tai) : boolean;
  1762. var
  1763. TmpUsedRegs : TAllUsedRegs;
  1764. hp1 : tai;
  1765. begin
  1766. result:=false;
  1767. { replace
  1768. <Op>X %mreg1,%mreg2 // Op in [ADD,MUL]
  1769. MovX %mreg2,%mreg1
  1770. dealloc %mreg2
  1771. by
  1772. <Op>X %mreg2,%mreg1
  1773. ?
  1774. }
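{ For example (assuming %xmm1 is deallocated afterwards):
      addsd  %xmm0,%xmm1
      movapd %xmm1,%xmm0     -->   addsd %xmm1,%xmm0
}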
  1775. if GetNextInstruction(p,hp1) and
1776. { we mix single and double operations here because we assume that the compiler
  1777. generates vmovapd only after double operations and vmovaps only after single operations }
  1778. MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
  1779. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1780. MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
  1781. (taicpu(p).oper[0]^.typ=top_reg) then
  1782. begin
  1783. CopyUsedRegs(TmpUsedRegs);
  1784. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1785. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1786. begin
  1787. taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
  1788. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1789. DebugMsg(SPeepholeOptimization + 'OpMov2Op done',p);
  1790. asml.Remove(hp1);
  1791. hp1.Free;
  1792. result:=true;
  1793. end;
  1794. ReleaseUsedRegs(TmpUsedRegs);
  1795. end;
  1796. end;
  1797. function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  1798. var
  1799. hp1 : tai;
  1800. l : ASizeInt;
  1801. TmpUsedRegs : TAllUsedRegs;
  1802. begin
  1803. Result:=false;
  1804. { removes seg register prefixes from LEA operations, as they
  1805. don't do anything}
  1806. taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
  1807. { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
  1808. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1809. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
1810. { do not mess with leas accessing the stack pointer }
  1811. (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
  1812. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1813. begin
  1814. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1815. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1816. begin
  1817. hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
  1818. taicpu(p).oper[1]^.reg);
  1819. InsertLLItem(p.previous,p.next, hp1);
  1820. DebugMsg(SPeepholeOptimization + 'Lea2Mov done',hp1);
  1821. p.free;
  1822. p:=hp1;
  1823. Result:=true;
  1824. exit;
  1825. end
  1826. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1827. begin
  1828. hp1:=taicpu(p.Next);
  1829. DebugMsg(SPeepholeOptimization + 'Lea2Nop done',p);
  1830. asml.remove(p);
  1831. p.free;
  1832. p:=hp1;
  1833. Result:=true;
  1834. exit;
  1835. end
1836. { continue to use lea to adjust the stack pointer, as that is the
1837. recommended way; only convert it when optimizing for size }
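{ For example:
      leal 1(%eax),%eax    -->   incl %eax       (when inc/dec are preferred)
      leal -8(%edx),%edx   -->   subl $8,%edx
}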
  1838. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1839. (cs_opt_size in current_settings.optimizerswitches) then
  1840. with taicpu(p).oper[0]^.ref^ do
  1841. if (base = taicpu(p).oper[1]^.reg) then
  1842. begin
  1843. l:=offset;
  1844. if (l=1) and UseIncDec then
  1845. begin
  1846. taicpu(p).opcode:=A_INC;
  1847. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1848. taicpu(p).ops:=1;
  1849. DebugMsg(SPeepholeOptimization + 'Lea2Inc done',p);
  1850. end
  1851. else if (l=-1) and UseIncDec then
  1852. begin
  1853. taicpu(p).opcode:=A_DEC;
  1854. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1855. taicpu(p).ops:=1;
  1856. DebugMsg(SPeepholeOptimization + 'Lea2Dec done',p);
  1857. end
  1858. else
  1859. begin
  1860. if (l<0) and (l<>-2147483648) then
  1861. begin
  1862. taicpu(p).opcode:=A_SUB;
  1863. taicpu(p).loadConst(0,-l);
  1864. DebugMsg(SPeepholeOptimization + 'Lea2Sub done',p);
  1865. end
  1866. else
  1867. begin
  1868. taicpu(p).opcode:=A_ADD;
  1869. taicpu(p).loadConst(0,l);
  1870. DebugMsg(SPeepholeOptimization + 'Lea2Add done',p);
  1871. end;
  1872. end;
  1873. Result:=true;
  1874. exit;
  1875. end;
  1876. end;
  1877. if GetNextInstruction(p,hp1) and
  1878. MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
  1879. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
  1880. MatchOpType(Taicpu(hp1),top_reg,top_reg) and
  1881. (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
  1882. begin
  1883. CopyUsedRegs(TmpUsedRegs);
  1884. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  1885. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  1886. begin
  1887. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  1888. DebugMsg(SPeepholeOptimization + 'LeaMov2Lea done',p);
  1889. asml.Remove(hp1);
  1890. hp1.Free;
  1891. result:=true;
  1892. end;
  1893. ReleaseUsedRegs(TmpUsedRegs);
  1894. end;
  1895. (*
  1896. This is unsafe, lea doesn't modify the flags but "add"
  1897. does. This breaks webtbs/tw15694.pp. The above
  1898. transformations are also unsafe, but they don't seem to
1899. be triggered by code that FPC generates (or at
1900. least such code does not occur in the tests...). This needs to be
  1901. fixed by checking for the liveness of the flags register.
  1902. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1903. begin
  1904. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1905. taicpu(p).oper[0]^.ref^.base);
  1906. InsertLLItem(asml,p.previous,p.next, hp1);
  1907. DebugMsg(SPeepholeOptimization + 'Lea2AddBase done',hp1);
  1908. p.free;
  1909. p:=hp1;
  1910. continue;
  1911. end
  1912. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1913. begin
  1914. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1915. taicpu(p).oper[0]^.ref^.index);
  1916. InsertLLItem(asml,p.previous,p.next,hp1);
  1917. DebugMsg(SPeepholeOptimization + 'Lea2AddIndex done',hp1);
  1918. p.free;
  1919. p:=hp1;
  1920. continue;
  1921. end
  1922. *)
  1923. end;
  1924. function TX86AsmOptimizer.DoSubAddOpt(var p: tai): Boolean;
  1925. var
  1926. hp1 : tai;
  1927. begin
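{ Tries to merge a preceding dec/sub/add on the same register into the
  current "sub $const,%reg", for example
      subl $4,%eax ; subl $8,%eax   -->   subl $12,%eax
      addl $4,%eax ; subl $4,%eax   -->   (both instructions removed)
  Returns True only in the latter case, i.e. when p itself was removed. }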
  1928. DoSubAddOpt := False;
  1929. if GetLastInstruction(p, hp1) and
  1930. (hp1.typ = ait_instruction) and
  1931. (taicpu(hp1).opsize = taicpu(p).opsize) then
  1932. case taicpu(hp1).opcode Of
  1933. A_DEC:
  1934. if (taicpu(hp1).oper[0]^.typ = top_reg) and
  1935. MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1936. begin
  1937. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
  1938. asml.remove(hp1);
  1939. hp1.free;
  1940. end;
  1941. A_SUB:
  1942. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  1943. MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
  1944. begin
  1945. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
  1946. asml.remove(hp1);
  1947. hp1.free;
  1948. end;
  1949. A_ADD:
  1950. if MatchOpType(taicpu(hp1),top_const,top_reg) and
  1951. MatchOperand(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) then
  1952. begin
  1953. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
  1954. asml.remove(hp1);
  1955. hp1.free;
  1956. if (taicpu(p).oper[0]^.val = 0) then
  1957. begin
  1958. hp1 := tai(p.next);
  1959. asml.remove(p);
  1960. p.free;
  1961. if not GetLastInstruction(hp1, p) then
  1962. p := hp1;
  1963. DoSubAddOpt := True;
  1964. end
  1965. end;
  1966. end;
  1967. end;
  1968. function TX86AsmOptimizer.OptPass1Sub(var p : tai) : boolean;
  1969. var
  1970. hp1 : tai;
  1971. begin
  1972. Result:=false;
  1973. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1974. { * change "sub/add const1, reg" or "dec reg" followed by
  1975. "sub const2, reg" to one "sub ..., reg" }
  1976. if MatchOpType(taicpu(p),top_const,top_reg) then
  1977. begin
  1978. {$ifdef i386}
  1979. if (taicpu(p).oper[0]^.val = 2) and
  1980. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1981. { Don't do the sub/push optimization if the sub }
  1982. { comes from setting up the stack frame (JM) }
  1983. (not(GetLastInstruction(p,hp1)) or
  1984. not(MatchInstruction(hp1,A_MOV,[S_L]) and
  1985. MatchOperand(taicpu(hp1).oper[0]^,NR_ESP) and
1986. MatchOperand(taicpu(hp1).oper[1]^,NR_EBP))) then
  1987. begin
  1988. hp1 := tai(p.next);
  1989. while Assigned(hp1) and
  1990. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1991. not RegReadByInstruction(NR_ESP,hp1) and
  1992. not RegModifiedByInstruction(NR_ESP,hp1) do
  1993. hp1 := tai(hp1.next);
  1994. if Assigned(hp1) and
  1995. MatchInstruction(hp1,A_PUSH,[S_W]) then
  1996. begin
  1997. taicpu(hp1).changeopsize(S_L);
  1998. if taicpu(hp1).oper[0]^.typ=top_reg then
  1999. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  2000. hp1 := tai(p.next);
  2001. asml.remove(p);
  2002. p.free;
  2003. p := hp1;
  2004. Result:=true;
  2005. exit;
  2006. end;
  2007. end;
  2008. {$endif i386}
  2009. if DoSubAddOpt(p) then
  2010. Result:=true;
  2011. end;
  2012. end;
  2013. function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  2014. var
  2015. TmpUsedRegs : TAllUsedRegs;
  2016. hp1,hp2: tai;
  2017. begin
  2018. Result:=false;
  2019. if MatchOpType(taicpu(p),top_reg,top_reg) and
  2020. GetNextInstruction(p, hp1) and
  2021. MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
  2022. MatchOpType(taicpu(hp1),top_ref,top_reg) and
  2023. ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
  2024. or
  2025. (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
  2026. ) and
  2027. (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
  2028. { mov reg1, reg2
  2029. mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
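{ For example:
      movl   %esi,%eax
      movzbl (%eax),%eax     -->   movzbl (%esi),%eax
}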
  2030. begin
  2031. if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
  2032. taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
  2033. if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
  2034. taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
  2035. DebugMsg(SPeepholeOptimization + 'MovMovXX2MoVXX 1 done',p);
  2036. asml.remove(p);
  2037. p.free;
  2038. p := hp1;
  2039. Result:=true;
  2040. exit;
  2041. end
  2042. else if (taicpu(p).oper[0]^.typ = top_ref) and
  2043. GetNextInstruction(p,hp1) and
  2044. (hp1.typ = ait_instruction) and
  2045. { while the GetNextInstruction(hp1,hp2) call could be factored out,
2046. doing it separately in both branches allows the cheap checks with a
2047. low probability of matching to be done first }
  2048. ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  2049. GetNextInstruction(hp1,hp2) and
  2050. MatchInstruction(hp2,A_MOV,[])
  2051. ) or
  2052. ((taicpu(hp1).opcode=A_LEA) and
  2053. GetNextInstruction(hp1,hp2) and
  2054. MatchInstruction(hp2,A_MOV,[]) and
  2055. ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
  2056. (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
  2057. ) or
  2058. (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
  2059. taicpu(p).oper[1]^.reg) and
  2060. (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
  2061. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
  2062. (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
  2063. ) and
  2064. ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
  2065. )
  2066. ) and
  2067. MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
  2068. (taicpu(hp2).oper[1]^.typ = top_ref) then
  2069. begin
  2070. CopyUsedRegs(TmpUsedRegs);
  2071. UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
  2072. if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
  2073. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
  2074. { change mov (ref), reg
  2075. add/sub/or/... reg2/$const, reg
  2076. mov reg, (ref)
  2077. # release reg
  2078. to add/sub/or/... reg2/$const, (ref) }
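{ For example (assuming %eax is released afterwards):
      movl (%ebx),%eax
      addl $5,%eax           -->   addl $5,(%ebx)
      movl %eax,(%ebx)
}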
  2079. begin
  2080. case taicpu(hp1).opcode of
  2081. A_INC,A_DEC,A_NOT,A_NEG :
  2082. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  2083. A_LEA :
  2084. begin
  2085. taicpu(hp1).opcode:=A_ADD;
  2086. if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
  2087. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
  2088. else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
  2089. taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
  2090. else
  2091. taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
  2092. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  2093. DebugMsg(SPeepholeOptimization + 'FoldLea done',hp1);
  2094. end
  2095. else
  2096. taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
  2097. end;
  2098. asml.remove(p);
  2099. asml.remove(hp2);
  2100. p.free;
  2101. hp2.free;
  2102. p := hp1
  2103. end;
  2104. ReleaseUsedRegs(TmpUsedRegs);
  2105. end;
  2106. end;
  2107. function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  2108. var
  2109. TmpUsedRegs : TAllUsedRegs;
  2110. hp1 : tai;
  2111. begin
  2112. Result:=false;
  2113. if (taicpu(p).ops >= 2) and
  2114. ((taicpu(p).oper[0]^.typ = top_const) or
  2115. ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
  2116. (taicpu(p).oper[1]^.typ = top_reg) and
  2117. ((taicpu(p).ops = 2) or
  2118. ((taicpu(p).oper[2]^.typ = top_reg) and
  2119. (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
  2120. GetLastInstruction(p,hp1) and
  2121. MatchInstruction(hp1,A_MOV,[]) and
  2122. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  2123. ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
  2124. ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
  2125. begin
  2126. CopyUsedRegs(TmpUsedRegs);
  2127. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
  2128. { change
  2129. mov reg1,reg2
  2130. imul y,reg2 to imul y,reg1,reg2 }
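{ For example:
      movl  %ecx,%eax
      imull $3,%eax          -->   imull $3,%ecx,%eax
}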
  2131. begin
  2132. taicpu(p).ops := 3;
  2133. taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
  2134. taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
  2135. DebugMsg(SPeepholeOptimization + 'MovImul2Imul done',p);
  2136. asml.remove(hp1);
  2137. hp1.free;
  2138. result:=true;
  2139. end;
  2140. ReleaseUsedRegs(TmpUsedRegs);
  2141. end;
  2142. end;
  2143. function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  2144. var
  2145. hp1 : tai;
  2146. begin
  2147. {
  2148. change
  2149. jmp .L1
  2150. ...
  2151. .L1:
  2152. ret
  2153. into
  2154. ret
  2155. }
  2156. result:=false;
  2157. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
  2158. (taicpu(p).oper[0]^.ref^.index=NR_NO) then
  2159. begin
  2160. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  2161. if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
  2162. MatchInstruction(hp1,A_RET,[S_NO]) then
  2163. begin
  2164. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  2165. taicpu(p).opcode:=A_RET;
  2166. taicpu(p).is_jmp:=false;
  2167. taicpu(p).ops:=taicpu(hp1).ops;
  2168. case taicpu(hp1).ops of
  2169. 0:
  2170. taicpu(p).clearop(0);
  2171. 1:
  2172. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  2173. else
  2174. internalerror(2016041301);
  2175. end;
  2176. result:=true;
  2177. end;
  2178. end;
  2179. end;
  2180. function CanBeCMOV(p : tai) : boolean;
  2181. begin
  2182. CanBeCMOV:=assigned(p) and
  2183. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  2184. { we can't use cmov ref,reg because
2185. ref could be nil and cmov would still raise an exception
2186. even though the mov would not have been executed (FK)
  2187. or ((taicpu(p).oper[0]^.typ = top_ref) and
  2188. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  2189. }
  2190. MatchOpType(taicpu(p),top_reg,top_reg);
  2191. end;
  2192. function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  2193. var
  2194. hp1,hp2,hp3: tai;
  2195. carryadd_opcode : TAsmOp;
  2196. l : Longint;
  2197. condition : TAsmCond;
  2198. begin
2199. {   jb @@1                        cmc
2200.     inc/dec operand       -->     adc/sbb operand,0
2201.   @@1:
2202.   ... and ...
2203.     jnb @@1
2204.     inc/dec operand       -->     adc/sbb operand,0
2205.   @@1: }
  2206. result:=false;
  2207. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  2208. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  2209. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  2210. begin
  2211. carryadd_opcode:=A_NONE;
  2212. if Taicpu(p).condition in [C_NAE,C_B] then
  2213. begin
  2214. if Taicpu(hp1).opcode=A_INC then
  2215. carryadd_opcode:=A_ADC;
  2216. if Taicpu(hp1).opcode=A_DEC then
  2217. carryadd_opcode:=A_SBB;
  2218. if carryadd_opcode<>A_NONE then
  2219. begin
  2220. Taicpu(p).clearop(0);
  2221. Taicpu(p).ops:=0;
  2222. Taicpu(p).is_jmp:=false;
  2223. Taicpu(p).opcode:=A_CMC;
  2224. Taicpu(p).condition:=C_NONE;
  2225. Taicpu(hp1).ops:=2;
  2226. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  2227. Taicpu(hp1).loadconst(0,0);
  2228. Taicpu(hp1).opcode:=carryadd_opcode;
  2229. result:=true;
  2230. exit;
  2231. end;
  2232. end;
  2233. if Taicpu(p).condition in [C_AE,C_NB] then
  2234. begin
  2235. if Taicpu(hp1).opcode=A_INC then
  2236. carryadd_opcode:=A_ADC;
  2237. if Taicpu(hp1).opcode=A_DEC then
  2238. carryadd_opcode:=A_SBB;
  2239. if carryadd_opcode<>A_NONE then
  2240. begin
  2241. asml.remove(p);
  2242. p.free;
  2243. Taicpu(hp1).ops:=2;
  2244. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  2245. Taicpu(hp1).loadconst(0,0);
  2246. Taicpu(hp1).opcode:=carryadd_opcode;
  2247. p:=hp1;
  2248. result:=true;
  2249. exit;
  2250. end;
  2251. end;
  2252. end;
  2253. {$ifndef i8086}
  2254. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  2255. begin
  2256. { check for
  2257. jCC xxx
  2258. <several movs>
  2259. xxx:
  2260. }
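{ For example (on targets with CMOV):
      jne  .L1
      movl %edx,%eax         -->   cmove %edx,%eax
    .L1:
}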
  2261. l:=0;
  2262. GetNextInstruction(p, hp1);
  2263. while assigned(hp1) and
  2264. CanBeCMOV(hp1) and
  2265. { stop on labels }
  2266. not(hp1.typ=ait_label) do
  2267. begin
  2268. inc(l);
  2269. GetNextInstruction(hp1,hp1);
  2270. end;
  2271. if assigned(hp1) then
  2272. begin
  2273. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2274. begin
  2275. if (l<=4) and (l>0) then
  2276. begin
  2277. condition:=inverse_cond(taicpu(p).condition);
  2278. hp2:=p;
  2279. GetNextInstruction(p,hp1);
  2280. p:=hp1;
  2281. repeat
  2282. taicpu(hp1).opcode:=A_CMOVcc;
  2283. taicpu(hp1).condition:=condition;
  2284. GetNextInstruction(hp1,hp1);
  2285. until not(assigned(hp1)) or
  2286. not(CanBeCMOV(hp1));
2287. { wait with removing it, else GetNextInstruction could
2288. ignore the label if its only usage was in the
2289. jump that was moved away }
  2290. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2291. { if the label refs. reach zero, remove any alignment before the label }
  2292. if (hp1.typ=ait_align) and (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
  2293. begin
  2294. asml.Remove(hp1);
  2295. hp1.Free;
  2296. end;
  2297. asml.remove(hp2);
  2298. hp2.free;
  2299. result:=true;
  2300. exit;
  2301. end;
  2302. end
  2303. else
  2304. begin
  2305. { check further for
  2306. jCC xxx
  2307. <several movs 1>
  2308. jmp yyy
  2309. xxx:
  2310. <several movs 2>
  2311. yyy:
  2312. }
  2313. { hp2 points to jmp yyy }
  2314. hp2:=hp1;
  2315. { skip hp1 to xxx }
  2316. GetNextInstruction(hp1, hp1);
  2317. if assigned(hp2) and
  2318. assigned(hp1) and
  2319. (l<=3) and
  2320. (hp2.typ=ait_instruction) and
  2321. (taicpu(hp2).is_jmp) and
  2322. (taicpu(hp2).condition=C_None) and
  2323. { real label and jump, no further references to the
  2324. label are allowed }
  2325. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
  2326. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  2327. begin
  2328. l:=0;
  2329. { skip hp1 to <several moves 2> }
  2330. GetNextInstruction(hp1, hp1);
  2331. while assigned(hp1) and
  2332. CanBeCMOV(hp1) do
  2333. begin
  2334. inc(l);
  2335. GetNextInstruction(hp1, hp1);
  2336. end;
  2337. { hp1 points to yyy: }
  2338. if assigned(hp1) and
  2339. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  2340. begin
  2341. condition:=inverse_cond(taicpu(p).condition);
  2342. GetNextInstruction(p,hp1);
  2343. hp3:=p;
  2344. p:=hp1;
  2345. repeat
  2346. taicpu(hp1).opcode:=A_CMOVcc;
  2347. taicpu(hp1).condition:=condition;
  2348. GetNextInstruction(hp1,hp1);
  2349. until not(assigned(hp1)) or
  2350. not(CanBeCMOV(hp1));
  2351. { hp2 is still at jmp yyy }
  2352. GetNextInstruction(hp2,hp1);
  2353. { hp2 is now at xxx: }
  2354. condition:=inverse_cond(condition);
  2355. GetNextInstruction(hp1,hp1);
  2356. { hp1 is now at <several movs 2> }
  2357. repeat
  2358. taicpu(hp1).opcode:=A_CMOVcc;
  2359. taicpu(hp1).condition:=condition;
  2360. GetNextInstruction(hp1,hp1);
  2361. until not(assigned(hp1)) or
  2362. not(CanBeCMOV(hp1));
  2363. {
  2364. asml.remove(hp1.next)
  2365. hp1.next.free;
  2366. asml.remove(hp1);
  2367. hp1.free;
  2368. }
  2369. { remove jCC }
  2370. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  2371. asml.remove(hp3);
  2372. hp3.free;
  2373. { remove jmp }
  2374. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  2375. asml.remove(hp2);
  2376. hp2.free;
  2377. result:=true;
  2378. exit;
  2379. end;
  2380. end;
  2381. end;
  2382. end;
  2383. end;
  2384. {$endif i8086}
  2385. end;
  2386. function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  2387. var
  2388. hp1,hp2: tai;
  2389. begin
  2390. result:=false;
  2391. if (taicpu(p).oper[1]^.typ = top_reg) and
  2392. GetNextInstruction(p,hp1) and
  2393. (hp1.typ = ait_instruction) and
  2394. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  2395. GetNextInstruction(hp1,hp2) and
  2396. MatchInstruction(hp2,A_MOV,[]) and
  2397. (taicpu(hp2).oper[0]^.typ = top_reg) and
  2398. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  2399. {$ifdef i386}
  2400. { not all registers have byte size sub registers on i386 }
  2401. ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
  2402. {$endif i386}
  2403. (((taicpu(hp1).ops=2) and
  2404. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  2405. ((taicpu(hp1).ops=1) and
  2406. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  2407. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  2408. begin
  2409. { change movsX/movzX reg/ref, reg2
  2410. add/sub/or/... reg3/$const, reg2
2411. mov reg2, reg/ref
  2412. to add/sub/or/... reg3/$const, reg/ref }
  2413. { by example:
  2414. movswl %si,%eax movswl %si,%eax p
  2415. decl %eax addl %edx,%eax hp1
  2416. movw %ax,%si movw %ax,%si hp2
  2417. ->
  2418. movswl %si,%eax movswl %si,%eax p
  2419. decw %eax addw %edx,%eax hp1
  2420. movw %ax,%si movw %ax,%si hp2
  2421. }
  2422. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  2423. {
  2424. ->
  2425. movswl %si,%eax movswl %si,%eax p
  2426. decw %si addw %dx,%si hp1
  2427. movw %ax,%si movw %ax,%si hp2
  2428. }
  2429. case taicpu(hp1).ops of
  2430. 1:
  2431. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  2432. 2:
  2433. begin
  2434. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  2435. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  2436. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  2437. end;
  2438. else
  2439. internalerror(2008042701);
  2440. end;
  2441. {
  2442. ->
  2443. decw %si addw %dx,%si p
  2444. }
  2445. DebugMsg(SPeepholeOptimization + 'var3',p);
  2446. asml.remove(p);
  2447. asml.remove(hp2);
  2448. p.free;
  2449. hp2.free;
  2450. p:=hp1;
  2451. end
  2452. else if taicpu(p).opcode=A_MOVZX then
  2453. begin
  2454. { removes superfluous And's after movzx's }
  2455. if (taicpu(p).oper[1]^.typ = top_reg) and
  2456. GetNextInstruction(p, hp1) and
  2457. (tai(hp1).typ = ait_instruction) and
  2458. (taicpu(hp1).opcode = A_AND) and
  2459. (taicpu(hp1).oper[0]^.typ = top_const) and
  2460. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2461. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2462. begin
  2463. case taicpu(p).opsize Of
  2464. S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
  2465. if (taicpu(hp1).oper[0]^.val = $ff) then
  2466. begin
  2467. DebugMsg(SPeepholeOptimization + 'var4',p);
  2468. asml.remove(hp1);
  2469. hp1.free;
  2470. end;
  2471. S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
  2472. if (taicpu(hp1).oper[0]^.val = $ffff) then
  2473. begin
  2474. DebugMsg(SPeepholeOptimization + 'var5',p);
  2475. asml.remove(hp1);
  2476. hp1.free;
  2477. end;
  2478. {$ifdef x86_64}
  2479. S_LQ:
  2480. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  2481. begin
  2482. if (cs_asm_source in current_settings.globalswitches) then
  2483. asml.insertbefore(tai_comment.create(strpnew(SPeepholeOptimization + 'var6')),p);
  2484. asml.remove(hp1);
  2485. hp1.Free;
  2486. end;
  2487. {$endif x86_64}
  2488. end;
  2489. end;
2490. { changes some movzx constructs to faster synonyms (all examples
2491. are given with eax/ax, but are also valid for other registers) }
  2492. if (taicpu(p).oper[1]^.typ = top_reg) then
  2493. if (taicpu(p).oper[0]^.typ = top_reg) then
  2494. case taicpu(p).opsize of
  2495. S_BW:
  2496. begin
  2497. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2498. not(cs_opt_size in current_settings.optimizerswitches) then
  2499. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  2500. begin
  2501. taicpu(p).opcode := A_AND;
  2502. taicpu(p).changeopsize(S_W);
  2503. taicpu(p).loadConst(0,$ff);
  2504. DebugMsg(SPeepholeOptimization + 'var7',p);
  2505. end
  2506. else if GetNextInstruction(p, hp1) and
  2507. (tai(hp1).typ = ait_instruction) and
  2508. (taicpu(hp1).opcode = A_AND) and
  2509. (taicpu(hp1).oper[0]^.typ = top_const) and
  2510. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2511. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2512. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  2513. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  2514. begin
  2515. DebugMsg(SPeepholeOptimization + 'var8',p);
  2516. taicpu(p).opcode := A_MOV;
  2517. taicpu(p).changeopsize(S_W);
  2518. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  2519. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2520. end;
  2521. end;
  2522. S_BL:
  2523. begin
  2524. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2525. not(cs_opt_size in current_settings.optimizerswitches) then
  2526. { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
  2527. begin
  2528. taicpu(p).opcode := A_AND;
  2529. taicpu(p).changeopsize(S_L);
  2530. taicpu(p).loadConst(0,$ff)
  2531. end
  2532. else if GetNextInstruction(p, hp1) and
  2533. (tai(hp1).typ = ait_instruction) and
  2534. (taicpu(hp1).opcode = A_AND) and
  2535. (taicpu(hp1).oper[0]^.typ = top_const) and
  2536. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2537. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2538. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  2539. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  2540. begin
  2541. DebugMsg(SPeepholeOptimization + 'var10',p);
  2542. taicpu(p).opcode := A_MOV;
  2543. taicpu(p).changeopsize(S_L);
  2544. { do not use R_SUBWHOLE
  2545. as movl %rdx,%eax
  2546. is invalid in assembler PM }
  2547. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2548. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2549. end
  2550. end;
  2551. {$ifndef i8086}
  2552. S_WL:
  2553. begin
  2554. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  2555. not(cs_opt_size in current_settings.optimizerswitches) then
  2556. { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
  2557. begin
  2558. DebugMsg(SPeepholeOptimization + 'var11',p);
  2559. taicpu(p).opcode := A_AND;
  2560. taicpu(p).changeopsize(S_L);
  2561. taicpu(p).loadConst(0,$ffff);
  2562. end
  2563. else if GetNextInstruction(p, hp1) and
  2564. (tai(hp1).typ = ait_instruction) and
  2565. (taicpu(hp1).opcode = A_AND) and
  2566. (taicpu(hp1).oper[0]^.typ = top_const) and
  2567. (taicpu(hp1).oper[1]^.typ = top_reg) and
  2568. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2569. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  2570. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  2571. begin
  2572. DebugMsg(SPeepholeOptimization + 'var12',p);
  2573. taicpu(p).opcode := A_MOV;
  2574. taicpu(p).changeopsize(S_L);
  2575. { do not use R_SUBWHOLE
  2576. as movl %rdx,%eax
  2577. is invalid in assembler PM }
  2578. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  2579. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2580. end;
  2581. end;
  2582. {$endif i8086}
  2583. end
  2584. else if (taicpu(p).oper[0]^.typ = top_ref) then
  2585. begin
  2586. if GetNextInstruction(p, hp1) and
  2587. (tai(hp1).typ = ait_instruction) and
  2588. (taicpu(hp1).opcode = A_AND) and
  2589. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2590. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2591. begin
  2592. taicpu(p).opcode := A_MOV;
  2593. case taicpu(p).opsize Of
  2594. S_BL:
  2595. begin
  2596. DebugMsg(SPeepholeOptimization + 'var13',p);
  2597. taicpu(p).changeopsize(S_L);
  2598. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2599. end;
  2600. S_WL:
  2601. begin
  2602. DebugMsg(SPeepholeOptimization + 'var14',p);
  2603. taicpu(p).changeopsize(S_L);
  2604. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2605. end;
  2606. S_BW:
  2607. begin
  2608. DebugMsg(SPeepholeOptimization + 'var15',p);
  2609. taicpu(p).changeopsize(S_W);
  2610. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2611. end;
  2612. {$ifdef x86_64}
  2613. S_BQ:
  2614. begin
  2615. DebugMsg(SPeepholeOptimization + 'var16',p);
  2616. taicpu(p).changeopsize(S_Q);
  2617. taicpu(hp1).loadConst(
  2618. 0, taicpu(hp1).oper[0]^.val and $ff);
  2619. end;
  2620. S_WQ:
  2621. begin
  2622. DebugMsg(SPeepholeOptimization + 'var17',p);
  2623. taicpu(p).changeopsize(S_Q);
  2624. taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
  2625. end;
  2626. S_LQ:
  2627. begin
  2628. DebugMsg(SPeepholeOptimization + 'var18',p);
  2629. taicpu(p).changeopsize(S_Q);
  2630. taicpu(hp1).loadConst(
  2631. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  2632. end;
  2633. {$endif x86_64}
  2634. else
  2635. Internalerror(2017050704)
  2636. end;
  2637. end;
  2638. end;
  2639. end;
  2640. end;
  2641. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  2642. var
  2643. hp1 : tai;
  2644. RegName1, RegName2: string;
  2645. MaskLength : Cardinal;
  2646. begin
  2647. Result:=false;
  2648. if not(GetNextInstruction(p, hp1)) then
  2649. exit;
  2650. if MatchOpType(taicpu(p),top_const,top_reg) and
  2651. MatchInstruction(hp1,A_AND,[]) and
  2652. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2653. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2654. { the second register must contain the first one, so compare their subreg types }
  2655. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2656. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  2657. { change
  2658. and const1, reg
  2659. and const2, reg
  2660. to
  2661. and (const1 and const2), reg
  2662. }
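{ For example:
      andl $0xff00,%eax
      andl $0x0ff0,%eax      -->   andl $0x0f00,%eax
}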
  2663. begin
  2664. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  2665. DebugMsg(SPeepholeOptimization + 'AndAnd2And done',hp1);
  2666. asml.remove(p);
  2667. p.Free;
  2668. p:=hp1;
  2669. Result:=true;
  2670. exit;
  2671. end
  2672. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2673. MatchInstruction(hp1,A_MOVZX,[]) and
  2674. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2675. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2676. (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2677. (((taicpu(p).opsize=S_W) and
  2678. (taicpu(hp1).opsize=S_BW)) or
  2679. ((taicpu(p).opsize=S_L) and
  2680. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2681. {$ifdef x86_64}
  2682. or
  2683. ((taicpu(p).opsize=S_Q) and
  2684. (taicpu(hp1).opsize in [S_BQ,S_WQ]))
  2685. {$endif x86_64}
  2686. ) then
  2687. begin
  2688. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2689. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  2690. ) or
  2691. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2692. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  2693. then
  2694. begin
  2695. { Unlike MOVSX, MOVZX doesn't actually have a version that zero-extends a
  2696. 32-bit register to a 64-bit register, or even a version called MOVZXD, so
  2697. code that tests for the presence of AND 0xffffffff followed by MOVZX is
2698. wasted, and would be indicative of a compiler bug if it were ever triggered. [Kit]
  2699. NOTE: To zero-extend from 32 bits to 64 bits, simply use the standard MOV.
  2700. }
  2701. DebugMsg(SPeepholeOptimization + 'AndMovzToAnd done',p);
  2702. asml.remove(hp1);
  2703. hp1.free;
  2704. end;
  2705. end
  2706. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2707. MatchInstruction(hp1,A_SHL,[]) and
  2708. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2709. (getsupreg(taicpu(p).oper[1]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) then
  2710. begin
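{ For example:
      andl $0xff,%eax
      shll $24,%eax          -->   shll $24,%eax
  (the bits cleared by the and are shifted out anyway) }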
  2711. { get length of potential and mask }
2712. MaskLength:=BsrQWord(taicpu(p).oper[0]^.val)+1;
  2713. { really a mask? }
  2714. if (((QWord(1) shl MaskLength)-1)=taicpu(p).oper[0]^.val) and
  2715. { unmasked part shifted out? }
  2716. ((MaskLength+taicpu(hp1).oper[0]^.val)>=topsize2memsize[taicpu(hp1).opsize]) then
  2717. begin
  2718. DebugMsg(SPeepholeOptimization + 'AndShlToShl done',p);
  2719. { take care of the register (de)allocs following p }
  2720. UpdateUsedRegs(tai(p.next));
  2721. asml.remove(p);
  2722. p.free;
  2723. p:=hp1;
  2724. Result:=true;
  2725. exit;
  2726. end;
  2727. end
  2728. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2729. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  2730. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2731. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2732. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2733. (((taicpu(p).opsize=S_W) and
  2734. (taicpu(hp1).opsize=S_BW)) or
  2735. ((taicpu(p).opsize=S_L) and
  2736. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2737. {$ifdef x86_64}
  2738. or
  2739. ((taicpu(p).opsize=S_Q) and
  2740. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2741. {$endif x86_64}
  2742. ) then
  2743. begin
  2744. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2745. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  2746. ) or
  2747. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2748. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  2749. {$ifdef x86_64}
  2750. or
  2751. (((taicpu(hp1).opsize)=S_LQ) and
  2752. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  2753. )
  2754. {$endif x86_64}
  2755. then
  2756. begin
  2757. DebugMsg(SPeepholeOptimization + 'AndMovsxToAnd',p);
  2758. asml.remove(hp1);
  2759. hp1.free;
  2760. end;
  2761. end
  2762. else if (taicpu(p).oper[1]^.typ = top_reg) and
  2763. (hp1.typ = ait_instruction) and
  2764. (taicpu(hp1).is_jmp) and
  2765. (taicpu(hp1).opcode<>A_JMP) and
  2766. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  2767. { change
  2768. and x, reg
  2769. jxx
  2770. to
  2771. test x, reg
  2772. jxx
  2773. if reg is deallocated before the
  2774. jump, but only if it's a conditional jump (PFV)
  2775. }
  2776. taicpu(p).opcode := A_TEST;
  2777. end;
  2778. function TX86AsmOptimizer.PostPeepholeOptMov(const p : tai) : Boolean;
  2779. var
  2780. Value, RegName: string;
  2781. begin
  2782. Result:=false;
  2783. if (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(p).oper[0]^.typ = top_const) then
  2784. begin
  2785. case taicpu(p).oper[0]^.val of
  2786. 0:
  2787. { Don't make this optimisation if the CPU flags are required, since XOR scrambles them }
  2788. if not (RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2789. begin
  2790. { change "mov $0,%reg" into "xor %reg,%reg" }
                  taicpu(p).opcode := A_XOR;
                  taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                  Result := True;
                end;
            $1..$FFFFFFFF:
              begin
                { Code size reduction by J. Gareth "Kit" Moreton }
                { change 64-bit register to 32-bit register to reduce code size (upper 32 bits will be set to zero) }
                case taicpu(p).opsize of
                  S_Q:
                    begin
                      RegName := std_regname(taicpu(p).oper[1]^.reg); { 64-bit register name }
                      Value := tostr(taicpu(p).oper[0]^.val);
                      { The actual optimization }
                      setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
                      taicpu(p).changeopsize(S_L);
                      DebugMsg(SPeepholeOptimization + 'movq $' + Value + ',%' + RegName + ' -> movl $' + Value + ',%' + std_regname(taicpu(p).oper[1]^.reg) + ' (immediate can be represented with just 32 bits)', p);
                      Result := True;
                    end;
                end;
              end;
          end;
        end;
    end;

  function TX86AsmOptimizer.PostPeepholeOptCmp(var p : tai) : Boolean;
    begin
      Result:=false;
      { change "cmp $0, %reg" to "test %reg, %reg" }
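      { test %reg,%reg sets SF/ZF/PF exactly like cmp $0,%reg (and likewise leaves
        CF=OF=0), but has a shorter encoding because it needs no immediate }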
      if MatchOpType(taicpu(p),top_const,top_reg) and
        (taicpu(p).oper[0]^.val = 0) then
        begin
          taicpu(p).opcode := A_TEST;
          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
          Result:=true;
        end;
    end;

  function TX86AsmOptimizer.PostPeepholeOptTestOr(var p : tai) : Boolean;
    var
      IsTestConstX : Boolean;
      hp1,hp2 : tai;
    begin
      Result:=false;
      { removes the line marked with (x) from the sequence
          and/or/xor/add/sub/... $x, %y
          test/or %y, %y | test $-1, %y    (x)
          j(n)z _Label
        as the first instruction already adjusts the ZF;
        the %y operand may also be a reference }
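      { illustrative example: "andl $3,%eax; testl %eax,%eax; jne .L1" becomes
        "andl $3,%eax; jne .L1" }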
      IsTestConstX:=(taicpu(p).opcode=A_TEST) and
        MatchOperand(taicpu(p).oper[0]^,-1);
      if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
        GetLastInstruction(p, hp1) and
        (tai(hp1).typ = ait_instruction) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
        case taicpu(hp1).opcode Of
          A_ADD, A_SUB, A_OR, A_XOR, A_AND:
            begin
              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                { and in case of carry for A(E)/B(E)/C/NC }
                ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                 ((taicpu(hp1).opcode <> A_ADD) and
                  (taicpu(hp1).opcode <> A_SUB))) then
                begin
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  p := tai(hp1);
                  Result:=true;
                end;
            end;
          A_SHL, A_SAL, A_SHR, A_SAR:
            begin
              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                { therefore, it's only safe to do this optimization for }
                { shifts by a (nonzero) constant }
                (taicpu(hp1).oper[0]^.typ = top_const) and
                (taicpu(hp1).oper[0]^.val <> 0) and
                { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                { and in case of carry for A(E)/B(E)/C/NC }
                (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                begin
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  p := tai(hp1);
                  Result:=true;
                end;
            end;
          A_DEC, A_INC, A_NEG:
            begin
              if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                { and in case of carry for A(E)/B(E)/C/NC }
                (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                begin
                  case taicpu(hp1).opcode Of
                    A_DEC, A_INC:
                      { replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag }
                      begin
                        case taicpu(hp1).opcode Of
                          A_DEC: taicpu(hp1).opcode := A_SUB;
                          A_INC: taicpu(hp1).opcode := A_ADD;
                        end;
                        taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                        taicpu(hp1).loadConst(0,1);
                        taicpu(hp1).ops:=2;
                      end
                  end;
                  hp1 := tai(p.next);
                  asml.remove(p);
                  p.free;
                  p := tai(hp1);
                  Result:=true;
                end;
            end
          else
            { change "test $-1,%reg" into "test %reg,%reg" }
            if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
              taicpu(p).loadoper(0,taicpu(p).oper[1]^);
        end { case }
      { change "test $-1,%reg" into "test %reg,%reg" }
      else if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
        taicpu(p).loadoper(0,taicpu(p).oper[1]^);
    end;

  function TX86AsmOptimizer.PostPeepholeOptCall(var p : tai) : Boolean;
    var
      hp1 : tai;
      hp2 : taicpu;
    begin
      Result:=false;
{$ifndef x86_64}
      { don't do this on modern CPUs, as it really hurts them due to
        broken call/ret pairing }
      if (current_settings.optimizecputype < cpu_Pentium2) and
        not(cs_create_pic in current_settings.moduleswitches) and
        GetNextInstruction(p, hp1) and
        MatchInstruction(hp1,A_JMP,[S_NO]) and
        MatchOpType(taicpu(hp1),top_ref) and
        (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full) then
        begin
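          { turn "call proc1; jmp proc2" into "push proc2; jmp proc1": the pushed
            address acts as the return address, so proc1 returns straight into proc2 }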
          hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
          InsertLLItem(p.previous, p, hp2);
          taicpu(p).opcode := A_JMP;
          taicpu(p).is_jmp := true;
          asml.remove(hp1);
          hp1.free;
          Result:=true;
        end
      else
{$endif x86_64}
        { replace
            call procname
            ret
          by
            jmp procname
          this should never hurt except when pic is used (not sure
          how to handle it then);
          but do it only on level 4, because it destroys stack back traces
        }
        if (cs_opt_level4 in current_settings.optimizerswitches) and
          not(cs_create_pic in current_settings.moduleswitches) and
          GetNextInstruction(p, hp1) and
          MatchInstruction(hp1,A_RET,[S_NO]) and
          (taicpu(hp1).ops=0) then
          begin
            taicpu(p).opcode := A_JMP;
            taicpu(p).is_jmp := true;
            asml.remove(hp1);
            hp1.free;
            Result:=true;
          end;
    end;

{$ifdef x86_64}
  function TX86AsmOptimizer.PostPeepholeOptMovzx(const p : tai) : Boolean;
    var
      PreMessage: string;
    begin
      Result := False;
      { Code size reduction by J. Gareth "Kit" Moreton }
      { Convert MOVZBQ and MOVZWQ to MOVZBL and MOVZWL respectively if it removes the REX prefix }
      if (taicpu(p).opsize in [S_BQ, S_WQ]) and
        (getsupreg(taicpu(p).oper[1]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
        begin
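          { illustrative example: "movzbq %cl,%rax" -> "movzbl %cl,%eax"; the upper 32
            bits are cleared implicitly, so the result is identical but one byte shorter }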
          { Has 64-bit register name and opcode suffix }
          PreMessage := 'movz' + gas_opsize2str[taicpu(p).opsize] + ' x,%' + std_regname(taicpu(p).oper[1]^.reg) + ' -> movz';
          { The actual optimization }
          setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
          if taicpu(p).opsize = S_BQ then
            taicpu(p).changeopsize(S_BL)
          else
            taicpu(p).changeopsize(S_WL);
          DebugMsg(SPeepholeOptimization + PreMessage +
            gas_opsize2str[taicpu(p).opsize] + ' x,%' + std_regname(taicpu(p).oper[1]^.reg) + ' (removes REX prefix)', p);
        end;
    end;

  function TX86AsmOptimizer.PostPeepholeOptXor(var p : tai) : Boolean;
    var
      PreMessage, RegName: string;
    begin
      { Code size reduction by J. Gareth "Kit" Moreton }
      { change "xorq %reg,%reg" to "xorl %reg,%reg" for %rax, %rcx, %rdx, %rbx, %rsi, %rdi, %rbp and %rsp,
        as this removes the REX prefix }
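      { this is safe because writes to a 32-bit register implicitly zero the upper
        32 bits of the corresponding 64-bit register }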
      Result := False;
      if not OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
        Exit;
      if taicpu(p).oper[0]^.typ <> top_reg then
        { Should be impossible if both operands were equal, since one of XOR's operands must be a register }
        InternalError(2018011500);
      case taicpu(p).opsize of
        S_Q:
          if (getsupreg(taicpu(p).oper[0]^.reg) in [RS_RAX, RS_RCX, RS_RDX, RS_RBX, RS_RSI, RS_RDI, RS_RBP, RS_RSP]) then
            begin
              RegName := std_regname(taicpu(p).oper[0]^.reg); { 64-bit register name }
              PreMessage := 'xorq %' + RegName + ',%' + RegName + ' -> xorl %';
              { The actual optimization }
              setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
              setsubreg(taicpu(p).oper[1]^.reg, R_SUBD);
              taicpu(p).changeopsize(S_L);
              RegName := std_regname(taicpu(p).oper[0]^.reg); { 32-bit register name }
              DebugMsg(SPeepholeOptimization + PreMessage + RegName + ',%' + RegName + ' (removes REX prefix)', p);
            end;
      end;
    end;
{$endif x86_64}

  procedure TX86AsmOptimizer.OptReferences;
    var
      p: tai;
      i: Integer;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          if p.typ=ait_instruction then
            begin
              for i:=0 to taicpu(p).ops-1 do
                if taicpu(p).oper[i]^.typ=top_ref then
                  optimize_ref(taicpu(p).oper[i]^.ref^,false);
            end;
          p:=tai(p.next);
        end;
    end;

end.