aoptx86.pas 109 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
type
  { x86-specific peephole optimizer, shared by the i386 and x86_64 targets }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { true if instruction hp assigns reg a value independent of its old contents }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    { true if instruction hp reads (any part of) reg; delegates to
      RegReadByInstruction }
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    { true if instruction hp reads reg, including implicit operands
      (MUL/DIV/IMUL accumulators, references, condition flags) }
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  protected
    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    { checks whether reading the value in reg1 depends on the value of reg2. This
      is very similar to SuperRegisterEquals, except it takes into account that
      R_SUBH and R_SUBL are independendent (e.g. reading from AL does not
      depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    { inserts s as an assembler comment before p in DEBUG_AOPTCPU builds;
      compiles to a no-op otherwise }
    procedure DebugMsg(const s : string; p : tai);inline;
    { marks reg as allocated between (and including) p1 and p2, removing any
      stale alloc/dealloc info found in between }
    procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
    class function IsExitCode(p : tai) : boolean;
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
    procedure RemoveLastDeallocForFuncRes(p : tai);
    { pre-pass: folds shr/sar + shl pairs into and/shift combinations }
    function PrePeepholeOptSxx(var p : tai) : boolean;
    { per-opcode optimization passes; each returns true when it changed the
      instruction stream.
      NOTE(review): OptPass1VOP, OptPass1OP and PostPeepholeOptMov declare
      "const p" while their siblings use "var p" — presumably because they
      never replace p itself; confirm against the implementations }
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1VMOVAP(var p : tai) : boolean;
    function OptPass1VOP(const p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass1Movx(var p : tai) : boolean;
    function OptPass1MOVAP(var p : tai) : boolean;
    function OptPass1MOVXX(var p : tai) : boolean;
    function OptPass1OP(const p : tai) : boolean;
    function OptPass1LEA(var p : tai) : boolean;
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;
    function OptPass2Jmp(var p : tai) : boolean;
    function OptPass2Jcc(var p : tai) : boolean;
    procedure PostPeepholeOptMov(const p : tai);
  end;
{ true if instr is a real instruction with the given opcode (or one of the
  given opcodes) and an operand size contained in opsize ([] = any size) }
function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
{ true if oper is a register operand equal to reg }
function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
{ true if oper is an immediate operand with value a }
function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
{ structural equality of two operands of the same kind }
function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
{ field-by-field equality of two memory references }
function RefsEqual(const r1, r2: treference): boolean;
{ true if ref uses only the given base/index registers (NR_INVALID acts as a
  wildcard) and has neither an offset nor a symbol }
function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
{ returns true, if ref is a reference using only the registers passed as base and index
  and having an offset }
function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  73. implementation
  74. uses
  75. cutils,verbose,
  76. globals,
  77. cpuinfo,
  78. procinfo,
  79. aasmbase,
  80. aoptutils,
  81. symconst,symsym,
  82. cgx86,
  83. itcpugas;
  84. function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  85. begin
  86. result :=
  87. (instr.typ = ait_instruction) and
  88. (taicpu(instr).opcode = op) and
  89. ((opsize = []) or (taicpu(instr).opsize in opsize));
  90. end;
  91. function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  92. begin
  93. result :=
  94. (instr.typ = ait_instruction) and
  95. ((taicpu(instr).opcode = op1) or
  96. (taicpu(instr).opcode = op2)
  97. ) and
  98. ((opsize = []) or (taicpu(instr).opsize in opsize));
  99. end;
  100. function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  101. begin
  102. result :=
  103. (instr.typ = ait_instruction) and
  104. ((taicpu(instr).opcode = op1) or
  105. (taicpu(instr).opcode = op2) or
  106. (taicpu(instr).opcode = op3)
  107. ) and
  108. ((opsize = []) or (taicpu(instr).opsize in opsize));
  109. end;
  110. function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
  111. const opsize : topsizes) : boolean;
  112. var
  113. op : TAsmOp;
  114. begin
  115. result:=false;
  116. for op in ops do
  117. begin
  118. if (instr.typ = ait_instruction) and
  119. (taicpu(instr).opcode = op) and
  120. ((opsize = []) or (taicpu(instr).opsize in opsize)) then
  121. begin
  122. result:=true;
  123. exit;
  124. end;
  125. end;
  126. end;
  127. function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  128. begin
  129. result := (oper.typ = top_reg) and (oper.reg = reg);
  130. end;
  131. function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  132. begin
  133. result := (oper.typ = top_const) and (oper.val = a);
  134. end;
  135. function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  136. begin
  137. result := oper1.typ = oper2.typ;
  138. if result then
  139. case oper1.typ of
  140. top_const:
  141. Result:=oper1.val = oper2.val;
  142. top_reg:
  143. Result:=oper1.reg = oper2.reg;
  144. top_ref:
  145. Result:=RefsEqual(oper1.ref^, oper2.ref^);
  146. else
  147. internalerror(2013102801);
  148. end
  149. end;
  150. function RefsEqual(const r1, r2: treference): boolean;
  151. begin
  152. RefsEqual :=
  153. (r1.offset = r2.offset) and
  154. (r1.segment = r2.segment) and (r1.base = r2.base) and
  155. (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
  156. (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
  157. (r1.relsymbol = r2.relsymbol);
  158. end;
  159. function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  160. begin
  161. Result:=(ref.offset=0) and
  162. (ref.scalefactor in [0,1]) and
  163. (ref.segment=NR_NO) and
  164. (ref.symbol=nil) and
  165. (ref.relsymbol=nil) and
  166. ((base=NR_INVALID) or
  167. (ref.base=base)) and
  168. ((index=NR_INVALID) or
  169. (ref.index=index));
  170. end;
  171. function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  172. begin
  173. Result:=(ref.scalefactor in [0,1]) and
  174. (ref.segment=NR_NO) and
  175. (ref.symbol=nil) and
  176. (ref.relsymbol=nil) and
  177. ((base=NR_INVALID) or
  178. (ref.base=base)) and
  179. ((index=NR_INVALID) or
  180. (ref.index=index));
  181. end;
  182. function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  183. begin
  184. Result:=RegReadByInstruction(reg,hp);
  185. end;
{ Returns true if instruction hp reads (any part of) reg.  Handles the
  implicit operand conventions of CALL/MUL/IMUL/DIV/IDIV and conditional
  instructions explicitly; all other opcodes are decided from their memory
  operands and the insprop change-information table. }
function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  var
    p: taicpu;
    opcount: longint;
  begin
    RegReadByInstruction := false;
    { non-instruction items (labels, comments, ...) read nothing }
    if hp.typ <> ait_instruction then
      exit;
    p := taicpu(hp);
    case p.opcode of
      A_CALL:
        { conservatively assume a call reads every register }
        regreadbyinstruction := true;
      A_IMUL:
        case p.ops of
          1:
            { one-operand IMUL implicitly reads the accumulator; AH is only
              involved for non-byte-sized operations }
            regReadByInstruction := RegInOp(reg,p.oper[0]^) or
              (
                ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
                ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
              );
          2,3:
            { two/three-operand IMUL reads its explicit source operands }
            regReadByInstruction :=
              reginop(reg,p.oper[0]^) or
              reginop(reg,p.oper[1]^);
        end;
      A_MUL:
        begin
          { MUL implicitly reads the accumulator, like one-operand IMUL }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
            (
              ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
              ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
            );
        end;
      A_IDIV,A_DIV:
        begin
          { division implicitly reads EAX, and EDX as well except for the
            byte-sized form (which uses AX only) }
          regReadByInstruction := RegInOp(reg,p.oper[0]^) or
            (
              (getregtype(reg)=R_INTREGISTER) and
              (
                (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
              )
            );
        end;
      else
        begin
          { LEA only computes an address; it never touches segment registers }
          if (p.opcode=A_LEA) and is_segment_reg(reg) then
            begin
              RegReadByInstruction := false;
              exit;
            end;
          { any register appearing inside a memory operand is read for the
            address calculation }
          for opcount := 0 to p.ops-1 do
            if (p.oper[opCount]^.typ = top_ref) and
               RegInRef(reg,p.oper[opcount]^.ref^) then
              begin
                RegReadByInstruction := true;
                exit
              end;
          { special handling for SSE MOVSD }
          if (p.opcode=A_MOVSD) and (p.ops>0) then
            begin
              if p.ops<>2 then
                internalerror(2017042702);
              { reg-to-reg MOVSD only merges the low qword, so the
                destination register is read as well }
              regReadByInstruction := reginop(reg,p.oper[0]^) or
                (
                  (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
                );
              exit;
            end;
          { generic path: consult the per-opcode change information }
          with insprop[p.opcode] do
            begin
              if getregtype(reg)=R_INTREGISTER then
                begin
                  { implicit reads of specific integer registers, encoded as
                    Read/ReadWrite/Modify flags per super register }
                  case getsupreg(reg) of
                    RS_EAX:
                      if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ECX:
                      if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDX:
                      if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBX:
                      if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESP:
                      if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EBP:
                      if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_ESI:
                      if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                    RS_EDI:
                      if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                        begin
                          RegReadByInstruction := true;
                          exit
                        end;
                  end;
                end;
              { flags register: decide per individual flag sub register }
              if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
                begin
                  { conditional instructions read exactly the flags their
                    condition code tests — but only when asking about an
                    individual flag, not about the whole flags register }
                  if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
                    begin
                      case p.condition of
                        C_A,C_NBE, { CF=0 and ZF=0 }
                        C_BE,C_NA: { CF=1 or ZF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                        C_AE,C_NB,C_NC, { CF=0 }
                        C_B,C_NAE,C_C: { CF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
                        C_NE,C_NZ, { ZF=0 }
                        C_E,C_Z: { ZF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
                        C_G,C_NLE, { ZF=0 and SF=OF }
                        C_LE,C_NG: { ZF=1 or SF<>OF }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_GE,C_NL, { SF=OF }
                        C_L,C_NGE: { SF<>OF }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                        C_NO, { OF=0 }
                        C_O: { OF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
                        C_NP,C_PO, { PF=0 }
                        C_P,C_PE: { PF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
                        C_NS, { SF=0 }
                        C_S: { SF=1 }
                          RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
                        else
                          internalerror(2017042701);
                      end;
                      if RegReadByInstruction then
                        exit;
                    end;
                  { otherwise, map each flag sub register onto the matching
                    read/read-write change flags from the table }
                  case getsubreg(reg) of
                    R_SUBW,R_SUBD,R_SUBQ:
                      RegReadByInstruction :=
                        [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                         Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                         Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
                    R_SUBFLAGCARRY:
                      RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGPARITY:
                      RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGAUXILIARY:
                      RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGZERO:
                      RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGSIGN:
                      RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGOVERFLOW:
                      RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGINTERRUPT:
                      RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    R_SUBFLAGDIRECTION:
                      RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                    else
                      internalerror(2017042601);
                  end;
                  exit;
                end;
              { some instructions (e.g. xor reg,reg) do not really read their
                operands when source and destination are the same register }
              if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
                 (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
                 (p.oper[0]^.reg=p.oper[1]^.reg) then
                exit;
              { finally: check the explicit operands whose change info says
                they are read (R), read-written (RW) or modified (M) }
              if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP4,Ch_ROP4,Ch_MOP4]*Ch<>[]) and reginop(reg,p.oper[3]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
            end;
        end;
    end;
  end;
{$ifdef DEBUG_AOPTCPU}
{ debug build: insert s as an assembler comment directly before p so the
  optimization shows up in the generated assembler listing }
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
  begin
    asml.insertbefore(tai_comment.Create(strpnew(s)), p);
  end;
{$else DEBUG_AOPTCPU}
{ release build: no-op; declared inline so calls vanish entirely }
procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
  begin
  end;
{$endif DEBUG_AOPTCPU}
  409. function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
  410. begin
  411. if not SuperRegistersEqual(reg1,reg2) then
  412. exit(false);
  413. if getregtype(reg1)<>R_INTREGISTER then
  414. exit(true); {because SuperRegisterEqual is true}
  415. case getsubreg(reg1) of
  416. { A write to R_SUBL doesn't change R_SUBH and if reg2 is R_SUBW or
  417. higher, it preserves the high bits, so the new value depends on
  418. reg2's previous value. In other words, it is equivalent to doing:
  419. reg2 := (reg2 and $ffffff00) or byte(reg1); }
  420. R_SUBL:
  421. exit(getsubreg(reg2)=R_SUBL);
  422. { A write to R_SUBH doesn't change R_SUBL and if reg2 is R_SUBW or
  423. higher, it actually does a:
  424. reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00); }
  425. R_SUBH:
  426. exit(getsubreg(reg2)=R_SUBH);
  427. { If reg2 is R_SUBD or larger, a write to R_SUBW preserves the high 16
  428. bits of reg2:
  429. reg2 := (reg2 and $ffff0000) or word(reg1); }
  430. R_SUBW:
  431. exit(getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW]);
  432. { a write to R_SUBD always overwrites every other subregister,
  433. because it clears the high 32 bits of R_SUBQ on x86_64 }
  434. R_SUBD,
  435. R_SUBQ:
  436. exit(true);
  437. else
  438. internalerror(2017042801);
  439. end;
  440. end;
  441. function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
  442. begin
  443. if not SuperRegistersEqual(reg1,reg2) then
  444. exit(false);
  445. if getregtype(reg1)<>R_INTREGISTER then
  446. exit(true); {because SuperRegisterEqual is true}
  447. case getsubreg(reg1) of
  448. R_SUBL:
  449. exit(getsubreg(reg2)<>R_SUBH);
  450. R_SUBH:
  451. exit(getsubreg(reg2)<>R_SUBL);
  452. R_SUBW,
  453. R_SUBD,
  454. R_SUBQ:
  455. exit(true);
  456. else
  457. internalerror(2017042802);
  458. end;
  459. end;
{ Pre-pass optimization for shift instructions (p is a SHR or SAR with a
  constant count): folds a following SHL by a constant on the same operand
  into a shift-plus-AND or just an AND, depending on the two counts.
  NOTE(review): result is initialised to false and never set to true even
  when the instruction stream is rewritten — presumably the pre-pass caller
  does not depend on it; confirm against the framework's contract. }
function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
  var
    hp1 : tai;
    l : TCGInt;  { mask of the low const bits, built as (1 shl count)-1 }
  begin
    result:=false;
    { changes the code sequence
      shr/sar const1, x
      shl const2, x
      to
      either "sar/and", "shl/and" or just "and" depending on const1 and const2 }
    if GetNextInstruction(p, hp1) and
       MatchInstruction(hp1,A_SHL,[]) and
       (taicpu(p).oper[0]^.typ = top_const) and
       (taicpu(hp1).oper[0]^.typ = top_const) and
       (taicpu(hp1).opsize = taicpu(p).opsize) and
       (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
       OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
      begin
        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
           not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 > const2 }
            { keep the right shift, but only by the difference, then mask
              away the low bits the shl would have cleared }
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            taicpu(hp1).opcode := A_AND;
            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
            { xor against the all-ones pattern of the operand size inverts
              the mask: keep the high bits, clear the low const2 bits }
            case taicpu(p).opsize Of
              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
              S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
              S_Q: taicpu(hp1).loadConst(0,l Xor aint($ffffffffffffffff));
              else
                Internalerror(2017050703)
            end;
          end
        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                not(cs_opt_size in current_settings.optimizerswitches) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 < const2 }
            { turn the first instruction into the mask and shift left by the
              difference afterwards }
            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
              else
                Internalerror(2017050702)
            end;
          end
        else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
          begin
            { shr/sar const1, %reg
              shl const2, %reg
              with const1 = const2 }
            { both shifts cancel out except for clearing the low const bits:
              a single AND suffices, the shl is removed }
            taicpu(p).opcode := A_AND;
            l := (1 shl (taicpu(p).oper[0]^.val))-1;
            case taicpu(p).opsize Of
              S_B: taicpu(p).loadConst(0,l Xor $ff);
              S_W: taicpu(p).loadConst(0,l Xor $ffff);
              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
              S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
              else
                Internalerror(2017050701)
            end;
            asml.remove(hp1);
            hp1.free;
          end;
      end;
  end;
{ allocates register reg between (and including) instructions p1 and p2
  the type of p1 and p2 must not be in SkipInstr
  note that this routine is both called from the peephole optimizer
  where optinfo is not yet initialised) and from the cse (where it is) }
procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
  var
    hp, start: tai;
    removedsomething,
    firstRemovedWasAlloc,
    lastRemovedWasDealloc: boolean;
  begin
{$ifdef EXTDEBUG}
    { if assigned(p1.optinfo) and
      (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
      internalerror(2004101010); }
{$endif EXTDEBUG}
    start := p1;
    { the stack pointer and frame pointer are live across the whole routine
      and need no explicit allocation information }
    if (reg = NR_ESP) or
       (reg = current_procinfo.framepointer) or
       not(assigned(p1)) then
      { this happens with registers which are loaded implicitely, outside the }
      { current block (e.g. esi with self) }
      exit;
    { make sure we allocate it for this instruction }
    getnextinstruction(p2,p2);
    lastRemovedWasDealloc := false;
    removedSomething := false;
    firstRemovedWasAlloc := false;
{$ifdef allocregdebug}
    hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
      ' from here...'));
    insertllitem(asml,p1.previous,p1,hp);
    hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
      ' till here...'));
    insertllitem(asml,p2,p2.next,hp);
{$endif allocregdebug}
    { do it the safe way: always allocate the full super register,
      as we do no register re-allocation in the peephole optimizer,
      this does not hurt
    }
    case getregtype(reg) of
      R_MMREGISTER:
        reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
      R_INTREGISTER:
        reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
    end;
    { if reg is not already live before p1, insert an explicit allocation
      marker there and record it in initialusedregs }
    if not(RegInUsedRegs(reg,initialusedregs)) then
      begin
        hp := tai_regalloc.alloc(reg,nil);
        insertllItem(p1.previous,p1,hp);
        IncludeRegInUsedRegs(reg,initialusedregs);
      end;
    { walk from p1 to p2 and strip every alloc/dealloc marker of reg in
      between, remembering the kind of the first and last removed markers so
      the boundary state can be reconstructed afterwards }
    while assigned(p1) and
          (p1 <> p2) do
      begin
        if assigned(p1.optinfo) then
          internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
        p1 := tai(p1.next);
        repeat
          { skip non-regalloc bookkeeping items }
          while assigned(p1) and
                (p1.typ in (SkipInstr-[ait_regalloc])) Do
            p1 := tai(p1.next);
          { remove all allocation/deallocation info about the register in between }
          if assigned(p1) and
             (p1.typ = ait_regalloc) then
            begin
              { same super register, different sub register? }
              if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
                begin
                  { widening past the tracked sub register (or any R_SUBH
                    situation) is unexpected here }
                  if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                    internalerror(2016101501);
                  tai_regalloc(p1).reg:=reg;
                end;
              if tai_regalloc(p1).reg=reg then
                begin
                  if not removedSomething then
                    begin
                      firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                      removedSomething := true;
                    end;
                  lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                  { unlink and free the marker, continuing with its successor }
                  hp := tai(p1.Next);
                  asml.Remove(p1);
                  p1.free;
                  p1 := hp;
                end
              else
                p1 := tai(p1.next);
            end;
        until not(assigned(p1)) or
              not(p1.typ in SkipInstr);
      end;
    { re-create the boundary markers the removal loop swallowed: an alloc at
      the start if the first removed marker was an alloc, and a dealloc at
      the end if the last removed marker was a dealloc }
    if assigned(p1) then
      begin
        if firstRemovedWasAlloc then
          begin
            hp := tai_regalloc.Alloc(reg,nil);
            insertLLItem(start.previous,start,hp);
          end;
        if lastRemovedWasDealloc then
          begin
            hp := tai_regalloc.DeAlloc(reg,nil);
            insertLLItem(p1.previous,p1,hp);
          end;
      end;
  end;
    { Returns true if instruction hp overwrites reg with a value that cannot
      depend on reg's previous contents, i.e. any earlier write to reg is dead
      at hp. reg may also name the flags register or an individual flag, in
      which case the answer is derived from the insprop change sets. }
    function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
      var
        p: taicpu;
      begin
        { only real instructions can load a new value }
        if not assigned(hp) or
          (hp.typ <> ait_instruction) then
          begin
            Result := false;
            exit;
          end;
        p := taicpu(hp);
        { flags register (or a single flag) queried? }
        if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
          with insprop[p.opcode] do
            begin
              case getsubreg(reg) of
                { the whole flags register: every individual flag must get a
                  new value }
                R_SUBW,R_SUBD,R_SUBQ:
                  Result:=
                    RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
                    RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
                    RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
                    RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
                    RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
                    RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
                { a single flag: written iff the instruction's change set
                  contains one of the corresponding write properties }
                R_SUBFLAGCARRY:
                  Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGPARITY:
                  Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGAUXILIARY:
                  Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGZERO:
                  Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGSIGN:
                  Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGOVERFLOW:
                  Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGINTERRUPT:
                  Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
                R_SUBFLAGDIRECTION:
                  Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
                else
                  { unexpected flag sub register }
                  internalerror(2017050501);
              end;
              exit;
            end;
        Result :=
          { plain moves/loads: destination must fully overwrite reg and the
            source operand must not read reg }
          (((p.opcode = A_MOV) or
            (p.opcode = A_MOVZX) or
            (p.opcode = A_MOVSX) or
            (p.opcode = A_LEA) or
            (p.opcode = A_VMOVSS) or
            (p.opcode = A_VMOVSD) or
            (p.opcode = A_VMOVAPD) or
            (p.opcode = A_VMOVAPS) or
            (p.opcode = A_VMOVQ) or
            (p.opcode = A_MOVSS) or
            (p.opcode = A_MOVSD) or
            (p.opcode = A_MOVQ) or
            (p.opcode = A_MOVAPD) or
            (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
            (p.opcode = A_LDS) or
            (p.opcode = A_LES) or
{$endif not x86_64}
            (p.opcode = A_LFS) or
            (p.opcode = A_LGS) or
            (p.opcode = A_LSS)) and
           (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
           (p.oper[1]^.typ = top_reg) and
           (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
           ((p.oper[0]^.typ = top_const) or
            ((p.oper[0]^.typ = top_reg) and
             not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
            ((p.oper[0]^.typ = top_ref) and
             not RegInRef(reg,p.oper[0]^.ref^)))) or
          { pop always loads a fresh value from the stack }
          ((p.opcode = A_POP) and
           (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
          { three-operand imul: the explicit destination is overwritten }
          ((p.opcode = A_IMUL) and
           (p.ops=3) and
           (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
           (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
            ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
          { one-operand mul/imul writes (E/R)DX (or AX for byte size)
            implicitly }
          ((((p.opcode = A_IMUL) or
             (p.opcode = A_MUL)) and
            (p.ops=1)) and
           (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
            ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
           (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
            ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
            ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
            or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
           )) or
          { sign-extension helpers with implicit destinations }
          ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
          ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
          ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
          ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
          { segment register loads }
{$ifndef x86_64}
          ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
          ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
          ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
          ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
          ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
          { miscellaneous instructions with implicit register destinations }
{$ifndef x86_64}
          ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
          ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
          ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
          ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
          ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
          ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
          ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
          (((p.opcode = A_FSTSW) or
            (p.opcode = A_FNSTSW)) and
           (p.oper[0]^.typ=top_reg) and
           Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
          { xor/sub/sbb with identical source and destination produce a value
            independent of the register's old contents (0 or 0/-1) }
          (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
           (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
           (p.oper[0]^.reg=p.oper[1]^.reg) and
           Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
      end;
    { Returns true if p starts the exit sequence of the current procedure:
      either a plain ret, a leave+ret pair, or a frame pointer restore
      (mov framepointer,sp / lea sp,[framepointer...]) followed by
      pop framepointer and ret. A leading NOP is skipped first.
      Note: p is a value parameter, so advancing it past the NOP does not
      affect the caller. }
    class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
      var
        hp2,hp3 : tai;
      begin
        { some x86-64 issue a NOP before the real exit code }
        if MatchInstruction(p,A_NOP,[]) then
          GetNextInstruction(p,p);
        result:=assigned(p) and (p.typ=ait_instruction) and
          ((taicpu(p).opcode = A_RET) or
           { leave; ret }
           ((taicpu(p).opcode=A_LEAVE) and
            GetNextInstruction(p,hp2) and
            MatchInstruction(hp2,A_RET,[S_NO])
           ) or
           { restore stack pointer from the frame pointer, either via mov or
             via lea (the latter can carry an offset) ... }
           ((((taicpu(p).opcode=A_MOV) and
              MatchOpType(taicpu(p),top_reg,top_reg) and
              (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
              (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
             ((taicpu(p).opcode=A_LEA) and
              MatchOpType(taicpu(p),top_ref,top_reg) and
              (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
              (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
             )
            ) and
            { ... followed by pop framepointer; ret }
            GetNextInstruction(p,hp2) and
            MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
            MatchOpType(taicpu(hp2),top_reg) and
            (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
            GetNextInstruction(hp2,hp3) and
            MatchInstruction(hp3,A_RET,[S_NO])
           )
          );
      end;
  799. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  800. begin
  801. isFoldableArithOp := False;
  802. case hp1.opcode of
  803. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  804. isFoldableArithOp :=
  805. ((taicpu(hp1).oper[0]^.typ = top_const) or
  806. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  807. (taicpu(hp1).oper[0]^.reg <> reg))) and
  808. (taicpu(hp1).oper[1]^.typ = top_reg) and
  809. (taicpu(hp1).oper[1]^.reg = reg);
  810. A_INC,A_DEC,A_NEG,A_NOT:
  811. isFoldableArithOp :=
  812. (taicpu(hp1).oper[0]^.typ = top_reg) and
  813. (taicpu(hp1).oper[0]^.reg = reg);
  814. end;
  815. end;
    { Removes, in front of the exit code at p, the last deallocation marker of
      the register(s) carrying the function result, so the result register is
      not considered free before the ret/leave sequence. Which registers are
      affected depends on the current function's return type. }
    procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);

      { walks backwards from p and deletes the first ra_dealloc marker of the
        given integer super register; the walk ends once regInInstruction
        reports the register in the visited item or the list start is reached }
      procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
        var
          hp2: tai;
        begin
          hp2 := p;
          repeat
            hp2 := tai(hp2.previous);
            if assigned(hp2) and
              (hp2.typ = ait_regalloc) and
              (tai_regalloc(hp2).ratype=ra_dealloc) and
              (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
              (getsupreg(tai_regalloc(hp2).reg) = supreg) then
              begin
                asml.remove(hp2);
                hp2.free;
                break;
              end;
          until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
        end;

      begin
        case current_procinfo.procdef.returndef.typ of
          { pointer-sized results are returned in EAX }
          arraydef,recorddef,pointerdef,
          stringdef,enumdef,procdef,objectdef,errordef,
          filedef,setdef,procvardef,
          classrefdef,forwarddef:
            DoRemoveLastDeallocForFuncRes(RS_EAX);
          orddef:
            if current_procinfo.procdef.returndef.size <> 0 then
              begin
                DoRemoveLastDeallocForFuncRes(RS_EAX);
                { for int64/qword }
                if current_procinfo.procdef.returndef.size = 8 then
                  DoRemoveLastDeallocForFuncRes(RS_EDX);
              end;
        end;
      end;
  853. function TX86AsmOptimizer.OptPass1MOVAP(var p : tai) : boolean;
  854. var
  855. TmpUsedRegs : TAllUsedRegs;
  856. hp1,hp2 : tai;
  857. alloc ,dealloc: tai_regalloc;
  858. begin
  859. result:=false;
  860. if MatchOpType(taicpu(p),top_reg,top_reg) and
  861. GetNextInstruction(p, hp1) and
  862. (hp1.typ = ait_instruction) and
  863. GetNextInstruction(hp1, hp2) and
  864. MatchInstruction(hp2,taicpu(p).opcode,[]) and
  865. OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
  866. MatchOpType(taicpu(hp2),top_reg,top_reg) and
  867. MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  868. (((taicpu(p).opcode=A_MOVAPS) and
  869. ((taicpu(hp1).opcode=A_ADDSS) or (taicpu(hp1).opcode=A_SUBSS) or
  870. (taicpu(hp1).opcode=A_MULSS) or (taicpu(hp1).opcode=A_DIVSS))) or
  871. ((taicpu(p).opcode=A_MOVAPD) and
  872. ((taicpu(hp1).opcode=A_ADDSD) or (taicpu(hp1).opcode=A_SUBSD) or
  873. (taicpu(hp1).opcode=A_MULSD) or (taicpu(hp1).opcode=A_DIVSD)))
  874. ) then
  875. { change
  876. movapX reg,reg2
  877. addsX/subsX/... reg3, reg2
  878. movapX reg2,reg
  879. to
  880. addsX/subsX/... reg3,reg
  881. }
  882. begin
  883. CopyUsedRegs(TmpUsedRegs);
  884. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  885. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  886. If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
  887. begin
  888. DebugMsg('Peephole Optimization MovapXOpMovapX2Op ('+
  889. std_op2str[taicpu(p).opcode]+' '+
  890. std_op2str[taicpu(hp1).opcode]+' '+
  891. std_op2str[taicpu(hp2).opcode]+') done',p);
  892. { we cannot eliminate the first move if
  893. the operations uses the same register for source and dest }
  894. if not(OpsEqual(taicpu(hp1).oper[1]^,taicpu(hp1).oper[0]^)) then
  895. begin
  896. asml.remove(p);
  897. p.Free;
  898. end;
  899. taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
  900. asml.remove(hp2);
  901. hp2.Free;
  902. p:=hp1;
  903. result:=true;
  904. end;
  905. ReleaseUsedRegs(TmpUsedRegs);
  906. end
  907. end;
  908. function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  909. var
  910. TmpUsedRegs : TAllUsedRegs;
  911. hp1,hp2 : tai;
  912. begin
  913. result:=false;
  914. if MatchOpType(taicpu(p),top_reg,top_reg) then
  915. begin
  916. { vmova* reg1,reg1
  917. =>
  918. <nop> }
  919. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  920. begin
  921. GetNextInstruction(p,hp1);
  922. asml.Remove(p);
  923. p.Free;
  924. p:=hp1;
  925. result:=true;
  926. end
  927. else if GetNextInstruction(p,hp1) then
  928. begin
  929. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  930. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  931. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  932. begin
  933. { vmova* reg1,reg2
  934. vmova* reg2,reg3
  935. dealloc reg2
  936. =>
  937. vmova* reg1,reg3 }
  938. CopyUsedRegs(TmpUsedRegs);
  939. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  940. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  941. begin
  942. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  943. asml.Remove(hp1);
  944. hp1.Free;
  945. result:=true;
  946. end
  947. { special case:
  948. vmova* reg1,reg2
  949. vmova* reg2,reg1
  950. =>
  951. vmova* reg1,reg2 }
  952. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  953. begin
  954. asml.Remove(hp1);
  955. hp1.Free;
  956. result:=true;
  957. end
  958. end
  959. else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
  960. { we mix single and double opperations here because we assume that the compiler
  961. generates vmovapd only after double operations and vmovaps only after single operations }
  962. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  963. GetNextInstruction(hp1,hp2) and
  964. MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  965. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  966. begin
  967. CopyUsedRegs(TmpUsedRegs);
  968. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  969. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  970. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  971. then
  972. begin
  973. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  974. asml.Remove(p);
  975. p.Free;
  976. asml.Remove(hp2);
  977. hp2.Free;
  978. p:=hp1;
  979. end;
  980. end;
  981. end;
  982. end;
  983. end;
    { Folds a three-operand AVX arithmetic instruction followed by a register
      move of its result:
        V<Op>X %mreg1,%mreg2,%mreg3 / VMovX %mreg3,%mreg4  =>
        V<Op>X %mreg1,%mreg2,%mreg4
      when %mreg3 is not used afterwards. }
    function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
      var
        TmpUsedRegs : TAllUsedRegs;
        hp1 : tai;
      begin
        result:=false;
        { replace
            V<Op>X   %mreg1,%mreg2,%mreg3
            VMovX    %mreg3,%mreg4
            dealloc  %mreg3

            by
            V<Op>X   %mreg1,%mreg2,%mreg4
          ?
        }
        if GetNextInstruction(p,hp1) and
          { we mix single and double opperations here because we assume that the compiler
            generates vmovapd only after double operations and vmovaps only after single operations }
          MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
          MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
          (taicpu(hp1).oper[1]^.typ=top_reg) then
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            { NOTE(review): liveness of %mreg3 is checked after hp1, with
              TmpUsedRegs only advanced past p - confirm this matches the
              pattern used by the other OptPass1 routines }
            if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
             ) then
              begin
                taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
                DebugMsg('PeepHole Optimization VOpVmov2VOp done',p);
                asml.Remove(hp1);
                hp1.Free;
                result:=true;
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end;
      end;
    { First-pass peephole optimizations for a MOV instruction: removes no-op
      moves, folds mov/and with an all-ones mask, eliminates transfers through
      a temporary register, rewrites test/or to read the original source
      register, removes dead stores in front of the exit code, folds
      mov/mov, mov/mov/cmp, mov/arith/mov, mov/bts-btr/or and mov/lea
      sequences. Returns true (with p advanced) when p itself was replaced. }
    function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
      var
        hp1, hp2: tai;
        TmpUsedRegs : TAllUsedRegs;
        GetNextIntruction_p : Boolean;
      begin
        Result:=false;
        { remove mov reg1,reg1? }
        if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
          begin
            GetNextInstruction(p, hp1);
            DebugMsg('PeepHole Optimization Mov2Nop done',p);
            asml.remove(p);
            p.free;
            p:=hp1;
            Result:=true;
            exit;
          end;
        GetNextIntruction_p:=GetNextInstruction(p, hp1);
        { mov into a register followed by an and of that register with an
          all-ones 32 bit mask: the and is redundant }
        if GetNextIntruction_p and
          MatchInstruction(hp1,A_AND,[]) and
          (taicpu(p).oper[1]^.typ = top_reg) and
          MatchOpType(taicpu(hp1),top_const,top_reg) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
          case taicpu(p).opsize Of
            S_L:
              if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                begin
                  DebugMsg('PeepHole Optimization MovAnd2Mov done',p);
                  asml.remove(hp1);
                  hp1.free;
                  Result:=true;
                  exit;
                end;
          end
        else if GetNextIntruction_p and
          MatchInstruction(hp1,A_MOV,[]) and
          (taicpu(p).oper[1]^.typ = top_reg) and
          (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
          MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
            { we have
                mov x, %treg
                mov %treg, y
            }
            if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
              not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
              { we've got
                  mov x, %treg
                  mov %treg, y
                with %treg is not used after }
              case taicpu(p).oper[0]^.typ Of
                top_reg:
                  begin
                    { change
                        mov %reg, %treg
                        mov %treg, y
                      to
                        mov %reg, y
                    }
                    if taicpu(hp1).oper[1]^.typ=top_reg then
                      AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
                    taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
                    DebugMsg('PeepHole Optimization MovMov2Mov 2 done',p);
                    asml.remove(hp1);
                    hp1.free;
                    ReleaseUsedRegs(TmpUsedRegs);
                    Result:=true;
                    Exit;
                  end;
                top_ref:
                  if (taicpu(hp1).oper[1]^.typ = top_reg) then
                    begin
                      { change
                          mov mem, %treg
                          mov %treg, %reg
                        to
                          mov mem, %reg"
                      }
                      taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                      DebugMsg('PeepHole Optimization MovMov2Mov 3 done',p);
                      asml.remove(hp1);
                      hp1.free;
                      ReleaseUsedRegs(TmpUsedRegs);
                      Result:=true;
                      Exit;
                    end;
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end
        else
          { Change
              mov %reg1, %reg2
              xxx %reg2, ???
            to
              mov %reg1, %reg2
              xxx %reg1, ???
            to avoid a write/read penalty
          }
          if MatchOpType(taicpu(p),top_reg,top_reg) and
            GetNextInstruction(p,hp1) and
            (tai(hp1).typ = ait_instruction) and
            (taicpu(hp1).ops >= 1) and
            MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
            { we have
                mov %reg1, %reg2
                XXX %reg2, ???
            }
            begin
              if ((taicpu(hp1).opcode = A_OR) or
                (taicpu(hp1).opcode = A_TEST)) and
                (taicpu(hp1).oper[1]^.typ = top_reg) and
                (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
                { we have
                    mov %reg1, %reg2
                    test/or %reg2, %reg2
                }
                begin
                  CopyUsedRegs(TmpUsedRegs);
                  { reg1 will be used after the first instruction,
                    so update the allocation info }
                  AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                  if GetNextInstruction(hp1, hp2) and
                    (hp2.typ = ait_instruction) and
                    taicpu(hp2).is_jmp and
                    not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
                    { change
                        mov %reg1, %reg2
                        test/or %reg2, %reg2
                        jxx
                      to
                        test %reg1, %reg1
                        jxx
                    }
                    begin
                      taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                      taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                      DebugMsg('PeepHole Optimization MovTestJxx2TestMov done',p);
                      asml.remove(p);
                      p.free;
                      p := hp1;
                      ReleaseUsedRegs(TmpUsedRegs);
                      Exit;
                    end
                  else
                    { change
                        mov %reg1, %reg2
                        test/or %reg2, %reg2
                      to
                        mov %reg1, %reg2
                        test/or %reg1, %reg1
                    }
                    begin
                      taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                      taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                      DebugMsg('PeepHole Optimization MovTestJxx2ovTestJxx done',p);
                    end;
                  ReleaseUsedRegs(TmpUsedRegs);
                end
            end
          else
            { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
              x >= RetOffset) as it doesn't do anything (it writes either to a
              parameter or to the temporary storage room for the function
              result)
            }
            if GetNextIntruction_p and
              (tai(hp1).typ = ait_instruction) then
              begin
                if IsExitCode(hp1) and
                  MatchOpType(taicpu(p),top_reg,top_ref) and
                  (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
                  not(assigned(current_procinfo.procdef.funcretsym) and
                    (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
                  (taicpu(p).oper[1]^.ref^.index = NR_NO) then
                  begin
                    asml.remove(p);
                    p.free;
                    p:=hp1;
                    DebugMsg('Peephole removed deadstore before leave/ret',p);
                    RemoveLastDeallocForFuncRes(p);
                    exit;
                  end
                { change
                    mov reg1, mem1
                    test/cmp x, mem1

                  to
                    mov reg1, mem1
                    test/cmp x, reg1
                }
                else if MatchOpType(taicpu(p),top_reg,top_ref) and
                  MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
                  (taicpu(hp1).oper[1]^.typ = top_ref) and
                  RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                  begin
                    taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
                    DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
                    AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                  end;
              end;
        { Next instruction is also a MOV ? }
        if GetNextIntruction_p and
          MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
          begin
            if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
              (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
              { mov reg1, mem1     or     mov mem1, reg1
                mov mem2, reg2            mov reg2, mem2}
              begin
                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
                  { mov reg1, mem1     or     mov mem1, reg1
                    mov mem2, reg1            mov reg2, mem1}
                  begin
                    if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                      { Removes the second statement from
                          mov reg1, mem1/reg2
                          mov mem1/reg2, reg1 }
                      begin
                        if taicpu(p).oper[0]^.typ=top_reg then
                          AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                        DebugMsg('PeepHole Optimization MovMov2Mov 1',p);
                        asml.remove(hp1);
                        hp1.free;
                        Result:=true;
                        exit;
                      end
                    else
                      begin
                        CopyUsedRegs(TmpUsedRegs);
                        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                        if (taicpu(p).oper[1]^.typ = top_ref) and
                          { mov reg1, mem1
                            mov mem2, reg1 }
                          (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
                          GetNextInstruction(hp1, hp2) and
                          MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
                          OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                          OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
                          not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
                          { change to
                              mov reg1, mem1       mov reg1, mem1
                              mov mem2, reg1       cmp reg1, mem2
                              cmp mem1, reg1
                          }
                          begin
                            asml.remove(hp2);
                            hp2.free;
                            taicpu(hp1).opcode := A_CMP;
                            taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
                            taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                            AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                            DebugMsg('Peephole Optimization MovMovCmp2MovCmp done',hp1);
                          end;
                        ReleaseUsedRegs(TmpUsedRegs);
                      end;
                  end
                else if (taicpu(p).oper[1]^.typ=top_ref) and
                  OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  { the second mov reads the memory just written: read from
                    the register instead }
                  begin
                    AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                    taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                    DebugMsg('PeepHole Optimization MovMov2MovMov1 done',p);
                  end
                else
                  begin
                    CopyUsedRegs(TmpUsedRegs);
                    if GetNextInstruction(hp1, hp2) and
                      MatchOpType(taicpu(p),top_ref,top_reg) and
                      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
                      (taicpu(hp1).oper[1]^.typ = top_ref) and
                      MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
                      MatchOpType(taicpu(hp2),top_ref,top_reg) and
                      RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                      if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
                        not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
                        {   mov mem1, %reg1
                            mov %reg1, mem2
                            mov mem2, reg2
                         to:
                            mov mem1, reg2
                            mov reg2, mem2}
                        begin
                          AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
                          DebugMsg('PeepHole Optimization MovMovMov2MovMov 1 done',p);
                          taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
                          taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
                          asml.remove(hp2);
                          hp2.free;
                        end
{$ifdef i386}
                      { this is enabled for i386 only, as the rules to create the reg sets below
                        are too complicated for x86-64, so this makes this code too error prone
                        on x86-64
                      }
                      else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
                        not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
                        not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
                        {   mov mem1, reg1         mov mem1, reg1
                            mov reg1, mem2         mov reg1, mem2
                            mov mem2, reg2         mov mem2, reg1
                         to:                    to:
                            mov mem1, reg1         mov mem1, reg1
                            mov mem1, reg2         mov reg1, mem2
                            mov reg1, mem2

                         or (if mem1 depends on reg1
                      and/or if mem2 depends on reg2)
                         to:
                             mov mem1, reg1
                             mov reg1, mem2
                             mov reg1, reg2
                        }
                        begin
                          taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                          taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
                          taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
                          taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
                          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                          if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
                            (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                            AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
                          if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
                            (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                            AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
                        end
                      else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
                        begin
                          taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
                          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                        end
                      else
                        begin
                          asml.remove(hp2);
                          hp2.free;
                        end
{$endif i386}
                      ;
                    ReleaseUsedRegs(TmpUsedRegs);
                  end;
              end
(*          { movl [mem1],reg1
              movl [mem1],reg2

              to

              movl [mem1],reg1
              movl reg1,reg2
            }
            else if (taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[1]^.typ = top_reg) and
              (taicpu(hp1).oper[0]^.typ = top_ref) and
              (taicpu(hp1).oper[1]^.typ = top_reg) and
              (taicpu(p).opsize = taicpu(hp1).opsize) and
              RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
              (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
              (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
            else*)
            { movl const1,[mem1]
              movl [mem1],reg1

              to

              movl const1,reg1
              movl reg1,[mem1]
            }
            else if MatchOpType(Taicpu(p),top_const,top_ref) and
              MatchOpType(Taicpu(hp1),top_ref,top_reg) and
              (taicpu(p).opsize = taicpu(hp1).opsize) and
              RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
              not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
              begin
                AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
                taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
                taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
                taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
                taicpu(hp1).fileinfo := taicpu(p).fileinfo;
                DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
              end
          end
        else if (taicpu(p).oper[1]^.typ = top_reg) and
          GetNextIntruction_p and
          (hp1.typ = ait_instruction) and
          GetNextInstruction(hp1, hp2) and
          MatchInstruction(hp2,A_MOV,[]) and
          OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
          (taicpu(hp2).oper[0]^.typ=top_reg) and
          (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
          (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
           ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
            IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
          ) then
          { change   movsX/movzX    reg/ref, reg2
                     add/sub/or/... reg3/$const, reg2
                     mov            reg2 reg/ref
            to       add/sub/or/... reg3/$const, reg/ref }
          begin
            CopyUsedRegs(TmpUsedRegs);
            UpdateUsedRegs(TmpUsedRegs, tai(p.next));
            UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
            If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
              begin
                { by example:
                    movswl  %si,%eax        movswl  %si,%eax      p
                    decl    %eax            addl    %edx,%eax     hp1
                    movw    %ax,%si         movw    %ax,%si       hp2
                  ->
                    movswl  %si,%eax        movswl  %si,%eax      p
                    decw    %eax            addw    %edx,%eax     hp1
                    movw    %ax,%si         movw    %ax,%si       hp2
                }
                DebugMsg('Peephole Optimization MovOpMov2Op ('+
                      std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
                      std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
                      std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
                taicpu(hp1).changeopsize(taicpu(hp2).opsize);
                {
                  ->
                    movswl  %si,%eax        movswl  %si,%eax      p
                    decw    %si             addw    %dx,%si       hp1
                    movw    %ax,%si         movw    %ax,%si       hp2
                }
                case taicpu(hp1).ops of
                  1:
                    begin
                      taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                      if taicpu(hp1).oper[0]^.typ=top_reg then
                        setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                    end;
                  2:
                    begin
                      taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                      if (taicpu(hp1).oper[0]^.typ=top_reg) and
                        (taicpu(hp1).opcode<>A_SHL) and
                        (taicpu(hp1).opcode<>A_SHR) and
                        (taicpu(hp1).opcode<>A_SAR) then
                        setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                    end;
                  else
                    internalerror(2008042701);
                end;
                {
                  ->
                    decw    %si             addw    %dx,%si       p
                }
                asml.remove(p);
                asml.remove(hp2);
                p.Free;
                hp2.Free;
                p := hp1;
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end
        else if GetNextIntruction_p and
          MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
          GetNextInstruction(hp1, hp2) and
          MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
          MatchOperand(Taicpu(p).oper[0]^,0) and
          (Taicpu(p).oper[1]^.typ = top_reg) and
          MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
          MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
          { mov reg1,0
            bts reg1,operand1             -->      mov reg1,operand2
            or  reg1,operand2                      bts reg1,operand1}
          begin
            Taicpu(hp2).opcode:=A_MOV;
            asml.remove(hp1);
            { move the bts/btr behind the (former or, now) mov }
            insertllitem(hp2,hp2.next,hp1);
            asml.remove(p);
            p.free;
            p:=hp1;
          end
        else if GetNextIntruction_p and
          MatchInstruction(hp1,A_LEA,[S_L]) and
          MatchOpType(Taicpu(p),top_ref,top_reg) and
          ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
            (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
           ) or
           (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
            (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
           )
          ) then
          { mov reg1,ref
            lea reg2,[reg1,reg2]

            to

            add reg2,ref}
          begin
            CopyUsedRegs(TmpUsedRegs);
            { reg1 may not be used afterwards }
            if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
              begin
                Taicpu(hp1).opcode:=A_ADD;
                Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
                DebugMsg('Peephole MovLea2Add done',hp1);
                asml.remove(p);
                p.free;
                p:=hp1;
              end;
            ReleaseUsedRegs(TmpUsedRegs);
          end;
      end;
{ Pass-1 handler for movsX/movzX-style pairs:
    movXX a, b
    movXX b, a
  The second instruction copies the value straight back, so it is always
  removed; the first one is removed as well when its destination register
  is not used afterwards.
  Returns true when the instruction stream was changed (p may be advanced). }
function TX86AsmOptimizer.OptPass1MOVXX(var p : tai) : boolean;
  var
    hp1 : tai;
  begin
    Result:=false;
    { only the two-operand form is handled }
    if taicpu(p).ops <> 2 then
      exit;
    { the follower must be the same opcode with the same size }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,taicpu(p).opcode,[taicpu(p).opsize]) and
      (taicpu(hp1).ops = 2) then
      begin
        { operand types must be mirrored between the two instructions }
        if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
          (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
          {  movXX reg1, mem1     or     movXX mem1, reg1
             movXX mem2, reg2            movXX reg2, mem2  }
          begin
            { destination of the second = source of the first ... }
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
              {  movXX reg1, mem1     or     movXX mem1, reg1
                 movXX mem2, reg1            movXX reg2, mem1  }
              begin
                { ... and source of the second = destination of the first:
                  the pair moves a value back and forth }
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  begin
                    { Removes the second statement from
                        movXX reg1, mem1/reg2
                        movXX mem1/reg2, reg1
                    }
                    if taicpu(p).oper[0]^.typ=top_reg then
                      { keep reg1 allocated across the (removed) round trip }
                      AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                    { Removes the second statement from
                        movXX mem1/reg1, reg2
                        movXX reg2, mem1/reg1
                    }
                    if (taicpu(p).oper[1]^.typ=top_reg) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)) then
                      begin
                        { reg2 dies here: both moves can go; advance p first }
                        asml.remove(p);
                        p.free;
                        GetNextInstruction(hp1,p);
                        DebugMsg('PeepHole Optimization MovXXMovXX2Nop 1 done',p);
                      end
                    else
                      DebugMsg('PeepHole Optimization MovXXMovXX2MoVXX 1 done',p);
                    { in either case the second move is redundant }
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    exit;
                  end
              end;
          end;
      end;
  end;
{ Pass-1 handler for commutative SSE/AVX-style ops followed by a register
  copy-back.  Returns true when the follow-up move was folded away. }
function TX86AsmOptimizer.OptPass1OP(const p : tai) : boolean;
  var
    TmpUsedRegs : TAllUsedRegs;
    hp1 : tai;
  begin
    result:=false;
    { replace
        <Op>X    %mreg1,%mreg2  // Op in [ADD,MUL]
        MovX     %mreg2,%mreg1
        dealloc  %mreg2

      by
        <Op>X    %mreg2,%mreg1
      ?
    }
    if GetNextInstruction(p,hp1) and
      { we mix single and double opperations here because we assume that the compiler
        generates vmovapd only after double operations and vmovaps only after single operations }
      MatchInstruction(hp1,A_MOVAPD,A_MOVAPS,[S_NO]) and
      { the move must copy the op's destination back into the op's source }
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) and
      (taicpu(p).oper[0]^.typ=top_reg) then
      begin
        { work on a private copy of the liveness state; released below }
        CopyUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        { mreg2 must die after the move, otherwise its value is still needed }
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            { swap the op's operands so the result lands in mreg1 directly }
            taicpu(p).loadoper(0,taicpu(hp1).oper[0]^);
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg('PeepHole Optimization OpMov2Op done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end;
  end;
{ Pass-1 handler for LEA: strips useless segment prefixes and rewrites
  simple address computations as MOV/INC/DEC/ADD/SUB, or drops the LEA
  entirely when it is a no-op.  Returns true when p was replaced. }
function TX86AsmOptimizer.OptPass1LEA(var p : tai) : boolean;
  var
    hp1 : tai;
    l : ASizeInt;
    TmpUsedRegs : TAllUsedRegs;
  begin
    Result:=false;
    { removes seg register prefixes from LEA operations, as they
      don't do anything}
    taicpu(p).oper[0]^.ref^.Segment:=NR_NO;
    { changes "lea (%reg1), %reg2" into "mov %reg1, %reg2" }
    if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
      (taicpu(p).oper[0]^.ref^.index = NR_NO) and
      { do not mess with leas acessing the stack pointer }
      (taicpu(p).oper[1]^.reg <> NR_STACK_POINTER_REG) and
      (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
      begin
        if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
          (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            { lea (%reg1),%reg2 with reg1<>reg2 -> plain register move }
            hp1:=taicpu.op_reg_reg(A_MOV,taicpu(p).opsize,taicpu(p).oper[0]^.ref^.base,
              taicpu(p).oper[1]^.reg);
            InsertLLItem(p.previous,p.next, hp1);
            DebugMsg('PeepHole Optimization Lea2Mov done',hp1);
            p.free;
            p:=hp1;
            Result:=true;
            exit;
          end
        else if (taicpu(p).oper[0]^.ref^.offset = 0) then
          begin
            { here base = destination register: lea (%reg),%reg is a no-op }
            hp1:=taicpu(p.Next);
            DebugMsg('PeepHole Optimization Lea2Nop done',p);
            asml.remove(p);
            p.free;
            p:=hp1;
            Result:=true;
            exit;
          end
        { continue to use lea to adjust the stack pointer,
          it is the recommended way, but only if not optimizing for size }
        else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
          (cs_opt_size in current_settings.optimizerswitches) then
          with taicpu(p).oper[0]^.ref^ do
            if (base = taicpu(p).oper[1]^.reg) then
              begin
                { lea offset(%reg),%reg -> inc/dec/add/sub on %reg }
                l:=offset;
                if (l=1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_INC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg('PeepHole Optimization Lea2Inc done',p);
                  end
                else if (l=-1) and UseIncDec then
                  begin
                    taicpu(p).opcode:=A_DEC;
                    taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                    taicpu(p).ops:=1;
                    DebugMsg('PeepHole Optimization Lea2Dec done',p);
                  end
                else
                  begin
                    { -l would overflow for the most negative 32-bit value,
                      so that case must stay an ADD }
                    if (l<0) and (l<>-2147483648) then
                      begin
                        taicpu(p).opcode:=A_SUB;
                        taicpu(p).loadConst(0,-l);
                        DebugMsg('PeepHole Optimization Lea2Sub done',p);
                      end
                    else
                      begin
                        taicpu(p).opcode:=A_ADD;
                        taicpu(p).loadConst(0,l);
                        DebugMsg('PeepHole Optimization Lea2Add done',p);
                      end;
                  end;
                Result:=true;
                exit;
              end;
      end;
    { lea x,reg1 ; mov reg1,reg2 (reg1 dead after) -> lea x,reg2 }
    if GetNextInstruction(p,hp1) and
      MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
      MatchOpType(Taicpu(hp1),top_reg,top_reg) and
      (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) then
      begin
        CopyUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
          begin
            taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
            DebugMsg('PeepHole Optimization LeaMov2Lea done',p);
            asml.Remove(hp1);
            hp1.Free;
            result:=true;
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end;
(*
    This is unsafe, lea doesn't modify the flags but "add"
    does. This breaks webtbs/tw15694.pp. The above
    transformations are also unsafe, but they don't seem to
    be triggered by code that FPC generators (or that at
    least does not occur in the tests...). This needs to be
    fixed by checking for the liveness of the flags register.
    else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
      begin
        hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
          taicpu(p).oper[0]^.ref^.base);
        InsertLLItem(asml,p.previous,p.next, hp1);
        DebugMsg('Peephole Lea2AddBase done',hp1);
        p.free;
        p:=hp1;
        continue;
      end
    else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
      begin
        hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
          taicpu(p).oper[0]^.ref^.index);
        InsertLLItem(asml,p.previous,p.next,hp1);
        DebugMsg('Peephole Lea2AddIndex done',hp1);
        p.free;
        p:=hp1;
        continue;
      end
*)
  end;
{ Pass-2 MOV optimizations:
    1) mov reg1,reg2 ; mov/movzx/movsx (reg2,...),reg2
         -> mov/movzx/movsx (reg1,...),reg2
    2) mov (ref),reg ; <foldable op> ...,reg ; mov reg,(ref), reg dead after
         -> <op> ...,(ref)   (an LEA is converted into an ADD on the ref)
  Returns true when case 1 fired; case 2 rewrites in place without
  setting Result (matches the original behaviour). }
function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  var
    TmpUsedRegs : TAllUsedRegs;
    hp1,hp2: tai;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_reg,top_reg) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
      MatchOpType(taicpu(hp1),top_ref,top_reg) and
      { reg2 must appear in the memory operand of the second move ... }
      ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
        or
        (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
      ) and
      { ... and also be (re)written by it, so the copy in p is redundant }
      (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
      {  mov reg1, reg2
         mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
      begin
        if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
          taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
        if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
          taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
        DebugMsg('PeepHole Optimization MovMovXX2MoVXX 1 done',p);
        asml.remove(p);
        p.free;
        p := hp1;
        Result:=true;
        exit;
      end
    else if (taicpu(p).oper[0]^.typ = top_ref) and
      GetNextInstruction(p,hp1) and
      (hp1.typ = ait_instruction) and
      { while the GetNextInstruction(hp1,hp2) call could be factored out,
        doing it separately in both branches allows to do the cheap checks
        with low probability earlier }
      ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
        GetNextInstruction(hp1,hp2) and
        MatchInstruction(hp2,A_MOV,[])
       ) or
       { an LEA is foldable only when the loaded register appears exactly
         once in its reference (as base xor index, or via a pure offset) }
       ((taicpu(hp1).opcode=A_LEA) and
        GetNextInstruction(hp1,hp2) and
        MatchInstruction(hp2,A_MOV,[]) and
        ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
          (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
         ) or
         (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
           taicpu(p).oper[1]^.reg) and
          (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
         (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
         (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
        ) and
        ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
       )
      ) and
      { the op's result must be what the final mov stores, into memory }
      MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
      (taicpu(hp2).oper[1]^.typ = top_ref) then
      begin
        CopyUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
        { load and store must hit the same location, and the scratch
          register must die after the store }
        if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
          not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
          { change   mov            (ref), reg
                     add/sub/or/... reg2/$const, reg
                     mov            reg, (ref)
                     # release reg
            to       add/sub/or/... reg2/$const, (ref)    }
          begin
            case taicpu(hp1).opcode of
              A_INC,A_DEC,A_NOT,A_NEG :
                { single-operand ops: operate on the memory location directly }
                taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
              A_LEA :
                begin
                  { lea -> add of the "other" component of the reference }
                  taicpu(hp1).opcode:=A_ADD;
                  if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                    taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                  else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                    taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                  else
                    taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                  DebugMsg('Peephole FoldLea done',hp1);
                end
              else
                { two-operand arith: redirect the destination to memory }
                taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
            end;
            asml.remove(p);
            asml.remove(hp2);
            p.free;
            hp2.free;
            p := hp1
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end;
  end;
{ Pass-2 IMUL optimization: merges a preceding register copy into the
  three-operand imul form:
      mov  reg1,reg2
      imul y,reg2          ->   imul y,reg1,reg2
  Returns true when the mov was removed. }
function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  var
    TmpUsedRegs : TAllUsedRegs;
    hp1 : tai;
  begin
    Result:=false;
    if (taicpu(p).ops >= 2) and
      { y must be a constant or a full-address memory operand }
      ((taicpu(p).oper[0]^.typ = top_const) or
       ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
      (taicpu(p).oper[1]^.typ = top_reg) and
      { either the 2-op form, or the 3-op form with src2 = dest }
      ((taicpu(p).ops = 2) or
       ((taicpu(p).oper[2]^.typ = top_reg) and
        (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
      GetLastInstruction(p,hp1) and
      MatchInstruction(hp1,A_MOV,[]) and
      MatchOpType(taicpu(hp1),top_reg,top_reg) and
      { the mov must feed the imul's register operand; on x86_64 a 32-bit
        mov also feeds a 64-bit imul of the same super register }
      ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
       ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
      begin
        CopyUsedRegs(TmpUsedRegs);
        { NOTE(review): this tests liveness of the imul DESTINATION after p,
          while the rewrite itself is value-equivalent either way; it looks
          stricter than necessary (one would expect a check on the mov's
          source, hp1.oper[0]) — confirm against upstream before changing }
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
          { change
              mov reg1,reg2
              imul y,reg2 to imul y,reg1,reg2 }
          begin
            taicpu(p).ops := 3;
            taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
            taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
            DebugMsg('Peephole MovImul2Imul done',p);
            asml.remove(hp1);
            hp1.free;
            result:=true;
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end;
  end;
{ Pass-2 JMP optimization: an unconditional jump to a label that is
  immediately followed by a RET is turned into the RET itself.
  Returns true when p was rewritten. }
function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  var
    hp1 : tai;
  begin
    {
      change
             jmp .L1
             ...
         .L1:
             ret
      into
             ret
    }
    result:=false;
    { only plain symbolic jumps (no register/indexed targets) }
    if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
      (taicpu(p).oper[0]^.ref^.index=NR_NO) then
      begin
        hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
        { must be unconditional, and the target (after skipping labels)
          must be a RET }
        if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
          MatchInstruction(hp1,A_RET,[S_NO]) then
          begin
            { one reference to the label disappears }
            tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
            { rewrite the jmp in place as a copy of the ret }
            taicpu(p).opcode:=A_RET;
            taicpu(p).is_jmp:=false;
            taicpu(p).ops:=taicpu(hp1).ops;
            case taicpu(hp1).ops of
              0:
                taicpu(p).clearop(0);
              1:
                { ret $n: carry over the stack-adjust constant }
                taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
              else
                internalerror(2016041301);
            end;
            result:=true;
          end;
      end;
  end;
  1897. function CanBeCMOV(p : tai) : boolean;
  1898. begin
  1899. CanBeCMOV:=assigned(p) and
  1900. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  1901. { we can't use cmov ref,reg because
  1902. ref could be nil and cmov still throws an exception
  1903. if ref=nil but the mov isn't done (FK)
  1904. or ((taicpu(p).oper[0]^.typ = top_ref) and
  1905. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  1906. }
  1907. MatchOpType(taicpu(p),top_reg,top_reg);
  1908. end;
{ Pass-2 Jcc optimizations:
    1) carry-conditional skip of inc/dec -> adc/sbb with 0
       (with a CMC first when jumping on carry set)
    2) on CPUs with CMOV: conditional jumps over short runs of reg,reg
       moves are replaced by CMOVcc sequences, for both the one-sided
       (jCC xxx / movs / xxx:) and the two-sided
       (jCC xxx / movs1 / jmp yyy / xxx: / movs2 / yyy:) diamond.
  Returns true when the stream was changed. }
function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  var
    hp1,hp2,hp3: tai;
    carryadd_opcode : TAsmOp;
    l : Longint;
    condition : TAsmCond;
  begin
    { jb @@1                            cmc
      inc/dec operand           -->     adc/sbb operand,0
      @@1:
      ... and ...
      jnb @@1
      inc/dec operand           -->     adc/sbb operand,0
      @@1: }
    result:=false;
    { the jcc must skip exactly one instruction and land on its own label }
    if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
      GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
      (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
      begin
        carryadd_opcode:=A_NONE;
        { skip-on-carry-set: invert the carry with CMC, then fold }
        if Taicpu(p).condition in [C_NAE,C_B] then
          begin
            if Taicpu(hp1).opcode=A_INC then
              carryadd_opcode:=A_ADC;
            if Taicpu(hp1).opcode=A_DEC then
              carryadd_opcode:=A_SBB;
            if carryadd_opcode<>A_NONE then
              begin
                { rewrite the jcc in place as a CMC }
                Taicpu(p).clearop(0);
                Taicpu(p).ops:=0;
                Taicpu(p).is_jmp:=false;
                Taicpu(p).opcode:=A_CMC;
                Taicpu(p).condition:=C_NONE;
                { inc/dec op -> adc/sbb $0,op }
                Taicpu(hp1).ops:=2;
                Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                Taicpu(hp1).loadconst(0,0);
                Taicpu(hp1).opcode:=carryadd_opcode;
                result:=true;
                exit;
              end;
          end;
        { skip-on-carry-clear: the carry already has the right polarity,
          the jump can simply be removed }
        if Taicpu(p).condition in [C_AE,C_NB] then
          begin
            if Taicpu(hp1).opcode=A_INC then
              carryadd_opcode:=A_ADC;
            if Taicpu(hp1).opcode=A_DEC then
              carryadd_opcode:=A_SBB;
            if carryadd_opcode<>A_NONE then
              begin
                asml.remove(p);
                p.free;
                Taicpu(hp1).ops:=2;
                Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                Taicpu(hp1).loadconst(0,0);
                Taicpu(hp1).opcode:=carryadd_opcode;
                p:=hp1;
                result:=true;
                exit;
              end;
          end;
      end;
{$ifndef i8086}
    if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
      begin
        { check for
               jCC   xxx
               <several movs>
            xxx:
        }
        { count the CMOV-convertible moves following the jcc }
        l:=0;
        GetNextInstruction(p, hp1);
        while assigned(hp1) and
          CanBeCMOV(hp1) and
          { stop on labels }
          not(hp1.typ=ait_label) do
          begin
            inc(l);
            GetNextInstruction(hp1,hp1);
          end;
        if assigned(hp1) then
          begin
            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
              begin
                { one-sided pattern: convert at most 4 moves }
                if (l<=4) and (l>0) then
                  begin
                    { the moves execute when the jump is NOT taken }
                    condition:=inverse_cond(taicpu(p).condition);
                    hp2:=p;
                    GetNextInstruction(p,hp1);
                    p:=hp1;
                    repeat
                      taicpu(hp1).opcode:=A_CMOVcc;
                      taicpu(hp1).condition:=condition;
                      GetNextInstruction(hp1,hp1);
                    until not(assigned(hp1)) or
                      not(CanBeCMOV(hp1));
                    { wait with removing else GetNextInstruction could
                      ignore the label if it was the only usage in the
                      jump moved away }
                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                    { if the label refs. reach zero, remove any alignment before the label }
                    if (hp1.typ=ait_align) and (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
                      begin
                        asml.Remove(hp1);
                        hp1.Free;
                      end;
                    asml.remove(hp2);
                    hp2.free;
                    result:=true;
                    exit;
                  end;
              end
            else
              begin
                { check further for
                       jCC   xxx
                       <several movs 1>
                       jmp   yyy
                  xxx:
                       <several movs 2>
                  yyy:
                }
                { hp2 points to jmp yyy }
                hp2:=hp1;
                { skip hp1 to xxx }
                GetNextInstruction(hp1, hp1);
                if assigned(hp2) and
                  assigned(hp1) and
                  (l<=3) and
                  (hp2.typ=ait_instruction) and
                  (taicpu(hp2).is_jmp) and
                  (taicpu(hp2).condition=C_None) and
                  { real label and jump, no further references to the
                    label are allowed }
                  (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                  FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                  begin
                    { count the convertible moves on the else side }
                    l:=0;
                    { skip hp1 to <several moves 2> }
                    GetNextInstruction(hp1, hp1);
                    while assigned(hp1) and
                      CanBeCMOV(hp1) do
                      begin
                        inc(l);
                        GetNextInstruction(hp1, hp1);
                      end;
                    { hp1 points to yyy: }
                    if assigned(hp1) and
                      FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                      begin
                        { first arm runs when the jcc is NOT taken }
                        condition:=inverse_cond(taicpu(p).condition);
                        GetNextInstruction(p,hp1);
                        hp3:=p;
                        p:=hp1;
                        repeat
                          taicpu(hp1).opcode:=A_CMOVcc;
                          taicpu(hp1).condition:=condition;
                          GetNextInstruction(hp1,hp1);
                        until not(assigned(hp1)) or
                          not(CanBeCMOV(hp1));
                        { hp2 is still at jmp yyy }
                        GetNextInstruction(hp2,hp1);
                        { hp2 is now at xxx: }
                        { second arm runs under the original condition }
                        condition:=inverse_cond(condition);
                        GetNextInstruction(hp1,hp1);
                        { hp1 is now at <several movs 2> }
                        repeat
                          taicpu(hp1).opcode:=A_CMOVcc;
                          taicpu(hp1).condition:=condition;
                          GetNextInstruction(hp1,hp1);
                        until not(assigned(hp1)) or
                          not(CanBeCMOV(hp1));
                        {
                        asml.remove(hp1.next)
                        hp1.next.free;
                        asml.remove(hp1);
                        hp1.free;
                        }
                        { remove jCC }
                        tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                        asml.remove(hp3);
                        hp3.free;
                        { remove jmp }
                        tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                        asml.remove(hp2);
                        hp2.free;
                        result:=true;
                        exit;
                      end;
                  end;
              end;
          end;
      end;
{$endif i8086}
  end;
{ Pass-1 handler for movsX/movzX:
    1) movX src,reg2 ; <foldable op> ...,reg2 ; mov reg2,src (reg2 dead)
         -> narrow the op and apply it to src directly
    2) movzx-specific cleanups: drop redundant ANDs after a movzx, and
       rewrite movzx into and/mov+and forms that are faster or needed
       to keep the constant mask consistent.
  Note: most rewrites here mutate instructions in place and do not set
  Result; this matches the original behaviour. }
function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  var
    hp1,hp2: tai;
  begin
    result:=false;
    if (taicpu(p).oper[1]^.typ = top_reg) and
      GetNextInstruction(p,hp1) and
      (hp1.typ = ait_instruction) and
      IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
      GetNextInstruction(hp1,hp2) and
      MatchInstruction(hp2,A_MOV,[]) and
      (taicpu(hp2).oper[0]^.typ = top_reg) and
      { the final mov must store back to the movX's original source }
      OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
{$ifdef i386}
      { not all registers have byte size sub registers on i386 }
      ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
{$endif i386}
      { the op's result register must be the one being stored back }
      (((taicpu(hp1).ops=2) and
        (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
       ((taicpu(hp1).ops=1) and
        (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
      not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
      begin
        { change   movsX/movzX    reg/ref, reg2
                   add/sub/or/... reg3/$const, reg2
                   mov            reg2 reg/ref
          to      add/sub/or/... reg3/$const, reg/ref }
        { by example:
            movswl  %si,%eax        movswl  %si,%eax      p
            decl    %eax            addl    %edx,%eax     hp1
            movw    %ax,%si         movw    %ax,%si       hp2
          ->
            movswl  %si,%eax        movswl  %si,%eax      p
            decw    %eax            addw    %edx,%eax     hp1
            movw    %ax,%si         movw    %ax,%si       hp2
        }
        taicpu(hp1).changeopsize(taicpu(hp2).opsize);
        {
          ->
            movswl  %si,%eax        movswl  %si,%eax      p
            decw    %si             addw    %dx,%si       hp1
            movw    %ax,%si         movw    %ax,%si       hp2
        }
        case taicpu(hp1).ops of
          1:
            taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
          2:
            begin
              taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
              { narrow the register source to the new operation size }
              if (taicpu(hp1).oper[0]^.typ = top_reg) then
                setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
            end;
          else
            internalerror(2008042701);
        end;
        {
          ->
            decw    %si             addw    %dx,%si       p
        }
        DebugMsg('PeepHole Optimization,var3',p);
        asml.remove(p);
        asml.remove(hp2);
        p.free;
        hp2.free;
        p:=hp1;
      end
    { removes superfluous And's after movzx's }
    else if taicpu(p).opcode=A_MOVZX then
      begin
        if (taicpu(p).oper[1]^.typ = top_reg) and
          GetNextInstruction(p, hp1) and
          (tai(hp1).typ = ait_instruction) and
          (taicpu(hp1).opcode = A_AND) and
          (taicpu(hp1).oper[0]^.typ = top_const) and
          (taicpu(hp1).oper[1]^.typ = top_reg) and
          (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            { the AND mask is already implied by the zero extension }
            case taicpu(p).opsize Of
              S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
                if (taicpu(hp1).oper[0]^.val = $ff) then
                  begin
                    DebugMsg('PeepHole Optimization,var4',p);
                    asml.remove(hp1);
                    hp1.free;
                  end;
              S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
                if (taicpu(hp1).oper[0]^.val = $ffff) then
                  begin
                    DebugMsg('PeepHole Optimization,var5',p);
                    asml.remove(hp1);
                    hp1.free;
                  end;
{$ifdef x86_64}
              S_LQ:
                if (taicpu(hp1).oper[0]^.val = $ffffffff) then
                  begin
                    if (cs_asm_source in current_settings.globalswitches) then
                      asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
                    asml.remove(hp1);
                    hp1.Free;
                  end;
{$endif x86_64}
            end;
          end;
        { changes some movzx constructs to faster synonims (all examples
          are given with eax/ax, but are also valid for other registers)}
        if (taicpu(p).oper[1]^.typ = top_reg) then
          if (taicpu(p).oper[0]^.typ = top_reg) then
            case taicpu(p).opsize of
              S_BW:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    not(cs_opt_size in current_settings.optimizerswitches) then
                    {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
                    begin
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_W);
                      taicpu(p).loadConst(0,$ff);
                      DebugMsg('PeepHole Optimization,var7',p);
                    end
                  else if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzbw %reg1, %reg2; andw $const, %reg2"
                      to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
                    begin
                      DebugMsg('PeepHole Optimization,var8',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_W);
                      setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                    end;
                end;
              S_BL:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    not(cs_opt_size in current_settings.optimizerswitches) then
                    { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
                    begin
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_L);
                      taicpu(p).loadConst(0,$ff)
                    end
                  else if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzbl %reg1, %reg2; andl $const, %reg2"
                      to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
                    begin
                      DebugMsg('PeepHole Optimization,var10',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_L);
                      { do not use R_SUBWHOLE
                        as movl %rdx,%eax
                        is invalid in assembler PM }
                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                    end
                end;
{$ifndef i8086}
              S_WL:
                begin
                  if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
                    not(cs_opt_size in current_settings.optimizerswitches) then
                    { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
                    begin
                      DebugMsg('PeepHole Optimization,var11',p);
                      taicpu(p).opcode := A_AND;
                      taicpu(p).changeopsize(S_L);
                      taicpu(p).loadConst(0,$ffff);
                    end
                  else if GetNextInstruction(p, hp1) and
                    (tai(hp1).typ = ait_instruction) and
                    (taicpu(hp1).opcode = A_AND) and
                    (taicpu(hp1).oper[0]^.typ = top_const) and
                    (taicpu(hp1).oper[1]^.typ = top_reg) and
                    (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                    { Change "movzwl %reg1, %reg2; andl $const, %reg2"
                      to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
                    begin
                      DebugMsg('PeepHole Optimization,var12',p);
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).changeopsize(S_L);
                      { do not use R_SUBWHOLE
                        as movl %rdx,%eax
                        is invalid in assembler PM }
                      setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
                      taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                    end;
                end;
{$endif i8086}
            end
          else if (taicpu(p).oper[0]^.typ = top_ref) then
            begin
              { movzx (ref),reg ; and $const,reg
                -> mov (ref),reg ; and $(const and mask),reg }
              if GetNextInstruction(p, hp1) and
                (tai(hp1).typ = ait_instruction) and
                (taicpu(hp1).opcode = A_AND) and
                MatchOpType(taicpu(hp1),top_const,top_reg) and
                (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                begin
                  taicpu(p).opcode := A_MOV;
                  case taicpu(p).opsize Of
                    S_BL:
                      begin
                        DebugMsg('PeepHole Optimization,var13',p);
                        taicpu(p).changeopsize(S_L);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                      end;
                    S_WL:
                      begin
                        DebugMsg('PeepHole Optimization,var14',p);
                        taicpu(p).changeopsize(S_L);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
                      end;
                    S_BW:
                      begin
                        DebugMsg('PeepHole Optimization,var15',p);
                        taicpu(p).changeopsize(S_W);
                        taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
                      end;
{$ifdef x86_64}
                    S_BQ:
                      begin
                        DebugMsg('PeepHole Optimization,var16',p);
                        taicpu(p).changeopsize(S_Q);
                        taicpu(hp1).loadConst(
                          0, taicpu(hp1).oper[0]^.val and $ff);
                      end;
                    S_WQ:
                      begin
                        DebugMsg('PeepHole Optimization,var17',p);
                        taicpu(p).changeopsize(S_Q);
                        taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
                      end;
                    S_LQ:
                      begin
                        DebugMsg('PeepHole Optimization,var18',p);
                        taicpu(p).changeopsize(S_Q);
                        taicpu(hp1).loadConst(
                          0, taicpu(hp1).oper[0]^.val and $ffffffff);
                      end;
{$endif x86_64}
                    else
                      Internalerror(2017050704)
                  end;
                end;
            end;
      end;
  end;
  2358. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  2359. var
  2360. hp1 : tai;
  2361. begin
  2362. Result:=false;
  2363. if not(GetNextInstruction(p, hp1)) then
  2364. exit;
  2365. if MatchOpType(taicpu(p),top_const,top_reg) and
  2366. MatchInstruction(hp1,A_AND,[]) and
  2367. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2368. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2369. { the second register must contain the first one, so compare their subreg types }
  2370. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2371. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  2372. { change
  2373. and const1, reg
  2374. and const2, reg
  2375. to
  2376. and (const1 and const2), reg
  2377. }
  2378. begin
  2379. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  2380. DebugMsg('Peephole AndAnd2And done',hp1);
  2381. asml.remove(p);
  2382. p.Free;
  2383. p:=hp1;
  2384. Result:=true;
  2385. exit;
  2386. end
  2387. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2388. MatchInstruction(hp1,A_MOVZX,[]) and
  2389. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2390. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2391. (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2392. (((taicpu(p).opsize=S_W) and
  2393. (taicpu(hp1).opsize=S_BW)) or
  2394. ((taicpu(p).opsize=S_L) and
  2395. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2396. {$ifdef x86_64}
  2397. or
  2398. ((taicpu(p).opsize=S_Q) and
  2399. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2400. {$endif x86_64}
  2401. ) then
  2402. begin
  2403. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2404. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  2405. ) or
  2406. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2407. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  2408. {$ifdef x86_64}
  2409. or
  2410. (((taicpu(hp1).opsize)=S_LQ) and
  2411. ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
  2412. )
  2413. {$endif x86_64}
  2414. then
  2415. begin
  2416. DebugMsg('Peephole AndMovzToAnd done',p);
  2417. asml.remove(hp1);
  2418. hp1.free;
  2419. end;
  2420. end
  2421. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2422. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  2423. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2424. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2425. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2426. (((taicpu(p).opsize=S_W) and
  2427. (taicpu(hp1).opsize=S_BW)) or
  2428. ((taicpu(p).opsize=S_L) and
  2429. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2430. {$ifdef x86_64}
  2431. or
  2432. ((taicpu(p).opsize=S_Q) and
  2433. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2434. {$endif x86_64}
  2435. ) then
  2436. begin
  2437. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2438. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  2439. ) or
  2440. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2441. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  2442. {$ifdef x86_64}
  2443. or
  2444. (((taicpu(hp1).opsize)=S_LQ) and
  2445. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  2446. )
  2447. {$endif x86_64}
  2448. then
  2449. begin
  2450. DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
  2451. asml.remove(hp1);
  2452. hp1.free;
  2453. end;
  2454. end
  2455. else if (taicpu(p).oper[1]^.typ = top_reg) and
  2456. (hp1.typ = ait_instruction) and
  2457. (taicpu(hp1).is_jmp) and
  2458. (taicpu(hp1).opcode<>A_JMP) and
  2459. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  2460. { change
  2461. and x, reg
  2462. jxx
  2463. to
  2464. test x, reg
  2465. jxx
  2466. if reg is deallocated before the
  2467. jump, but only if it's a conditional jump (PFV)
  2468. }
  2469. taicpu(p).opcode := A_TEST;
  2470. end;
  2471. procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
  2472. begin
  2473. if MatchOperand(taicpu(p).oper[0]^,0) and
  2474. (taicpu(p).oper[1]^.typ = Top_Reg) and
  2475. not(RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2476. { change "mov $0, %reg" into "xor %reg, %reg" }
  2477. begin
  2478. taicpu(p).opcode := A_XOR;
  2479. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  2480. end;
  2481. end;
  2482. end.