{ aoptx86.pas — x86 peephole optimizer unit (partial listing) }
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptx86;
  18. {$i fpcdefs.inc}
  19. {$define DEBUG_AOPTCPU}
  20. interface
  21. uses
  22. globtype,
  23. cpubase,
  24. aasmtai,aasmcpu,
  25. cgbase,cgutils,
  26. aopt,aoptobj;
type
  { peephole optimizer shared by the x86 targets; concrete CPU optimizers
    derive from this class }
  TX86AsmOptimizer = class(TAsmOptimizer)
    { returns true if instruction hp loads a completely new value into reg
      (i.e. the old contents of reg are irrelevant afterwards) }
    function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
    { generic-optimizer hook; on x86 this simply delegates to
      RegReadByInstruction }
    function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
    { returns true if instruction hp reads reg, either via an explicit
      operand/reference or implicitly (e.g. MUL/DIV reading EAX/EDX,
      conditional instructions reading individual flags) }
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
  protected
    { checks whether loading a new value in reg1 overwrites the entirety of reg2 }
    function Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    { checks whether reading the value in reg1 depends on the value of reg2. This
      is very similar to SuperRegisterEquals, except it takes into account that
      R_SUBH and R_SUBL are independent (e.g. reading from AL does not
      depend on the value in AH). }
    function Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    { inserts s as an assembler comment before p (active in DEBUG_AOPTCPU
      builds only) }
    procedure DebugMsg(const s : string; p : tai);inline;
    { allocates reg between (and including) instructions p1 and p2 }
    procedure AllocRegBetween(reg : tregister; p1,p2 : tai;var initialusedregs : TAllUsedRegs);
    class function IsExitCode(p : tai) : boolean;
    class function isFoldableArithOp(hp1 : taicpu; reg : tregister) : boolean;
    procedure RemoveLastDeallocForFuncRes(p : tai);
    { pre-pass: folds shr/sar + shl pairs with constant shift counts }
    function PrePeepholeOptSxx(var p : tai) : boolean;
    { per-opcode optimizations for the individual optimizer passes; p is
      the instruction under consideration }
    function OptPass1AND(var p : tai) : boolean;
    function OptPass1VMOVAP(var p : tai) : boolean;
    function OptPass1VOP(const p : tai) : boolean;
    function OptPass1MOV(var p : tai) : boolean;
    function OptPass1Movx(var p : tai) : boolean;
    function OptPass2MOV(var p : tai) : boolean;
    function OptPass2Imul(var p : tai) : boolean;
    function OptPass2Jmp(var p : tai) : boolean;
    function OptPass2Jcc(var p : tai) : boolean;
    procedure PostPeepholeOptMov(const p : tai);
  end;
  { the MatchInstruction overloads return true when instr is an assembler
    instruction carrying one of the given opcodes and, if opsize is
    non-empty, one of the given operand sizes }
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
  function MatchInstruction(const instr: tai; const ops: array of TAsmOp; const opsize: topsizes): boolean;
  { true when oper is a register operand holding exactly reg }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
  { true when oper is a constant operand with value a }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
  { true when both operands have the same type and the same value }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
  { field-wise equality of two memory references }
  function RefsEqual(const r1, r2: treference): boolean;
  { true when ref uses only the given base/index registers and has no
    offset, symbol or segment override; NR_INVALID matches any register }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
  { returns true, if ref is a reference using only the registers passed as base and index
    and having an offset }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
  69. implementation
  70. uses
  71. cutils,verbose,
  72. globals,
  73. cpuinfo,
  74. procinfo,
  75. aasmbase,
  76. aoptutils,
  77. symconst,symsym,
  78. itcpugas;
  function MatchInstruction(const instr: tai; const op: TAsmOp; const opsize: topsizes): boolean;
    begin
      { only assembler instructions can match }
      if instr.typ<>ait_instruction then
        exit(false);
      if taicpu(instr).opcode<>op then
        exit(false);
      { an empty opsize set accepts any operand size }
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  function MatchInstruction(const instr: tai; const op1,op2: TAsmOp; const opsize: topsizes): boolean;
    begin
      { as the single-opcode variant, but accepts either of two opcodes }
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (taicpu(instr).opcode<>op1) and
         (taicpu(instr).opcode<>op2) then
        exit;
      { an empty opsize set accepts any operand size }
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  function MatchInstruction(const instr: tai; const op1,op2,op3: TAsmOp; const opsize: topsizes): boolean;
    begin
      { as the single-opcode variant, but accepts any of three opcodes }
      result:=false;
      if instr.typ<>ait_instruction then
        exit;
      if (taicpu(instr).opcode<>op1) and
         (taicpu(instr).opcode<>op2) and
         (taicpu(instr).opcode<>op3) then
        exit;
      { an empty opsize set accepts any operand size }
      result:=(opsize=[]) or (taicpu(instr).opsize in opsize);
    end;
  { open-array variant of MatchInstruction: true when instr is an
    assembler instruction whose opcode is any element of ops and, if
    opsize is non-empty, whose operand size is in opsize }
  function MatchInstruction(const instr : tai;const ops : array of TAsmOp;
    const opsize : topsizes) : boolean;
    var
      op : TAsmOp;
    begin
      result:=false;
      { hoisted out of the loop: neither the tai type nor the operand size
        depends on the candidate opcode, so test them only once }
      if instr.typ<>ait_instruction then
        exit;
      if not((opsize=[]) or (taicpu(instr).opsize in opsize)) then
        exit;
      for op in ops do
        if taicpu(instr).opcode=op then
          begin
            result:=true;
            exit;
          end;
    end;
  { true when oper is a register operand holding exactly reg }
  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
    begin
      if oper.typ<>top_reg then
        result:=false
      else
        result:=oper.reg=reg;
    end;
  { true when oper is a constant operand with value a }
  function MatchOperand(const oper: TOper; const a: tcgint): boolean; inline;
    begin
      if oper.typ<>top_const then
        result:=false
      else
        result:=oper.val=a;
    end;
  { true when both operands have the same type and the same value }
  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean;
    begin
      { operands of different kinds never match }
      if oper1.typ<>oper2.typ then
        exit(false);
      case oper1.typ of
        top_const:
          result:=oper1.val=oper2.val;
        top_reg:
          result:=oper1.reg=oper2.reg;
        top_ref:
          result:=RefsEqual(oper1.ref^,oper2.ref^);
        else
          internalerror(2013102801);
      end;
    end;
  { field-wise equality of two memory references: base, index, scale,
    offset, segment and the symbol-related fields must all agree }
  function RefsEqual(const r1, r2: treference): boolean;
    begin
      result:=
        (r1.base=r2.base) and
        (r1.index=r2.index) and
        (r1.scalefactor=r2.scalefactor) and
        (r1.offset=r2.offset) and
        (r1.segment=r2.segment) and
        (r1.symbol=r2.symbol) and
        (r1.refaddr=r2.refaddr) and
        (r1.relsymbol=r2.relsymbol);
    end;
  { true when ref is a plain base/index reference: zero offset, no symbol,
    no segment override and a scale factor of at most 1. Passing
    NR_INVALID for base or index skips that particular register check. }
  function MatchReference(const ref : treference;base,index : TRegister) : Boolean;
    begin
      result:=false;
      if ref.offset<>0 then
        exit;
      if not(ref.scalefactor in [0,1]) then
        exit;
      if (ref.segment<>NR_NO) or (ref.symbol<>nil) or (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      if (index<>NR_INVALID) and (ref.index<>index) then
        exit;
      result:=true;
    end;
  { as MatchReference, but the offset is unconstrained: only the symbol,
    segment, scale and (optionally) base/index registers are checked.
    NR_INVALID for base or index skips that register check. }
  function MatchReferenceWithOffset(const ref : treference;base,index : TRegister) : Boolean;
    begin
      result:=false;
      if not(ref.scalefactor in [0,1]) then
        exit;
      if (ref.segment<>NR_NO) or (ref.symbol<>nil) or (ref.relsymbol<>nil) then
        exit;
      if (base<>NR_INVALID) and (ref.base<>base) then
        exit;
      if (index<>NR_INVALID) and (ref.index<>index) then
        exit;
      result:=true;
    end;
  function TX86AsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
    begin
      { thin adapter for the generic optimizer interface: on x86,
        "loads from reg" is the same question as "reads reg" }
      Result:=RegReadByInstruction(reg,hp);
    end;
  { Returns true if instruction hp reads register reg, either through an
    explicit operand/memory reference or implicitly (fixed-register
    instructions such as MUL/DIV, and conditional instructions that read
    individual flag bits). }
  function TX86AsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
    var
      p: taicpu;
      opcount: longint;
    begin
      RegReadByInstruction := false;
      if hp.typ <> ait_instruction then
        exit;
      p := taicpu(hp);
      case p.opcode of
        A_CALL:
          { conservatively assume a call reads every register }
          regreadbyinstruction := true;
        A_IMUL:
          case p.ops of
            1:
              { single-operand IMUL implicitly reads EAX (but for byte-sized
                IMUL only AL is read, so AH does not count) }
              regReadByInstruction := RegInOp(reg,p.oper[0]^) or
                (
                  ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
                  ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
                );
            2,3:
              { two/three-operand IMUL reads only its explicit source operands }
              regReadByInstruction :=
                reginop(reg,p.oper[0]^) or
                reginop(reg,p.oper[1]^);
          end;
        A_MUL:
          begin
            { MUL implicitly reads EAX (AL only for byte size, hence the
              R_SUBH exclusion) }
            regReadByInstruction := RegInOp(reg,p.oper[0]^) or
              (
                ((getregtype(reg)=R_INTREGISTER) and (getsupreg(reg)=RS_EAX)) and
                ((getsubreg(reg)<>R_SUBH) or (p.opsize<>S_B))
              );
          end;
        A_IDIV,A_DIV:
          begin
            { division implicitly reads EAX, and also EDX except for the
              byte-sized form }
            regReadByInstruction := RegInOp(reg,p.oper[0]^) or
              (
                (getregtype(reg)=R_INTREGISTER) and
                (
                  (getsupreg(reg)=RS_EAX) or ((getsupreg(reg)=RS_EDX) and (p.opsize<>S_B))
                )
              );
          end;
        else
          begin
            { LEA only computes an address and performs no memory access,
              so a segment register is never actually read by it }
            if (p.opcode=A_LEA) and is_segment_reg(reg) then
              begin
                RegReadByInstruction := false;
                exit;
              end;
            { any register appearing in a memory reference is read for the
              address computation }
            for opcount := 0 to p.ops-1 do
              if (p.oper[opCount]^.typ = top_ref) and
                 RegInRef(reg,p.oper[opcount]^.ref^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
            { special handling for SSE MOVSD }
            if (p.opcode=A_MOVSD) and (p.ops>0) then
              begin
                if p.ops<>2 then
                  internalerror(2017042702);
                { the source operand is always read; when both operands are
                  registers the destination also counts as read (its upper
                  part survives the move) }
                regReadByInstruction := reginop(reg,p.oper[0]^) or
                  (
                    (p.oper[1]^.typ=top_reg) and (p.oper[0]^.typ=top_reg) and reginop(reg, p.oper[1]^)
                  );
                exit;
              end;
            { fall back to the instruction property table (insprop) which
              records implicit register and flag accesses per opcode }
            with insprop[p.opcode] do
              begin
                if getregtype(reg)=R_INTREGISTER then
                  begin
                    case getsupreg(reg) of
                      RS_EAX:
                        if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                          begin
                            RegReadByInstruction := true;
                            exit
                          end;
                      RS_ECX:
                        if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                          begin
                            RegReadByInstruction := true;
                            exit
                          end;
                      RS_EDX:
                        if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                          begin
                            RegReadByInstruction := true;
                            exit
                          end;
                      RS_EBX:
                        if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                          begin
                            RegReadByInstruction := true;
                            exit
                          end;
                      RS_ESP:
                        if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                          begin
                            RegReadByInstruction := true;
                            exit
                          end;
                      RS_EBP:
                        if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                          begin
                            RegReadByInstruction := true;
                            exit
                          end;
                      RS_ESI:
                        if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                          begin
                            RegReadByInstruction := true;
                            exit
                          end;
                      RS_EDI:
                        if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                          begin
                            RegReadByInstruction := true;
                            exit
                          end;
                    end;
                  end;
                if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
                  begin
                    { for a conditional instruction, only the specific flag
                      bits tested by its condition code are read; a query for
                      the whole flags register (R_SUBW and up) is handled by
                      the generic case below instead }
                    if (Ch_RFLAGScc in Ch) and not(getsubreg(reg) in [R_SUBW,R_SUBD,R_SUBQ]) then
                      begin
                        case p.condition of
                          C_A,C_NBE, { CF=0 and ZF=0 }
                          C_BE,C_NA: { CF=1 or ZF=1 }
                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY,R_SUBFLAGZERO];
                          C_AE,C_NB,C_NC, { CF=0 }
                          C_B,C_NAE,C_C: { CF=1 }
                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGCARRY];
                          C_NE,C_NZ, { ZF=0 }
                          C_E,C_Z: { ZF=1 }
                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO];
                          C_G,C_NLE, { ZF=0 and SF=OF }
                          C_LE,C_NG: { ZF=1 or SF<>OF }
                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGZERO,R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                          C_GE,C_NL, { SF=OF }
                          C_L,C_NGE: { SF<>OF }
                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN,R_SUBFLAGOVERFLOW];
                          C_NO, { OF=0 }
                          C_O: { OF=1 }
                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGOVERFLOW];
                          C_NP,C_PO, { PF=0 }
                          C_P,C_PE: { PF=1 }
                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGPARITY];
                          C_NS, { SF=0 }
                          C_S: { SF=1 }
                            RegReadByInstruction:=getsubreg(reg) in [R_SUBFLAGSIGN];
                          else
                            internalerror(2017042701);
                        end;
                        if RegReadByInstruction then
                          exit;
                      end;
                    { map the queried flags sub-register onto the per-flag
                      read/modify change-info sets }
                    case getsubreg(reg) of
                      R_SUBW,R_SUBD,R_SUBQ:
                        RegReadByInstruction :=
                          [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
                           Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
                           Ch_RDirFlag,Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc]*Ch<>[];
                      R_SUBFLAGCARRY:
                        RegReadByInstruction:=[Ch_RCarryFlag,Ch_RWCarryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGPARITY:
                        RegReadByInstruction:=[Ch_RParityFlag,Ch_RWParityFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGAUXILIARY:
                        RegReadByInstruction:=[Ch_RAuxiliaryFlag,Ch_RWAuxiliaryFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGZERO:
                        RegReadByInstruction:=[Ch_RZeroFlag,Ch_RWZeroFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGSIGN:
                        RegReadByInstruction:=[Ch_RSignFlag,Ch_RWSignFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGOVERFLOW:
                        RegReadByInstruction:=[Ch_ROverflowFlag,Ch_RWOverflowFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGINTERRUPT:
                        RegReadByInstruction:=[Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      R_SUBFLAGDIRECTION:
                        RegReadByInstruction:=[Ch_RDirFlag,Ch_RFlags,Ch_RWFlags]*Ch<>[];
                      else
                        internalerror(2017042601);
                    end;
                    exit;
                  end;
                { e.g. MOV %reg,%reg does not really read its source when
                  both operands are the same register }
                if (Ch_NoReadIfEqualRegs in Ch) and (p.ops=2) and
                   (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
                   (p.oper[0]^.reg=p.oper[1]^.reg) then
                  exit;
                { operand is read if the change info marks it as read (R),
                  read-write (RW) or modified (M) }
                if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
                  begin
                    RegReadByInstruction := true;
                    exit
                  end;
                if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
                  begin
                    RegReadByInstruction := true;
                    exit
                  end;
                if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
                  begin
                    RegReadByInstruction := true;
                    exit
                  end;
              end;
          end;
      end;
    end;
{$ifdef DEBUG_AOPTCPU}
  { inserts s as an assembler comment immediately before p, so that
    optimizer decisions can be traced in the generated assembler output }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);
    begin
      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
    end;
{$else DEBUG_AOPTCPU}
  { no-op in non-debug builds }
  procedure TX86AsmOptimizer.DebugMsg(const s: string;p : tai);inline;
    begin
    end;
{$endif DEBUG_AOPTCPU}
  { checks whether writing a new value into reg1 replaces the complete
    contents of reg2, i.e. whether no bit of reg2's old value survives }
  function TX86AsmOptimizer.Reg1WriteOverwritesReg2Entirely(reg1, reg2: tregister): boolean;
    begin
      { registers with different super registers never overlap }
      if not SuperRegistersEqual(reg1,reg2) then
        exit(false);
      { for non-integer registers equal super registers imply a full
        overwrite (there are no partially addressable sub registers) }
      if getregtype(reg1)<>R_INTREGISTER then
        exit(true);
      case getsubreg(reg1) of
        R_SUBL:
          { a write to R_SUBL leaves R_SUBH untouched and, for R_SUBW or
            wider, preserves the upper bits:
              reg2 := (reg2 and $ffffff00) or byte(reg1) }
          result:=getsubreg(reg2)=R_SUBL;
        R_SUBH:
          { likewise, a write to R_SUBH keeps R_SUBL and the upper bits:
              reg2 := (reg2 and $ffff00ff) or (reg1 and $ff00) }
          result:=getsubreg(reg2)=R_SUBH;
        R_SUBW:
          { a 16-bit write covers both byte halves but, for R_SUBD or
            wider, keeps the high 16 bits:
              reg2 := (reg2 and $ffff0000) or word(reg1) }
          result:=getsubreg(reg2) in [R_SUBL,R_SUBH,R_SUBW];
        R_SUBD,
        R_SUBQ:
          { a 32-bit write zero-extends into the full 64-bit register on
            x86_64, so it overwrites every sub register }
          result:=true;
        else
          internalerror(2017042801);
      end;
    end;
  { checks whether the value obtained by reading reg1 depends on the
    current contents of reg2; similar to SuperRegistersEqual, but aware
    that the low and high byte registers (AL/AH) are independent }
  function TX86AsmOptimizer.Reg1ReadDependsOnReg2(reg1, reg2: tregister): boolean;
    begin
      { different super registers never influence each other }
      if not SuperRegistersEqual(reg1,reg2) then
        exit(false);
      { only integer registers have independently addressable byte halves }
      if getregtype(reg1)<>R_INTREGISTER then
        exit(true);
      case getsubreg(reg1) of
        R_SUBL:
          { reading AL depends on anything except AH }
          result:=getsubreg(reg2)<>R_SUBH;
        R_SUBH:
          { reading AH depends on anything except AL }
          result:=getsubreg(reg2)<>R_SUBL;
        R_SUBW,
        R_SUBD,
        R_SUBQ:
          { a word-or-wider read covers every sub register }
          result:=true;
        else
          internalerror(2017042802);
      end;
    end;
  { Pre-pass optimization for SHR/SAR p followed by SHL with constant
    counts on the same operand; folds the pair into "sar/and", "shl/and"
    or a single "and" depending on the two shift counts.
    Returns true when the code was changed (fix: the original never set
    Result, so callers could not tell a transformation had happened). }
  function TX86AsmOptimizer.PrePeepholeOptSxx(var p : tai) : boolean;
    var
      hp1 : tai;
      l : TCGInt;
    begin
      result:=false;
      { changes the code sequence
          shr/sar const1, x
          shl     const2, x
        to either "sar/and", "shl/and" or just "and" depending on const1
        and const2 }
      if GetNextInstruction(p, hp1) and
        MatchInstruction(hp1,A_SHL,[]) and
        (taicpu(p).oper[0]^.typ = top_const) and
        (taicpu(hp1).oper[0]^.typ = top_const) and
        (taicpu(hp1).opsize = taicpu(p).opsize) and
        (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
        OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
        begin
          if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
            not(cs_opt_size in current_settings.optimizerswitches) then
            begin
              { shr/sar const1, %reg
                shl     const2, %reg
                with const1 > const2:
                keep a reduced shift and mask off the low const2 bits }
              taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
              taicpu(hp1).opcode := A_AND;
              l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
              case taicpu(p).opsize Of
                S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                S_Q: taicpu(hp1).loadConst(0,l Xor aint($ffffffffffffffff));
                else
                  Internalerror(2017050703)
              end;
              result:=true;
            end
          else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
            not(cs_opt_size in current_settings.optimizerswitches) then
            begin
              { shr/sar const1, %reg
                shl     const2, %reg
                with const1 < const2:
                mask first, then keep a reduced left shift }
              taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
              taicpu(p).opcode := A_AND;
              l := (1 shl (taicpu(p).oper[0]^.val))-1;
              case taicpu(p).opsize Of
                S_B: taicpu(p).loadConst(0,l Xor $ff);
                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
                else
                  Internalerror(2017050702)
              end;
              result:=true;
            end
          else if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
            begin
              { shr/sar const1, %reg
                shl     const2, %reg
                with const1 = const2:
                both shifts collapse into a single mask; this also shrinks
                the code, so it is done even when optimizing for size }
              taicpu(p).opcode := A_AND;
              l := (1 shl (taicpu(p).oper[0]^.val))-1;
              case taicpu(p).opsize Of
                S_B: taicpu(p).loadConst(0,l Xor $ff);
                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                S_Q: taicpu(p).loadConst(0,l Xor aint($ffffffffffffffff));
                else
                  Internalerror(2017050701)
              end;
              { the SHL is no longer needed }
              asml.remove(hp1);
              hp1.free;
              result:=true;
            end;
        end;
    end;
  { allocates register reg between (and including) instructions p1 and p2
    the type of p1 and p2 must not be in SkipInstr
    note that this routine is both called from the peephole optimizer
    where optinfo is not yet initialised) and from the cse (where it is) }
  procedure TX86AsmOptimizer.AllocRegBetween(reg: tregister; p1, p2: tai; var initialusedregs: TAllUsedRegs);
    var
      hp, start: tai;
      removedsomething,
      firstRemovedWasAlloc,
      lastRemovedWasDealloc: boolean;
    begin
{$ifdef EXTDEBUG}
      { if assigned(p1.optinfo) and
        (ptaiprop(p1.optinfo)^.usedregs <> initialusedregs) then
        internalerror(2004101010); }
{$endif EXTDEBUG}
      start := p1;
      if (reg = NR_ESP) or
         (reg = current_procinfo.framepointer) or
         not(assigned(p1)) then
        { this happens with registers which are loaded implicitely, outside the }
        { current block (e.g. esi with self) }
        exit;
      { make sure we allocate it for this instruction }
      getnextinstruction(p2,p2);
      lastRemovedWasDealloc := false;
      removedSomething := false;
      firstRemovedWasAlloc := false;
{$ifdef allocregdebug}
      hp := tai_comment.Create(strpnew('allocating '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' from here...'));
      insertllitem(asml,p1.previous,p1,hp);
      hp := tai_comment.Create(strpnew('allocated '+std_regname(newreg(R_INTREGISTER,supreg,R_SUBWHOLE))+
        ' till here...'));
      insertllitem(asml,p2,p2.next,hp);
{$endif allocregdebug}
      { do it the safe way: always allocate the full super register,
        as we do no register re-allocation in the peephole optimizer,
        this does not hurt
      }
      case getregtype(reg) of
        R_MMREGISTER:
          reg:=newreg(R_MMREGISTER,getsupreg(reg),R_SUBMMWHOLE);
        R_INTREGISTER:
          reg:=newreg(R_INTREGISTER,getsupreg(reg),R_SUBWHOLE);
      end;
      { if the register is not yet live at p1, insert an explicit
        allocation marker in front of it and record it as used }
      if not(RegInUsedRegs(reg,initialusedregs)) then
        begin
          hp := tai_regalloc.alloc(reg,nil);
          insertllItem(p1.previous,p1,hp);
          IncludeRegInUsedRegs(reg,initialusedregs);
        end;
      { walk from p1 to p2 and strip all intermediate alloc/dealloc
        markers for reg, remembering what the first and last removed
        markers were so the boundary markers can be re-created below }
      while assigned(p1) and
        (p1 <> p2) do
        begin
          if assigned(p1.optinfo) then
            internalerror(2014022301); // IncludeRegInUsedRegs(reg,ptaiprop(p1.optinfo)^.usedregs);
          p1 := tai(p1.next);
          repeat
            while assigned(p1) and
              (p1.typ in (SkipInstr-[ait_regalloc])) Do
              p1 := tai(p1.next);
            { remove all allocation/deallocation info about the register in between }
            if assigned(p1) and
              (p1.typ = ait_regalloc) then
              begin
                { same super register, different sub register? }
                if SuperRegistersEqual(reg,tai_regalloc(p1).reg) and (tai_regalloc(p1).reg<>reg) then
                  begin
                    if (getsubreg(tai_regalloc(p1).reg)>getsubreg(reg)) or (getsubreg(reg)=R_SUBH) then
                      internalerror(2016101501);
                    tai_regalloc(p1).reg:=reg;
                  end;
                if tai_regalloc(p1).reg=reg then
                  begin
                    if not removedSomething then
                      begin
                        firstRemovedWasAlloc := tai_regalloc(p1).ratype=ra_alloc;
                        removedSomething := true;
                      end;
                    lastRemovedWasDealloc := (tai_regalloc(p1).ratype=ra_dealloc);
                    hp := tai(p1.Next);
                    asml.Remove(p1);
                    p1.free;
                    p1 := hp;
                  end
                else
                  p1 := tai(p1.next);
              end;
          until not(assigned(p1)) or
            not(p1.typ in SkipInstr);
        end;
      { re-establish the boundary markers: an alloc before the start if the
        first removed marker was an alloc, and a dealloc after the end if
        the last removed marker was a dealloc }
      if assigned(p1) then
        begin
          if firstRemovedWasAlloc then
            begin
              hp := tai_regalloc.Alloc(reg,nil);
              insertLLItem(start.previous,start,hp);
            end;
          if lastRemovedWasDealloc then
            begin
              hp := tai_regalloc.DeAlloc(reg,nil);
              insertLLItem(p1.previous,p1,hp);
            end;
        end;
    end;
function TX86AsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
  { Returns true if instruction hp loads a completely new value into reg,
    i.e. the value reg held before hp is irrelevant to hp's result, so an
    earlier write to reg is dead.  For the flags register the instruction
    property table (insprop) is consulted instead of the operand list. }
  var
    p: taicpu;
  begin
    if not assigned(hp) or
      (hp.typ <> ait_instruction) then
      begin
        Result := false;
        exit;
      end;
    p := taicpu(hp);
    if SuperRegistersEqual(reg,NR_DEFAULTFLAGS) then
      with insprop[p.opcode] do
        begin
          case getsubreg(reg) of
            { the whole flags register only gets a new value if every single
              arithmetic flag is rewritten by hp }
            R_SUBW,R_SUBD,R_SUBQ:
              Result:=
                RegLoadedWithNewValue(NR_CARRYFLAG,hp) and
                RegLoadedWithNewValue(NR_PARITYFLAG,hp) and
                RegLoadedWithNewValue(NR_AUXILIARYFLAG,hp) and
                RegLoadedWithNewValue(NR_ZEROFLAG,hp) and
                RegLoadedWithNewValue(NR_SIGNFLAG,hp) and
                RegLoadedWithNewValue(NR_OVERFLOWFLAG,hp);
            { an individual flag is rewritten if the instruction writes it in
              any way: cleared, set, written or written with undefined value }
            R_SUBFLAGCARRY:
              Result:=[Ch_W0CarryFlag,Ch_W1CarryFlag,Ch_WCarryFlag,Ch_WUCarryFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGPARITY:
              Result:=[Ch_W0ParityFlag,Ch_W1ParityFlag,Ch_WParityFlag,Ch_WUParityFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGAUXILIARY:
              Result:=[Ch_W0AuxiliaryFlag,Ch_W1AuxiliaryFlag,Ch_WAuxiliaryFlag,Ch_WUAuxiliaryFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGZERO:
              Result:=[Ch_W0ZeroFlag,Ch_W1ZeroFlag,Ch_WZeroFlag,Ch_WUZeroFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGSIGN:
              Result:=[Ch_W0SignFlag,Ch_W1SignFlag,Ch_WSignFlag,Ch_WUSignFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGOVERFLOW:
              Result:=[Ch_W0OverflowFlag,Ch_W1OverflowFlag,Ch_WOverflowFlag,Ch_WUOverflowFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGINTERRUPT:
              Result:=[Ch_W0IntFlag,Ch_W1IntFlag,Ch_WFlags]*Ch<>[];
            R_SUBFLAGDIRECTION:
              Result:=[Ch_W0DirFlag,Ch_W1DirFlag,Ch_WFlags]*Ch<>[];
            else
              internalerror(2017050501);
          end;
          exit;
        end;
    { for ordinary registers: hp loads reg with a new value if it overwrites
      reg entirely while none of its source operands depends on reg }
    Result :=
      { plain loads: the destination must cover reg entirely and the source
        must not read reg (neither as register nor inside a reference) }
      (((p.opcode = A_MOV) or
        (p.opcode = A_MOVZX) or
        (p.opcode = A_MOVSX) or
        (p.opcode = A_LEA) or
        (p.opcode = A_VMOVSS) or
        (p.opcode = A_VMOVSD) or
        (p.opcode = A_VMOVAPD) or
        (p.opcode = A_VMOVAPS) or
        (p.opcode = A_VMOVQ) or
        (p.opcode = A_MOVSS) or
        (p.opcode = A_MOVSD) or
        (p.opcode = A_MOVQ) or
        (p.opcode = A_MOVAPD) or
        (p.opcode = A_MOVAPS) or
{$ifndef x86_64}
        (p.opcode = A_LDS) or
        (p.opcode = A_LES) or
{$endif not x86_64}
        (p.opcode = A_LFS) or
        (p.opcode = A_LGS) or
        (p.opcode = A_LSS)) and
       (p.ops=2) and { A_MOVSD can have zero operands, so this check is needed }
       (p.oper[1]^.typ = top_reg) and
       (Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg)) and
       ((p.oper[0]^.typ = top_const) or
        ((p.oper[0]^.typ = top_reg) and
         not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
        ((p.oper[0]^.typ = top_ref) and
         not RegInRef(reg,p.oper[0]^.ref^)))) or
      ((p.opcode = A_POP) and
       (Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg))) or
      { 3-operand imul writes a fresh product into its destination }
      ((p.opcode = A_IMUL) and
       (p.ops=3) and
       (Reg1WriteOverwritesReg2Entirely(p.oper[2]^.reg,reg)) and
       (((p.oper[1]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[1]^.reg,reg))) or
        ((p.oper[1]^.typ=top_ref) and not(RegInRef(reg,p.oper[1]^.ref^))))) or
      { 1-operand mul/imul implicitly writes AX/DX:AX/EDX:EAX/RDX:RAX }
      ((((p.opcode = A_IMUL) or
         (p.opcode = A_MUL)) and
        (p.ops=1)) and
       (((p.oper[0]^.typ=top_reg) and not(Reg1ReadDependsOnReg2(p.oper[0]^.reg,reg))) or
        ((p.oper[0]^.typ=top_ref) and not(RegInRef(reg,p.oper[0]^.ref^)))) and
       (((p.opsize=S_B) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
        ((p.opsize=S_W) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
        ((p.opsize=S_L) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg))
{$ifdef x86_64}
        or ((p.opsize=S_Q) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg))
{$endif x86_64}
       )) or
      { sign-extension instructions overwrite the high part of the pair }
      ((p.opcode = A_CWD) and Reg1WriteOverwritesReg2Entirely(NR_DX,reg)) or
      ((p.opcode = A_CDQ) and Reg1WriteOverwritesReg2Entirely(NR_EDX,reg)) or
{$ifdef x86_64}
      ((p.opcode = A_CQO) and Reg1WriteOverwritesReg2Entirely(NR_RDX,reg)) or
{$endif x86_64}
      ((p.opcode = A_CBW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg) and not(Reg1ReadDependsOnReg2(NR_AL,reg))) or
{$ifndef x86_64}
      ((p.opcode = A_LDS) and (reg=NR_DS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
      ((p.opcode = A_LES) and (reg=NR_ES) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$endif not x86_64}
      ((p.opcode = A_LFS) and (reg=NR_FS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
      ((p.opcode = A_LGS) and (reg=NR_GS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
      ((p.opcode = A_LSS) and (reg=NR_SS) and not(RegInRef(reg,p.oper[0]^.ref^))) or
{$ifndef x86_64}
      ((p.opcode = A_AAM) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
{$endif not x86_64}
      ((p.opcode = A_LAHF) and Reg1WriteOverwritesReg2Entirely(NR_AH,reg)) or
      ((p.opcode = A_LODSB) and Reg1WriteOverwritesReg2Entirely(NR_AL,reg)) or
      ((p.opcode = A_LODSW) and Reg1WriteOverwritesReg2Entirely(NR_AX,reg)) or
      ((p.opcode = A_LODSD) and Reg1WriteOverwritesReg2Entirely(NR_EAX,reg)) or
{$ifdef x86_64}
      ((p.opcode = A_LODSQ) and Reg1WriteOverwritesReg2Entirely(NR_RAX,reg)) or
{$endif x86_64}
      ((p.opcode = A_SETcc) and (p.oper[0]^.typ=top_reg) and Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
      (((p.opcode = A_FSTSW) or
        (p.opcode = A_FNSTSW)) and
       (p.oper[0]^.typ=top_reg) and
       Reg1WriteOverwritesReg2Entirely(p.oper[0]^.reg,reg)) or
      { "xor/sub/sbb reg,reg" zeroes (or carry-fills) reg without reading its
        old value in a meaningful way }
      (((p.opcode = A_XOR) or (p.opcode = A_SUB) or (p.opcode = A_SBB)) and
       (p.oper[0]^.typ=top_reg) and (p.oper[1]^.typ=top_reg) and
       (p.oper[0]^.reg=p.oper[1]^.reg) and
       Reg1WriteOverwritesReg2Entirely(p.oper[1]^.reg,reg));
  end;
class function TX86AsmOptimizer.IsExitCode(p : tai) : boolean;
  { Returns true if p starts the procedure's exit sequence.  Recognized
    patterns (after skipping an optional leading NOP) are:
      - RET
      - LEAVE; RET
      - restore of the stack pointer from the frame pointer (either
        "mov framepointer,sp" or "lea x(framepointer),sp"), followed by
        "pop framepointer" and RET. }
  var
    hp2,hp3 : tai;
  begin
    { some x86-64 issue a NOP before the real exit code }
    if MatchInstruction(p,A_NOP,[]) then
      GetNextInstruction(p,p);
    result:=assigned(p) and (p.typ=ait_instruction) and
      ((taicpu(p).opcode = A_RET) or
       ((taicpu(p).opcode=A_LEAVE) and
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_RET,[S_NO])
       ) or
       { explicit epilogue: restore sp from the frame pointer ... }
       ((((taicpu(p).opcode=A_MOV) and
          MatchOpType(taicpu(p),top_reg,top_reg) and
          (taicpu(p).oper[0]^.reg=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)) or
         ((taicpu(p).opcode=A_LEA) and
          MatchOpType(taicpu(p),top_ref,top_reg) and
          (taicpu(p).oper[0]^.ref^.base=current_procinfo.framepointer) and
          (taicpu(p).oper[1]^.reg=NR_STACK_POINTER_REG)
         )
        ) and
        { ... then pop the saved frame pointer and return }
        GetNextInstruction(p,hp2) and
        MatchInstruction(hp2,A_POP,[reg2opsize(current_procinfo.framepointer)]) and
        MatchOpType(taicpu(hp2),top_reg) and
        (taicpu(hp2).oper[0]^.reg=current_procinfo.framepointer) and
        GetNextInstruction(hp2,hp3) and
        MatchInstruction(hp3,A_RET,[S_NO])
       )
      );
  end;
  789. class function TX86AsmOptimizer.isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
  790. begin
  791. isFoldableArithOp := False;
  792. case hp1.opcode of
  793. A_ADD,A_SUB,A_OR,A_XOR,A_AND,A_SHL,A_SHR,A_SAR:
  794. isFoldableArithOp :=
  795. ((taicpu(hp1).oper[0]^.typ = top_const) or
  796. ((taicpu(hp1).oper[0]^.typ = top_reg) and
  797. (taicpu(hp1).oper[0]^.reg <> reg))) and
  798. (taicpu(hp1).oper[1]^.typ = top_reg) and
  799. (taicpu(hp1).oper[1]^.reg = reg);
  800. A_INC,A_DEC,A_NEG,A_NOT:
  801. isFoldableArithOp :=
  802. (taicpu(hp1).oper[0]^.typ = top_reg) and
  803. (taicpu(hp1).oper[0]^.reg = reg);
  804. end;
  805. end;
procedure TX86AsmOptimizer.RemoveLastDeallocForFuncRes(p: tai);
  { After a dead store to the function result location has been removed,
    delete the last deallocation marker of the integer function-result
    register(s) (EAX; additionally EDX for 8-byte ordinal results) found
    when searching backwards from p, so the register stays marked as
    allocated up to the exit code. }

    procedure DoRemoveLastDeallocForFuncRes( supreg: tsuperregister);
      { Walks backwards from p and removes the first ra_dealloc marker of
        the given integer super-register; stops at the list start or as
        soon as an instruction actually uses that register. }
      var
        hp2: tai;
      begin
        hp2 := p;
        repeat
          hp2 := tai(hp2.previous);
          if assigned(hp2) and
            (hp2.typ = ait_regalloc) and
            (tai_regalloc(hp2).ratype=ra_dealloc) and
            (getregtype(tai_regalloc(hp2).reg) = R_INTREGISTER) and
            (getsupreg(tai_regalloc(hp2).reg) = supreg) then
            begin
              asml.remove(hp2);
              hp2.free;
              break;
            end;
        until not(assigned(hp2)) or regInInstruction(newreg(R_INTREGISTER,supreg,R_SUBWHOLE),hp2);
      end;

  begin
    case current_procinfo.procdef.returndef.typ of
      arraydef,recorddef,pointerdef,
      stringdef,enumdef,procdef,objectdef,errordef,
      filedef,setdef,procvardef,
      classrefdef,forwarddef:
        DoRemoveLastDeallocForFuncRes(RS_EAX);
      orddef:
        if current_procinfo.procdef.returndef.size <> 0 then
          begin
            DoRemoveLastDeallocForFuncRes(RS_EAX);
            { for int64/qword }
            if current_procinfo.procdef.returndef.size = 8 then
              DoRemoveLastDeallocForFuncRes(RS_EDX);
          end;
    end;
  end;
  843. function TX86AsmOptimizer.OptPass1VMOVAP(var p : tai) : boolean;
  844. var
  845. TmpUsedRegs : TAllUsedRegs;
  846. hp1,hp2 : tai;
  847. begin
  848. result:=false;
  849. if MatchOpType(taicpu(p),top_reg,top_reg) then
  850. begin
  851. { vmova* reg1,reg1
  852. =>
  853. <nop> }
  854. if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
  855. begin
  856. GetNextInstruction(p,hp1);
  857. asml.Remove(p);
  858. p.Free;
  859. p:=hp1;
  860. result:=true;
  861. end
  862. else if GetNextInstruction(p,hp1) then
  863. begin
  864. if MatchInstruction(hp1,[taicpu(p).opcode],[S_NO]) and
  865. MatchOpType(taicpu(hp1),top_reg,top_reg) and
  866. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
  867. begin
  868. { vmova* reg1,reg2
  869. vmova* reg2,reg3
  870. dealloc reg2
  871. =>
  872. vmova* reg1,reg3 }
  873. CopyUsedRegs(TmpUsedRegs);
  874. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  875. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,TmpUsedRegs)) then
  876. begin
  877. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  878. asml.Remove(hp1);
  879. hp1.Free;
  880. result:=true;
  881. end
  882. { special case:
  883. vmova* reg1,reg2
  884. vmova* reg2,reg1
  885. =>
  886. vmova* reg1,reg2 }
  887. else if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
  888. begin
  889. asml.Remove(hp1);
  890. hp1.Free;
  891. result:=true;
  892. end
  893. end
  894. else if MatchInstruction(hp1,[A_VFMADD132PD,A_VFNMADD231SD,A_VFMADD231SD],[S_NO]) and
  895. { we mix single and double opperations here because we assume that the compiler
  896. generates vmovapd only after double operations and vmovaps only after single operations }
  897. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[2]^) and
  898. GetNextInstruction(hp1,hp2) and
  899. MatchInstruction(hp2,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  900. MatchOperand(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) then
  901. begin
  902. CopyUsedRegs(TmpUsedRegs);
  903. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  904. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  905. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs))
  906. then
  907. begin
  908. taicpu(hp1).loadoper(2,taicpu(p).oper[0]^);
  909. asml.Remove(p);
  910. p.Free;
  911. asml.Remove(hp2);
  912. hp2.Free;
  913. p:=hp1;
  914. end;
  915. end;
  916. end;
  917. end;
  918. end;
  919. function TX86AsmOptimizer.OptPass1VOP(const p : tai) : boolean;
  920. var
  921. TmpUsedRegs : TAllUsedRegs;
  922. hp1 : tai;
  923. begin
  924. result:=false;
  925. if GetNextInstruction(p,hp1) and
  926. { we mix single and double opperations here because we assume that the compiler
  927. generates vmovapd only after double operations and vmovaps only after single operations }
  928. MatchInstruction(hp1,A_VMOVAPD,A_VMOVAPS,[S_NO]) and
  929. MatchOperand(taicpu(p).oper[2]^,taicpu(hp1).oper[0]^) and
  930. (taicpu(hp1).oper[1]^.typ=top_reg) then
  931. begin
  932. CopyUsedRegs(TmpUsedRegs);
  933. UpdateUsedRegs(TmpUsedRegs, tai(p.next));
  934. if not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg,hp1,TmpUsedRegs)
  935. ) then
  936. begin
  937. taicpu(p).loadoper(2,taicpu(hp1).oper[1]^);
  938. asml.Remove(hp1);
  939. hp1.Free;
  940. result:=true;
  941. end;
  942. end;
  943. end;
function TX86AsmOptimizer.OptPass1MOV(var p : tai) : boolean;
  { Pass-1 peephole optimizations for a MOV instruction at p: removes
    redundant and dead moves, forwards source registers into following
    test/cmp/arith instructions, folds mov/op/mov sequences, and rewrites
    mov+bts/btr+or and mov+lea combinations.  Returns true when p was
    removed/replaced and the caller should re-examine the (new) p. }
  var
    hp1, hp2: tai;
    TmpUsedRegs : TAllUsedRegs;
    GetNextIntruction_p : Boolean;
  begin
    Result:=false;
    GetNextIntruction_p:=GetNextInstruction(p, hp1);
    { mov x,reg32 followed by "and $ffffffff,reg32": the and keeps all 32
      bits, so it can be removed }
    if GetNextIntruction_p and
      MatchInstruction(hp1,A_AND,[]) and
      (taicpu(p).oper[1]^.typ = top_reg) and
      MatchOpType(taicpu(hp1),top_const,top_reg) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then
      case taicpu(p).opsize Of
        S_L:
          if (taicpu(hp1).oper[0]^.val = $ffffffff) then
            begin
              DebugMsg('PeepHole Optimization,MovAnd2Mov',p);
              asml.remove(hp1);
              hp1.free;
              Result:=true;
              exit;
            end;
      end
    else if GetNextIntruction_p and
      MatchInstruction(hp1,A_MOV,[]) and
      (taicpu(p).oper[1]^.typ = top_reg) and
      (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
      MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
      begin
        CopyUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.Next));
        { we have
          mov x, %treg
          mov %treg, y
        }
        if not(RegInOp(taicpu(p).oper[1]^.reg,taicpu(hp1).oper[1]^)) and
          not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
          { we've got
            mov x, %treg
            mov %treg, y
            with %treg is not used after }
          case taicpu(p).oper[0]^.typ Of
            top_reg:
              begin
                { change
                  mov %reg, %treg
                  mov %treg, y
                  to
                  mov %reg, y
                }
                taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
                DebugMsg('PeepHole Optimization,MovMov2Mov 2',p);
                asml.remove(hp1);
                hp1.free;
                ReleaseUsedRegs(TmpUsedRegs);
                Exit;
              end;
            top_ref:
              if (taicpu(hp1).oper[1]^.typ = top_reg) then
                begin
                  { change
                    mov mem, %treg
                    mov %treg, %reg
                    to
                    mov mem, %reg"
                  }
                  taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
                  DebugMsg('PeepHole Optimization,MovMov2Mov 3',p);
                  asml.remove(hp1);
                  hp1.free;
                  ReleaseUsedRegs(TmpUsedRegs);
                  Exit;
                end;
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end
    else
      { Change
        mov %reg1, %reg2
        xxx %reg2, ???
        to
        mov %reg1, %reg2
        xxx %reg1, ???
        to avoid a write/read penalty
      }
      if MatchOpType(taicpu(p),top_reg,top_reg) and
        GetNextInstruction(p,hp1) and
        (tai(hp1).typ = ait_instruction) and
        (taicpu(hp1).ops >= 1) and
        MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) then
        { we have
          mov %reg1, %reg2
          XXX %reg2, ???
        }
        begin
          if ((taicpu(hp1).opcode = A_OR) or
            (taicpu(hp1).opcode = A_TEST)) and
            (taicpu(hp1).oper[1]^.typ = top_reg) and
            (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
            { we have
              mov %reg1, %reg2
              test/or %reg2, %reg2
            }
            begin
              CopyUsedRegs(TmpUsedRegs);
              { reg1 will be used after the first instruction,
                so update the allocation info }
              AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
              if GetNextInstruction(hp1, hp2) and
                (hp2.typ = ait_instruction) and
                taicpu(hp2).is_jmp and
                not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
                { change
                  mov %reg1, %reg2
                  test/or %reg2, %reg2
                  jxx
                  to
                  test %reg1, %reg1
                  jxx
                }
                begin
                  taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                  taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                  asml.remove(p);
                  p.free;
                  p := hp1;
                  ReleaseUsedRegs(TmpUsedRegs);
                  Exit;
                end
              else
                { change
                  mov %reg1, %reg2
                  test/or %reg2, %reg2
                  to
                  mov %reg1, %reg2
                  test/or %reg1, %reg1
                }
                begin
                  taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
                  taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
                end;
              ReleaseUsedRegs(TmpUsedRegs);
            end
        end
      else
        { leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
          x >= RetOffset) as it doesn't do anything (it writes either to a
          parameter or to the temporary storage room for the function
          result)
        }
        if GetNextIntruction_p and
          (tai(hp1).typ = ait_instruction) then
          begin
            if IsExitCode(hp1) and
              MatchOpType(taicpu(p),top_reg,top_ref) and
              (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
              not(assigned(current_procinfo.procdef.funcretsym) and
                (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
              (taicpu(p).oper[1]^.ref^.index = NR_NO) then
              begin
                asml.remove(p);
                p.free;
                p:=hp1;
                DebugMsg('Peephole removed deadstore before leave/ret',p);
                RemoveLastDeallocForFuncRes(p);
                exit;
              end
            { change
              mov reg1, mem1
              test/cmp x, mem1
              to
              mov reg1, mem1
              test/cmp x, reg1
            }
            else if MatchOpType(taicpu(p),top_reg,top_ref) and
              MatchInstruction(hp1,A_CMP,A_TEST,[taicpu(p).opsize]) and
              (taicpu(hp1).oper[1]^.typ = top_ref) and
              RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
              begin
                taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
                DebugMsg('Peephole MovTestCmp2MovTestCmp 1',hp1);
                AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
              end;
          end;
    { Next instruction is also a MOV ? }
    if GetNextIntruction_p and
      MatchInstruction(hp1,A_MOV,[taicpu(p).opsize]) then
      begin
        if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
          (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
          { mov reg1, mem1 or mov mem1, reg1
            mov mem2, reg2 mov reg2, mem2}
          begin
            if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
              { mov reg1, mem1 or mov mem1, reg1
                mov mem2, reg1 mov reg2, mem1}
              begin
                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
                  { Removes the second statement from
                    mov reg1, mem1/reg2
                    mov mem1/reg2, reg1 }
                  begin
                    if taicpu(p).oper[0]^.typ=top_reg then
                      AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,usedregs);
                    DebugMsg('PeepHole Optimization,MovMov2Mov 1',p);
                    asml.remove(hp1);
                    hp1.free;
                    Result:=true;
                    exit;
                  end
                else
                  begin
                    CopyUsedRegs(TmpUsedRegs);
                    UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                    if (taicpu(p).oper[1]^.typ = top_ref) and
                      { mov reg1, mem1
                        mov mem2, reg1 }
                      (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
                      GetNextInstruction(hp1, hp2) and
                      MatchInstruction(hp2,A_CMP,[taicpu(p).opsize]) and
                      OpsEqual(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^) and
                      OpsEqual(taicpu(p).oper[0]^,taicpu(hp2).oper[1]^) and
                      not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
                      { change to
                        mov reg1, mem1 mov reg1, mem1
                        mov mem2, reg1 cmp reg1, mem2
                        cmp mem1, reg1
                      }
                      begin
                        asml.remove(hp2);
                        hp2.free;
                        taicpu(hp1).opcode := A_CMP;
                        taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
                        taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                        AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                        DebugMsg('Peephole MovMovCmp2MovCmp done',hp1);
                      end;
                    ReleaseUsedRegs(TmpUsedRegs);
                  end;
              end
            else if (taicpu(p).oper[1]^.typ=top_ref) and
              OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
              { mov reg1, mem1; mov mem1, reg2: read back from reg1 instead
                of from memory }
              begin
                AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
                taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
                DebugMsg('PeepHole Optimization,MovMov2MovMov1',p);
              end
            else
              begin
                CopyUsedRegs(TmpUsedRegs);
                if GetNextInstruction(hp1, hp2) and
                  MatchOpType(taicpu(p),top_ref,top_reg) and
                  MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[0]^) and
                  (taicpu(hp1).oper[1]^.typ = top_ref) and
                  MatchInstruction(hp2,A_MOV,[taicpu(p).opsize]) and
                  MatchOpType(taicpu(hp2),top_ref,top_reg) and
                  RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
                  if not RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^) and
                    not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
                    { mov mem1, %reg1
                      mov %reg1, mem2
                      mov mem2, reg2
                      to:
                      mov mem1, reg2
                      mov reg2, mem2}
                    begin
                      AllocRegBetween(taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
                      DebugMsg('PeepHole Optimization,MovMovMov2MovMov 1',p);
                      taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
                      taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
                      asml.remove(hp2);
                      hp2.free;
                    end
{$ifdef i386}
                  { this is enabled for i386 only, as the rules to create the reg sets below
                    are too complicated for x86-64, so this makes this code too error prone
                    on x86-64
                  }
                  else if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
                    not(RegInRef(taicpu(p).oper[1]^.reg,taicpu(p).oper[0]^.ref^)) and
                    not(RegInRef(taicpu(hp2).oper[1]^.reg,taicpu(hp2).oper[0]^.ref^)) then
                    { mov mem1, reg1 mov mem1, reg1
                      mov reg1, mem2 mov reg1, mem2
                      mov mem2, reg2 mov mem2, reg1
                      to: to:
                      mov mem1, reg1 mov mem1, reg1
                      mov mem1, reg2 mov reg1, mem2
                      mov reg1, mem2
                      or (if mem1 depends on reg1
                      and/or if mem2 depends on reg2)
                      to:
                      mov mem1, reg1
                      mov reg1, mem2
                      mov reg1, reg2
                    }
                    begin
                      taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
                      taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
                      taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
                      taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
                      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                      { extend the liveness of any base/index register of
                        mem1 over the whole rewritten sequence }
                      if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
                        (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                        AllocRegBetween(taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
                      if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
                        (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
                        AllocRegBetween(taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
                    end
                  else if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
                    begin
                      taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
                      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp2,usedregs);
                    end
                  else
                    begin
                      asml.remove(hp2);
                      hp2.free;
                    end
{$endif i386}
                  ;
                ReleaseUsedRegs(TmpUsedRegs);
              end;
          end
(*      { movl [mem1],reg1
        movl [mem1],reg2
        to
        movl [mem1],reg1
        movl reg1,reg2
        }
        else if (taicpu(p).oper[0]^.typ = top_ref) and
          (taicpu(p).oper[1]^.typ = top_reg) and
          (taicpu(hp1).oper[0]^.typ = top_ref) and
          (taicpu(hp1).oper[1]^.typ = top_reg) and
          (taicpu(p).opsize = taicpu(hp1).opsize) and
          RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
          (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
          (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
        else*)
        { movl const1,[mem1]
          movl [mem1],reg1
          to
          movl const1,reg1
          movl reg1,[mem1]
        }
        else if MatchOpType(Taicpu(p),top_const,top_ref) and
          MatchOpType(Taicpu(hp1),top_ref,top_reg) and
          (taicpu(p).opsize = taicpu(hp1).opsize) and
          RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
          not(RegInRef(taicpu(hp1).oper[1]^.reg,taicpu(hp1).oper[0]^.ref^)) then
          begin
            AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
            taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
            taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
            taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
            taicpu(hp1).fileinfo := taicpu(p).fileinfo;
            DebugMsg('PeepHole Optimization,MovMov2MovMov 1',p);
          end
      end
    else if (taicpu(p).oper[1]^.typ = top_reg) and
      GetNextIntruction_p and
      (hp1.typ = ait_instruction) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_MOV,[]) and
      OpsEqual(taicpu(hp2).oper[1]^, taicpu(p).oper[0]^) and
      (taicpu(hp2).oper[0]^.typ=top_reg) and
      (SuperRegistersEqual(taicpu(hp2).oper[0]^.reg,taicpu(p).oper[1]^.reg)) and
      (IsFoldableArithOp(taicpu(hp1), taicpu(p).oper[1]^.reg) or
       ((taicpu(p).opsize=S_L) and (taicpu(hp1).opsize=S_Q) and
        IsFoldableArithOp(taicpu(hp1), newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBQ)))
      ) then
      { change movsX/movzX reg/ref, reg2
        add/sub/or/... reg3/$const, reg2
        mov reg2 reg/ref
        to add/sub/or/... reg3/$const, reg/ref }
      begin
        CopyUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs, tai(p.next));
        UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
        If not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp2,TmpUsedRegs)) then
          begin
            { by example:
              movswl %si,%eax movswl %si,%eax p
              decl %eax addl %edx,%eax hp1
              movw %ax,%si movw %ax,%si hp2
              ->
              movswl %si,%eax movswl %si,%eax p
              decw %eax addw %edx,%eax hp1
              movw %ax,%si movw %ax,%si hp2
            }
            DebugMsg('Peephole Optimization MovOpMov2Op ('+
              std_op2str[taicpu(p).opcode]+gas_opsize2str[taicpu(p).opsize]+' '+
              std_op2str[taicpu(hp1).opcode]+gas_opsize2str[taicpu(hp1).opsize]+' '+
              std_op2str[taicpu(hp2).opcode]+gas_opsize2str[taicpu(hp2).opsize],p);
            taicpu(hp1).changeopsize(taicpu(hp2).opsize);
            {
              ->
              movswl %si,%eax movswl %si,%eax p
              decw %si addw %dx,%si hp1
              movw %ax,%si movw %ax,%si hp2
            }
            case taicpu(hp1).ops of
              1:
                begin
                  taicpu(hp1).loadoper(0, taicpu(hp2).oper[1]^);
                  if taicpu(hp1).oper[0]^.typ=top_reg then
                    setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                end;
              2:
                begin
                  taicpu(hp1).loadoper(1, taicpu(hp2).oper[1]^);
                  { shift counts keep their (byte-sized) register operand }
                  if (taicpu(hp1).oper[0]^.typ=top_reg) and
                    (taicpu(hp1).opcode<>A_SHL) and
                    (taicpu(hp1).opcode<>A_SHR) and
                    (taicpu(hp1).opcode<>A_SAR) then
                    setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
                end;
              else
                internalerror(2008042701);
            end;
            {
              ->
              decw %si addw %dx,%si p
            }
            asml.remove(p);
            asml.remove(hp2);
            p.Free;
            hp2.Free;
            p := hp1;
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end
    else if GetNextIntruction_p and
      MatchInstruction(hp1,A_BTS,A_BTR,[Taicpu(p).opsize]) and
      GetNextInstruction(hp1, hp2) and
      MatchInstruction(hp2,A_OR,[Taicpu(p).opsize]) and
      MatchOperand(Taicpu(p).oper[0]^,0) and
      (Taicpu(p).oper[1]^.typ = top_reg) and
      MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp1).oper[1]^) and
      MatchOperand(Taicpu(p).oper[1]^,Taicpu(hp2).oper[1]^) then
      { mov reg1,0
        bts reg1,operand1 --> mov reg1,operand2
        or reg1,operand2 bts reg1,operand1}
      begin
        Taicpu(hp2).opcode:=A_MOV;
        asml.remove(hp1);
        insertllitem(hp2,hp2.next,hp1);
        asml.remove(p);
        p.free;
        p:=hp1;
      end
    else if GetNextIntruction_p and
      MatchInstruction(hp1,A_LEA,[S_L]) and
      MatchOpType(Taicpu(p),top_ref,top_reg) and
      ((MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(hp1).oper[1]^.reg,Taicpu(p).oper[1]^.reg) and
        (Taicpu(hp1).oper[0]^.ref^.base<>Taicpu(p).oper[1]^.reg)
       ) or
       (MatchReference(Taicpu(hp1).oper[0]^.ref^,Taicpu(p).oper[1]^.reg,Taicpu(hp1).oper[1]^.reg) and
        (Taicpu(hp1).oper[0]^.ref^.index<>Taicpu(p).oper[1]^.reg)
       )
      ) then
      { mov reg1,ref
        lea reg2,[reg1,reg2]
        to
        add reg2,ref}
      begin
        CopyUsedRegs(TmpUsedRegs);
        { reg1 may not be used afterwards }
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
          begin
            Taicpu(hp1).opcode:=A_ADD;
            Taicpu(hp1).oper[0]^.ref^:=Taicpu(p).oper[0]^.ref^;
            DebugMsg('Peephole MovLea2Add done',hp1);
            asml.remove(p);
            p.free;
            p:=hp1;
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end;
  end;
function TX86AsmOptimizer.OptPass2MOV(var p : tai) : boolean;
  { Pass-2 peephole optimizations for a MOV at p: replaces a register copy
    that is only consumed as base/index of the following mov/movzx/movsx
    into the same super-register, and folds
    "mov (ref),reg; arith ...,reg; mov reg,(ref)" read-modify-write
    sequences into a single memory-operand instruction. }
  var
    TmpUsedRegs : TAllUsedRegs;
    hp1,hp2: tai;
  begin
    Result:=false;
    if MatchOpType(taicpu(p),top_reg,top_reg) and
      GetNextInstruction(p, hp1) and
      MatchInstruction(hp1,A_MOV,A_MOVZX,A_MOVSX,[]) and
      MatchOpType(taicpu(hp1),top_ref,top_reg) and
      ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg)
       or
       (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)
      ) and
      (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
      { mov reg1, reg2
        mov/zx/sx (reg2, ..), reg2 to mov/zx/sx (reg1, ..), reg2}
      begin
        if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
          taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
        if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
          taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
        asml.remove(p);
        p.free;
        p := hp1;
        Result:=true;
        exit;
      end
    else if (taicpu(p).oper[0]^.typ = top_ref) and
      GetNextInstruction(p,hp1) and
      (hp1.typ = ait_instruction) and
      { while the GetNextInstruction(hp1,hp2) call could be factored out,
        doing it separately in both branches allows to do the cheap checks
        with low probability earlier }
      ((IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
        GetNextInstruction(hp1,hp2) and
        MatchInstruction(hp2,A_MOV,[])
       ) or
       ((taicpu(hp1).opcode=A_LEA) and
        GetNextInstruction(hp1,hp2) and
        MatchInstruction(hp2,A_MOV,[]) and
        { the lea may use the loaded register only once, as base or as
          index (possibly with an extra offset) }
        ((MatchReference(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) and
          (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg)
         ) or
         (MatchReference(taicpu(hp1).oper[0]^.ref^,NR_INVALID,
           taicpu(p).oper[1]^.reg) and
          (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg)) or
         (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_NO)) or
         (MatchReferenceWithOffset(taicpu(hp1).oper[0]^.ref^,NR_NO,taicpu(p).oper[1]^.reg))
        ) and
        ((MatchOperand(taicpu(p).oper[1]^,taicpu(hp2).oper[0]^)) or not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,UsedRegs)))
       )
      ) and
      { the arith/lea result must be what the last mov stores }
      MatchOperand(taicpu(hp1).oper[taicpu(hp1).ops-1]^,taicpu(hp2).oper[0]^) and
      (taicpu(hp2).oper[1]^.typ = top_ref) then
      begin
        CopyUsedRegs(TmpUsedRegs);
        UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
        if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
          not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2, TmpUsedRegs))) then
          { change mov (ref), reg
            add/sub/or/... reg2/$const, reg
            mov reg, (ref)
            # release reg
            to add/sub/or/... reg2/$const, (ref) }
          begin
            case taicpu(hp1).opcode of
              A_INC,A_DEC,A_NOT,A_NEG :
                taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
              A_LEA :
                begin
                  { turn the lea into an add: the source becomes whatever
                    part of the reference is not the folded register (the
                    other register if present, else the constant offset) }
                  taicpu(hp1).opcode:=A_ADD;
                  if (taicpu(hp1).oper[0]^.ref^.index<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.index<>NR_NO) then
                    taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.index)
                  else if (taicpu(hp1).oper[0]^.ref^.base<>taicpu(p).oper[1]^.reg) and (taicpu(hp1).oper[0]^.ref^.base<>NR_NO) then
                    taicpu(hp1).loadreg(0,taicpu(hp1).oper[0]^.ref^.base)
                  else
                    taicpu(hp1).loadconst(0,taicpu(hp1).oper[0]^.ref^.offset);
                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                  DebugMsg('Peephole FoldLea done',hp1);
                end
              else
                taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
            end;
            asml.remove(p);
            asml.remove(hp2);
            p.free;
            hp2.free;
            p := hp1
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end;
  end;
function TX86AsmOptimizer.OptPass2Imul(var p : tai) : boolean;
  { Folds a preceding register copy into an imul:
        mov  reg1,reg2
        imul y,reg2        ->   imul y,reg1,reg2
    Conditions checked below: the imul is effectively two-operand (2 ops, or
    3 ops with op2 = op1), its first operand is a constant or a full-address
    reference, the previous instruction is a reg,reg mov into the imul's
    destination, and the destination register is not live after the imul.
    Returns true when the mov was removed. }
  var
    TmpUsedRegs : TAllUsedRegs;
    hp1 : tai;
  begin
    Result:=false;
    if (taicpu(p).ops >= 2) and
       ((taicpu(p).oper[0]^.typ = top_const) or
        ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
       (taicpu(p).oper[1]^.typ = top_reg) and
       ((taicpu(p).ops = 2) or
        ((taicpu(p).oper[2]^.typ = top_reg) and
         (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
       GetLastInstruction(p,hp1) and
       MatchInstruction(hp1,A_MOV,[]) and
       MatchOpType(taicpu(hp1),top_reg,top_reg) and
       { also accept a 32 bit mov feeding a 64 bit imul of the same super
         register (the mov writes the sub register the imul then widens) }
       ((taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) or
        ((taicpu(hp1).opsize=S_L) and (taicpu(p).opsize=S_Q) and SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(p).oper[1]^.reg))) then
      begin
        CopyUsedRegs(TmpUsedRegs);
        if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,p,TmpUsedRegs)) then
          { change
              mov reg1,reg2
              imul y,reg2 to imul y,reg1,reg2 }
          begin
            { turn the imul into its three operand form, reading reg1 directly }
            taicpu(p).ops := 3;
            taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
            taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
            DebugMsg('Peephole MovImul2Imul done',p);
            asml.remove(hp1);
            hp1.free;
            result:=true;
          end;
        ReleaseUsedRegs(TmpUsedRegs);
      end;
  end;
function TX86AsmOptimizer.OptPass2Jmp(var p : tai) : boolean;
  var
    hp1 : tai;
  begin
    {
      change
             jmp .L1
             ...
         .L1:
             ret
      into
             ret
    }
    result:=false;
    { only an unconditional direct jump to a plain symbol (no base/index
      register) can be followed to its target }
    if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) and (taicpu(p).oper[0]^.ref^.base=NR_NO) and
       (taicpu(p).oper[0]^.ref^.index=NR_NO) then
      begin
        hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
        if (taicpu(p).condition=C_None) and assigned(hp1) and SkipLabels(hp1,hp1) and
           MatchInstruction(hp1,A_RET,[S_NO]) then
          begin
            { the jmp no longer references its target label }
            tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
            { rewrite the jmp in place into a copy of the ret }
            taicpu(p).opcode:=A_RET;
            taicpu(p).is_jmp:=false;
            taicpu(p).ops:=taicpu(hp1).ops;
            case taicpu(hp1).ops of
              0:
                taicpu(p).clearop(0);
              1:
                { "ret $n": copy the stack adjustment constant }
                taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
              else
                internalerror(2016041301);
            end;
            result:=true;
          end;
      end;
  end;
  1591. function CanBeCMOV(p : tai) : boolean;
  1592. begin
  1593. CanBeCMOV:=assigned(p) and
  1594. MatchInstruction(p,A_MOV,[S_W,S_L,S_Q]) and
  1595. { we can't use cmov ref,reg because
  1596. ref could be nil and cmov still throws an exception
  1597. if ref=nil but the mov isn't done (FK)
  1598. or ((taicpu(p).oper[0]^.typ = top_ref) and
  1599. (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
  1600. }
  1601. MatchOpType(taicpu(p),top_reg,top_reg);
  1602. end;
function TX86AsmOptimizer.OptPass2Jcc(var p : tai) : boolean;
  { Two transformations on a conditional jump:
    1. fold a carry-conditional skip of inc/dec into adc/sbb with 0;
    2. on CPUs with cmov, replace short conditionally-skipped runs of
       reg,reg movs (optionally with an else part) by cmovCC sequences. }
  var
    hp1,hp2,hp3: tai;
    carryadd_opcode : TAsmOp;
    l : Longint;
    condition : TAsmCond;
  begin
    { jb @@1                            cmc
      inc/dec operand           -->     adc/sbb operand,0
      @@1:
      ... and ...
      jnb @@1
      inc/dec operand           -->     adc/sbb operand,0
      @@1: }
    result:=false;
    { the jcc must be immediately followed by a single instruction and then
      by exactly the label the jcc targets }
    if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
      GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
      (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
      begin
        carryadd_opcode:=A_NONE;
        { jb/jnae skips the inc/dec when carry is set: invert the carry with
          cmc, then fold the inc/dec into adc/sbb $0 }
        if Taicpu(p).condition in [C_NAE,C_B] then
          begin
            if Taicpu(hp1).opcode=A_INC then
              carryadd_opcode:=A_ADC;
            if Taicpu(hp1).opcode=A_DEC then
              carryadd_opcode:=A_SBB;
            if carryadd_opcode<>A_NONE then
              begin
                { turn the jcc itself into the cmc }
                Taicpu(p).clearop(0);
                Taicpu(p).ops:=0;
                Taicpu(p).is_jmp:=false;
                Taicpu(p).opcode:=A_CMC;
                Taicpu(p).condition:=C_NONE;
                { inc/dec op  ->  adc/sbb $0,op }
                Taicpu(hp1).ops:=2;
                Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                Taicpu(hp1).loadconst(0,0);
                Taicpu(hp1).opcode:=carryadd_opcode;
                result:=true;
                exit;
              end;
          end;
        { jae/jnb skips the inc/dec when carry is clear: no cmc is needed,
          the jcc can simply be dropped }
        if Taicpu(p).condition in [C_AE,C_NB] then
          begin
            if Taicpu(hp1).opcode=A_INC then
              carryadd_opcode:=A_ADC;
            if Taicpu(hp1).opcode=A_DEC then
              carryadd_opcode:=A_SBB;
            if carryadd_opcode<>A_NONE then
              begin
                { NOTE(review): unlike the jcc removals in the cmov cases
                  below, the target label's reference count is not
                  decremented here -- confirm whether a decrefs call is
                  missing }
                asml.remove(p);
                p.free;
                Taicpu(hp1).ops:=2;
                Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                Taicpu(hp1).loadconst(0,0);
                Taicpu(hp1).opcode:=carryadd_opcode;
                p:=hp1;
                result:=true;
                exit;
              end;
          end;
      end;
{$ifndef i8086}
    if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
      begin
        { check for
               jCC   xxx
               <several movs>
            xxx:
        }
        { count the convertible movs following the jcc }
        l:=0;
        GetNextInstruction(p, hp1);
        while assigned(hp1) and
          CanBeCMOV(hp1) and
          { stop on labels }
          not(hp1.typ=ait_label) do
          begin
            inc(l);
            GetNextInstruction(hp1,hp1);
          end;
        if assigned(hp1) then
          begin
            if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
              begin
                { at most 4 movs are converted; l=0 means nothing to do }
                if (l<=4) and (l>0) then
                  begin
                    { the movs execute when the jump is NOT taken, hence the
                      inverse condition }
                    condition:=inverse_cond(taicpu(p).condition);
                    hp2:=p;
                    GetNextInstruction(p,hp1);
                    p:=hp1;
                    repeat
                      taicpu(hp1).opcode:=A_CMOVcc;
                      taicpu(hp1).condition:=condition;
                      GetNextInstruction(hp1,hp1);
                    until not(assigned(hp1)) or
                      not(CanBeCMOV(hp1));
                    { wait with removing else GetNextInstruction could
                      ignore the label if it was the only usage in the
                      jump moved away }
                    tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                    { if the label refs. reach zero, remove any alignment before the label }
                    if (hp1.typ=ait_align) and (tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).getrefs=0) then
                      begin
                        asml.Remove(hp1);
                        hp1.Free;
                      end;
                    { now the jcc itself can go }
                    asml.remove(hp2);
                    hp2.free;
                    result:=true;
                    exit;
                  end;
              end
            else
              begin
                { check further for
                        jCC   xxx
                        <several movs 1>
                        jmp   yyy
                xxx:
                        <several movs 2>
                yyy:
                }
                { hp2 points to jmp yyy }
                hp2:=hp1;
                { skip hp1 to xxx }
                GetNextInstruction(hp1, hp1);
                if assigned(hp2) and
                  assigned(hp1) and
                  (l<=3) and
                  (hp2.typ=ait_instruction) and
                  (taicpu(hp2).is_jmp) and
                  (taicpu(hp2).condition=C_None) and
                  { real label and jump, no further references to the
                    label are allowed }
                  (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
                  FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                  begin
                    { count the convertible movs of the else part }
                    l:=0;
                    { skip hp1 to <several moves 2> }
                    GetNextInstruction(hp1, hp1);
                    while assigned(hp1) and
                      CanBeCMOV(hp1) do
                      begin
                        inc(l);
                        GetNextInstruction(hp1, hp1);
                      end;
                    { hp1 points to yyy: }
                    if assigned(hp1) and
                      FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                      begin
                        { first run: executes when the jump is not taken }
                        condition:=inverse_cond(taicpu(p).condition);
                        GetNextInstruction(p,hp1);
                        hp3:=p;
                        p:=hp1;
                        repeat
                          taicpu(hp1).opcode:=A_CMOVcc;
                          taicpu(hp1).condition:=condition;
                          GetNextInstruction(hp1,hp1);
                        until not(assigned(hp1)) or
                          not(CanBeCMOV(hp1));
                        { hp2 is still at jmp yyy }
                        GetNextInstruction(hp2,hp1);
                        { hp1 is now at xxx: }
                        { second run: the original else part, opposite condition }
                        condition:=inverse_cond(condition);
                        GetNextInstruction(hp1,hp1);
                        { hp1 is now at <several movs 2> }
                        repeat
                          taicpu(hp1).opcode:=A_CMOVcc;
                          taicpu(hp1).condition:=condition;
                          GetNextInstruction(hp1,hp1);
                        until not(assigned(hp1)) or
                          not(CanBeCMOV(hp1));
                        {
                        asml.remove(hp1.next)
                        hp1.next.free;
                        asml.remove(hp1);
                        hp1.free;
                        }
                        { remove jCC }
                        tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                        asml.remove(hp3);
                        hp3.free;
                        { remove jmp }
                        tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                        asml.remove(hp2);
                        hp2.free;
                        result:=true;
                        exit;
                      end;
                  end;
              end;
          end;
      end;
{$endif i8086}
  end;
  1797. function TX86AsmOptimizer.OptPass1Movx(var p : tai) : boolean;
  1798. var
  1799. hp1,hp2: tai;
  1800. begin
  1801. result:=false;
  1802. if (taicpu(p).oper[1]^.typ = top_reg) and
  1803. GetNextInstruction(p,hp1) and
  1804. (hp1.typ = ait_instruction) and
  1805. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1806. GetNextInstruction(hp1,hp2) and
  1807. MatchInstruction(hp2,A_MOV,[]) and
  1808. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1809. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1810. {$ifdef i386}
  1811. { not all registers have byte size sub registers on i386 }
  1812. ((taicpu(hp2).opsize<>S_B) or (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX])) and
  1813. {$endif i386}
  1814. (((taicpu(hp1).ops=2) and
  1815. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1816. ((taicpu(hp1).ops=1) and
  1817. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1818. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1819. begin
  1820. { change movsX/movzX reg/ref, reg2
  1821. add/sub/or/... reg3/$const, reg2
  1822. mov reg2 reg/ref
  1823. to add/sub/or/... reg3/$const, reg/ref }
  1824. { by example:
  1825. movswl %si,%eax movswl %si,%eax p
  1826. decl %eax addl %edx,%eax hp1
  1827. movw %ax,%si movw %ax,%si hp2
  1828. ->
  1829. movswl %si,%eax movswl %si,%eax p
  1830. decw %eax addw %edx,%eax hp1
  1831. movw %ax,%si movw %ax,%si hp2
  1832. }
  1833. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1834. {
  1835. ->
  1836. movswl %si,%eax movswl %si,%eax p
  1837. decw %si addw %dx,%si hp1
  1838. movw %ax,%si movw %ax,%si hp2
  1839. }
  1840. case taicpu(hp1).ops of
  1841. 1:
  1842. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1843. 2:
  1844. begin
  1845. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1846. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1847. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1848. end;
  1849. else
  1850. internalerror(2008042701);
  1851. end;
  1852. {
  1853. ->
  1854. decw %si addw %dx,%si p
  1855. }
  1856. DebugMsg('PeepHole Optimization,var3',p);
  1857. asml.remove(p);
  1858. asml.remove(hp2);
  1859. p.free;
  1860. hp2.free;
  1861. p:=hp1;
  1862. end
  1863. { removes superfluous And's after movzx's }
  1864. else if taicpu(p).opcode=A_MOVZX then
  1865. begin
  1866. if (taicpu(p).oper[1]^.typ = top_reg) and
  1867. GetNextInstruction(p, hp1) and
  1868. (tai(hp1).typ = ait_instruction) and
  1869. (taicpu(hp1).opcode = A_AND) and
  1870. (taicpu(hp1).oper[0]^.typ = top_const) and
  1871. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1872. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1873. begin
  1874. case taicpu(p).opsize Of
  1875. S_BL, S_BW{$ifdef x86_64}, S_BQ{$endif x86_64}:
  1876. if (taicpu(hp1).oper[0]^.val = $ff) then
  1877. begin
  1878. DebugMsg('PeepHole Optimization,var4',p);
  1879. asml.remove(hp1);
  1880. hp1.free;
  1881. end;
  1882. S_WL{$ifdef x86_64}, S_WQ{$endif x86_64}:
  1883. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1884. begin
  1885. DebugMsg('PeepHole Optimization,var5',p);
  1886. asml.remove(hp1);
  1887. hp1.free;
  1888. end;
  1889. {$ifdef x86_64}
  1890. S_LQ:
  1891. if (taicpu(hp1).oper[0]^.val = $ffffffff) then
  1892. begin
  1893. if (cs_asm_source in current_settings.globalswitches) then
  1894. asml.insertbefore(tai_comment.create(strpnew('PeepHole Optimization,var6')),p);
  1895. asml.remove(hp1);
  1896. hp1.Free;
  1897. end;
  1898. {$endif x86_64}
  1899. end;
  1900. end;
  1901. { changes some movzx constructs to faster synonims (all examples
  1902. are given with eax/ax, but are also valid for other registers)}
  1903. if (taicpu(p).oper[1]^.typ = top_reg) then
  1904. if (taicpu(p).oper[0]^.typ = top_reg) then
  1905. case taicpu(p).opsize of
  1906. S_BW:
  1907. begin
  1908. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1909. not(cs_opt_size in current_settings.optimizerswitches) then
  1910. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1911. begin
  1912. taicpu(p).opcode := A_AND;
  1913. taicpu(p).changeopsize(S_W);
  1914. taicpu(p).loadConst(0,$ff);
  1915. DebugMsg('PeepHole Optimization,var7',p);
  1916. end
  1917. else if GetNextInstruction(p, hp1) and
  1918. (tai(hp1).typ = ait_instruction) and
  1919. (taicpu(hp1).opcode = A_AND) and
  1920. (taicpu(hp1).oper[0]^.typ = top_const) and
  1921. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1922. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1923. { Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1924. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1925. begin
  1926. DebugMsg('PeepHole Optimization,var8',p);
  1927. taicpu(p).opcode := A_MOV;
  1928. taicpu(p).changeopsize(S_W);
  1929. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1930. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1931. end;
  1932. end;
  1933. S_BL:
  1934. begin
  1935. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1936. not(cs_opt_size in current_settings.optimizerswitches) then
  1937. { Change "movzbl %al, %eax" to "andl $0x0ffh, %eax" }
  1938. begin
  1939. taicpu(p).opcode := A_AND;
  1940. taicpu(p).changeopsize(S_L);
  1941. taicpu(p).loadConst(0,$ff)
  1942. end
  1943. else if GetNextInstruction(p, hp1) and
  1944. (tai(hp1).typ = ait_instruction) and
  1945. (taicpu(hp1).opcode = A_AND) and
  1946. (taicpu(hp1).oper[0]^.typ = top_const) and
  1947. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1948. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1949. { Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1950. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1951. begin
  1952. DebugMsg('PeepHole Optimization,var10',p);
  1953. taicpu(p).opcode := A_MOV;
  1954. taicpu(p).changeopsize(S_L);
  1955. { do not use R_SUBWHOLE
  1956. as movl %rdx,%eax
  1957. is invalid in assembler PM }
  1958. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  1959. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1960. end
  1961. end;
  1962. {$ifndef i8086}
  1963. S_WL:
  1964. begin
  1965. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1966. not(cs_opt_size in current_settings.optimizerswitches) then
  1967. { Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax" }
  1968. begin
  1969. DebugMsg('PeepHole Optimization,var11',p);
  1970. taicpu(p).opcode := A_AND;
  1971. taicpu(p).changeopsize(S_L);
  1972. taicpu(p).loadConst(0,$ffff);
  1973. end
  1974. else if GetNextInstruction(p, hp1) and
  1975. (tai(hp1).typ = ait_instruction) and
  1976. (taicpu(hp1).opcode = A_AND) and
  1977. (taicpu(hp1).oper[0]^.typ = top_const) and
  1978. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1979. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1980. { Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1981. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1982. begin
  1983. DebugMsg('PeepHole Optimization,var12',p);
  1984. taicpu(p).opcode := A_MOV;
  1985. taicpu(p).changeopsize(S_L);
  1986. { do not use R_SUBWHOLE
  1987. as movl %rdx,%eax
  1988. is invalid in assembler PM }
  1989. setsubreg(taicpu(p).oper[0]^.reg, R_SUBD);
  1990. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1991. end;
  1992. end;
  1993. {$endif i8086}
  1994. end
  1995. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1996. begin
  1997. if GetNextInstruction(p, hp1) and
  1998. (tai(hp1).typ = ait_instruction) and
  1999. (taicpu(hp1).opcode = A_AND) and
  2000. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2001. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  2002. begin
  2003. taicpu(p).opcode := A_MOV;
  2004. case taicpu(p).opsize Of
  2005. S_BL:
  2006. begin
  2007. DebugMsg('PeepHole Optimization,var13',p);
  2008. taicpu(p).changeopsize(S_L);
  2009. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2010. end;
  2011. S_WL:
  2012. begin
  2013. DebugMsg('PeepHole Optimization,var14',p);
  2014. taicpu(p).changeopsize(S_L);
  2015. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  2016. end;
  2017. S_BW:
  2018. begin
  2019. DebugMsg('PeepHole Optimization,var15',p);
  2020. taicpu(p).changeopsize(S_W);
  2021. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  2022. end;
  2023. {$ifdef x86_64}
  2024. S_BQ:
  2025. begin
  2026. DebugMsg('PeepHole Optimization,var16',p);
  2027. taicpu(p).changeopsize(S_Q);
  2028. taicpu(hp1).loadConst(
  2029. 0, taicpu(hp1).oper[0]^.val and $ff);
  2030. end;
  2031. S_WQ:
  2032. begin
  2033. DebugMsg('PeepHole Optimization,var17',p);
  2034. taicpu(p).changeopsize(S_Q);
  2035. taicpu(hp1).loadConst(0, taicpu(hp1).oper[0]^.val and $ffff);
  2036. end;
  2037. S_LQ:
  2038. begin
  2039. DebugMsg('PeepHole Optimization,var18',p);
  2040. taicpu(p).changeopsize(S_Q);
  2041. taicpu(hp1).loadConst(
  2042. 0, taicpu(hp1).oper[0]^.val and $ffffffff);
  2043. end;
  2044. {$endif x86_64}
  2045. else
  2046. Internalerror(2017050704)
  2047. end;
  2048. end;
  2049. end;
  2050. end;
  2051. end;
  2052. function TX86AsmOptimizer.OptPass1AND(var p : tai) : boolean;
  2053. var
  2054. hp1 : tai;
  2055. begin
  2056. Result:=false;
  2057. if not(GetNextInstruction(p, hp1)) then
  2058. exit;
  2059. if MatchOpType(taicpu(p),top_const,top_reg) and
  2060. MatchInstruction(hp1,A_AND,[]) and
  2061. MatchOpType(taicpu(hp1),top_const,top_reg) and
  2062. (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2063. { the second register must contain the first one, so compare their subreg types }
  2064. (getsubreg(taicpu(p).oper[1]^.reg)<=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2065. (abs(taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val)<$80000000) then
  2066. { change
  2067. and const1, reg
  2068. and const2, reg
  2069. to
  2070. and (const1 and const2), reg
  2071. }
  2072. begin
  2073. taicpu(hp1).loadConst(0, taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  2074. DebugMsg('Peephole AndAnd2And done',hp1);
  2075. asml.remove(p);
  2076. p.Free;
  2077. p:=hp1;
  2078. Result:=true;
  2079. exit;
  2080. end
  2081. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2082. MatchInstruction(hp1,A_MOVZX,[]) and
  2083. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2084. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2085. (getsubreg(taicpu(hp1).oper[0]^.reg)=getsubreg(taicpu(hp1).oper[1]^.reg)) and
  2086. (((taicpu(p).opsize=S_W) and
  2087. (taicpu(hp1).opsize=S_BW)) or
  2088. ((taicpu(p).opsize=S_L) and
  2089. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2090. {$ifdef x86_64}
  2091. or
  2092. ((taicpu(p).opsize=S_Q) and
  2093. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2094. {$endif x86_64}
  2095. ) then
  2096. begin
  2097. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2098. ((taicpu(p).oper[0]^.val and $ff)=taicpu(p).oper[0]^.val)
  2099. ) or
  2100. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2101. ((taicpu(p).oper[0]^.val and $ffff)=taicpu(p).oper[0]^.val))
  2102. {$ifdef x86_64}
  2103. or
  2104. (((taicpu(hp1).opsize)=S_LQ) and
  2105. ((taicpu(p).oper[0]^.val and $ffffffff)=taicpu(p).oper[0]^.val)
  2106. )
  2107. {$endif x86_64}
  2108. then
  2109. begin
  2110. DebugMsg('Peephole AndMovzToAnd done',p);
  2111. asml.remove(hp1);
  2112. hp1.free;
  2113. end;
  2114. end
  2115. else if MatchOpType(taicpu(p),top_const,top_reg) and
  2116. MatchInstruction(hp1,A_MOVSX{$ifdef x86_64},A_MOVSXD{$endif x86_64},[]) and
  2117. (taicpu(hp1).oper[0]^.typ = top_reg) and
  2118. MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) and
  2119. (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg)) and
  2120. (((taicpu(p).opsize=S_W) and
  2121. (taicpu(hp1).opsize=S_BW)) or
  2122. ((taicpu(p).opsize=S_L) and
  2123. (taicpu(hp1).opsize in [S_WL,S_BL]))
  2124. {$ifdef x86_64}
  2125. or
  2126. ((taicpu(p).opsize=S_Q) and
  2127. (taicpu(hp1).opsize in [S_BQ,S_WQ,S_LQ]))
  2128. {$endif x86_64}
  2129. ) then
  2130. begin
  2131. if (((taicpu(hp1).opsize) in [S_BW,S_BL{$ifdef x86_64},S_BQ{$endif x86_64}]) and
  2132. ((taicpu(p).oper[0]^.val and $7f)=taicpu(p).oper[0]^.val)
  2133. ) or
  2134. (((taicpu(hp1).opsize) in [S_WL{$ifdef x86_64},S_WQ{$endif x86_64}]) and
  2135. ((taicpu(p).oper[0]^.val and $7fff)=taicpu(p).oper[0]^.val))
  2136. {$ifdef x86_64}
  2137. or
  2138. (((taicpu(hp1).opsize)=S_LQ) and
  2139. ((taicpu(p).oper[0]^.val and $7fffffff)=taicpu(p).oper[0]^.val)
  2140. )
  2141. {$endif x86_64}
  2142. then
  2143. begin
  2144. DebugMsg('PeepHole Optimization,AndMovsxToAnd',p);
  2145. asml.remove(hp1);
  2146. hp1.free;
  2147. end;
  2148. end
  2149. else if (taicpu(p).oper[1]^.typ = top_reg) and
  2150. (hp1.typ = ait_instruction) and
  2151. (taicpu(hp1).is_jmp) and
  2152. (taicpu(hp1).opcode<>A_JMP) and
  2153. not(RegInUsedRegs(taicpu(p).oper[1]^.reg,UsedRegs)) then
  2154. { change
  2155. and x, reg
  2156. jxx
  2157. to
  2158. test x, reg
  2159. jxx
  2160. if reg is deallocated before the
  2161. jump, but only if it's a conditional jump (PFV)
  2162. }
  2163. taicpu(p).opcode := A_TEST;
  2164. end;
  2165. procedure TX86AsmOptimizer.PostPeepholeOptMov(const p : tai);
  2166. begin
  2167. if MatchOperand(taicpu(p).oper[0]^,0) and
  2168. (taicpu(p).oper[1]^.typ = Top_Reg) and
  2169. not(RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
  2170. { change "mov $0, %reg" into "xor %reg, %reg" }
  2171. begin
  2172. taicpu(p).opcode := A_XOR;
  2173. taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
  2174. end;
  2175. end;
  2176. end.