aoptcpu.pas 104 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aoptobj, aoptcpub, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  25. Type
  { Assembler optimizer class of this unit (the unit header describes it as
    the peephole optimizer for i386). It derives from TX86AsmOptimizer and
    overrides the optimisation pass entry points listed below. }
  TCpuAsmOptimizer = class(TX86AsmOptimizer)
    { Pass entry points overridden from the ancestor class. }
    procedure Optimize; override;
    procedure PrePeepHoleOpts; override;
    procedure PeepHoleOptPass1; override;
    procedure PeepHoleOptPass2; override;
    procedure PostPeepHoleOpts; override;
    { Handles an FSTP/FLD (or FISTP/FILD) pair that targets the same memory
      reference; returns true when the caller should "continue" its scan. }
    function DoFpuLoadStoreOpt(var p : tai) : boolean;
    { Returns true when instruction hp reads reg, either through an explicit
      operand or implicitly. }
    function RegReadByInstruction(reg : TRegister; hp : tai) : boolean;
    { Override that forwards to RegReadByInstruction. }
    function InstructionLoadsFromReg(const reg : TRegister;const hp : tai) : boolean;override;
  end;
  36. Var
  37. AsmOptimizer : TCpuAsmOptimizer;
  38. Implementation
  39. uses
  40. verbose,globtype,globals,
  41. cutils,
  42. aoptbase,
  43. cpuinfo,
  44. aasmcpu,
  45. procinfo,
  46. cgutils,cgx86,
  47. { units we should get rid of: }
  48. symsym,symconst;
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
  { Looks for a store/reload pair on the same memory reference:
        fstp  mem ; fld  mem      or      fistp mem ; fild mem
    When the value is an extended (S_FX), the pair is directly followed by
    the procedure's exit code, and the reference is a frame-pointer relative
    slot without index that does not lie below the function result's offset,
    both instructions are removed and p is advanced to the exit code.

    Returns true if a "continue" should be done after this optimization.

    Turning "fstp f; fld f" into "fst f" for the other operand sizes is
    deliberately not attempted: the store operation rounds, so the reloaded
    value may differ from the value on the FPU stack. }
  var
    storeIns, loadIns: taicpu;
    nextIns, exitIns: tai;
    isFloatPair, isIntPair: boolean;
  begin
    Result := false;

    { --- step 1: recognise the store/reload pair ------------------------- }
    storeIns := taicpu(p);
    if storeIns.oper[0]^.typ <> top_ref then
      exit;
    if not getNextInstruction(p, nextIns) then
      exit;
    if nextIns.typ <> ait_instruction then
      exit;
    loadIns := taicpu(nextIns);

    isFloatPair := (loadIns.opcode = A_FLD) and (storeIns.opcode = A_FSTP);
    isIntPair := (storeIns.opcode = A_FISTP) and (loadIns.opcode = A_FILD);
    if not (isFloatPair or isIntPair) then
      exit;

    if loadIns.oper[0]^.typ <> top_ref then
      exit;
    if loadIns.opsize <> storeIns.opsize then
      exit;
    if not RefsEqual(storeIns.oper[0]^.ref^, loadIns.oper[0]^.ref^) then
      exit;

    { --- step 2: the pair may only be dropped for extended values right
          before the exit code ------------------------------------------- }
    if storeIns.opsize <> S_FX then
      exit;
    if not getNextInstruction(nextIns, exitIns) then
      exit;
    if exitIns.typ <> ait_instruction then
      exit;
    if not IsExitCode(exitIns) then
      exit;
    if storeIns.oper[0]^.ref^.base <> current_procinfo.FramePointer then
      exit;
    if assigned(current_procinfo.procdef.funcretsym) and
       (storeIns.oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
      exit;
    if storeIns.oper[0]^.ref^.index <> NR_NO then
      exit;

    { --- step 3: drop both instructions and continue at the exit code --- }
    asml.remove(p);
    asml.remove(nextIns);
    p.free;
    nextIns.free;
    p := exitIns;
    removeLastDeallocForFuncRes(p);
    Result := true;
  end;
  { Converts a register-related tinschange value into the super register it
    refers to. The read group (..CH_REDI) indexes the table directly; the
    write, read/write and modify groups are first shifted back onto the read
    group by subtracting the ordinal of the last member of the preceding
    group. Any value beyond CH_MEDI triggers internal error 2016041901. }
  function tch2reg(ch: tinschange): tsuperregister;
  const
    ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
  var
    groupBase: longint;
  begin
    if (ch <= CH_REDI) then
      groupBase := 0
    else if (ch <= CH_WEDI) then
      groupBase := ord(CH_REDI)
    else if (ch <= CH_RWEDI) then
      groupBase := ord(CH_WEDI)
    else if (ch <= CH_MEDI) then
      groupBase := ord(CH_RWEDI)
    else
      begin
        InternalError(2016041901);
        exit;
      end;
    tch2reg := ch2reg[tinschange(ord(ch) - groupBase)];
  end;
  { Returns true when reg is an integer register whose super register lies in
    the range RS_EAX..RS_EBX.
    NOTE(review): only register type and super register are inspected, the
    sub-register size is not - confirm callers only pass 32 bit registers. }
  function isgp32reg(reg: TRegister): boolean;
  var
    sr: tsuperregister;
  begin
    isgp32reg:=false;
    if getregtype(reg)<>R_INTREGISTER then
      exit;
    sr:=getsupreg(reg);
  {$push}{$warnings off}
    isgp32reg:=(sr>=RS_EAX) and (sr<=RS_EBX);
  {$pop}
  end;
  { Override that simply delegates to RegReadByInstruction: the answer is
    whatever that routine reports for (reg, hp). }
  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister;const hp: tai): boolean;
  begin
    InstructionLoadsFromReg := RegReadByInstruction(reg, hp);
  end;
  { Returns true when the instruction hp reads register reg.

    reg : register to look for
    hp  : list item to inspect; anything that is not an instruction yields
          false

    Special cases:
      - CALL is treated as reading every register.
      - IMUL: the one-operand form reads its operand and, implicitly, the
        accumulator; the two/three-operand forms read their first two
        operands.
      - IDIV/DIV/MUL read their operand plus (conservatively) EAX and EDX.
    All other opcodes are answered from the instruction property table
    (insprop): registers used inside memory references, implicit register
    reads, the operand read/modify flags and the flags register. }
  function TCpuAsmOptimizer.RegReadByInstruction(reg: TRegister; hp: tai): boolean;
  var
    p: taicpu;
    opcount: longint;
  begin
    RegReadByInstruction := false;
    if hp.typ <> ait_instruction then
      exit;
    p := taicpu(hp);
    case p.opcode of
      A_CALL:
        regreadbyinstruction := true;
      A_IMUL:
        case p.ops of
          1:
            { The one-operand form implicitly multiplies by AL/AX/EAX, so any
              sub-register of the accumulator counts as read. Comparing with
              NR_EAX only (as was done before) missed AX and AL. }
            regReadByInstruction :=
              ((getregtype(reg) = R_INTREGISTER) and (getsupreg(reg) = RS_EAX)) or
              RegInOp(reg,p.oper[0]^);
          2,3:
            regReadByInstruction :=
              reginop(reg,p.oper[0]^) or
              reginop(reg,p.oper[1]^);
        end;
      A_IDIV,A_DIV,A_MUL:
        begin
          regReadByInstruction :=
            RegInOp(reg,p.oper[0]^) or (getsupreg(reg) in [RS_EAX,RS_EDX]);
        end;
      else
        begin
          { a register used for addressing inside any memory operand is read }
          for opcount := 0 to p.ops-1 do
            if (p.oper[opCount]^.typ = top_ref) and
               RegInRef(reg,p.oper[opcount]^.ref^) then
              begin
                RegReadByInstruction := true;
                exit
              end;
          with insprop[p.opcode] do
            begin
              { implicit register reads recorded in the property table }
              case getsupreg(reg) of
                RS_EAX:
                  if [Ch_REAX,Ch_RWEAX,Ch_MEAX]*Ch<>[] then
                    begin
                      RegReadByInstruction := true;
                      exit
                    end;
                RS_ECX:
                  if [Ch_RECX,Ch_RWECX,Ch_MECX]*Ch<>[] then
                    begin
                      RegReadByInstruction := true;
                      exit
                    end;
                RS_EDX:
                  if [Ch_REDX,Ch_RWEDX,Ch_MEDX]*Ch<>[] then
                    begin
                      RegReadByInstruction := true;
                      exit
                    end;
                RS_EBX:
                  if [Ch_REBX,Ch_RWEBX,Ch_MEBX]*Ch<>[] then
                    begin
                      RegReadByInstruction := true;
                      exit
                    end;
                RS_ESP:
                  if [Ch_RESP,Ch_RWESP,Ch_MESP]*Ch<>[] then
                    begin
                      RegReadByInstruction := true;
                      exit
                    end;
                RS_EBP:
                  if [Ch_REBP,Ch_RWEBP,Ch_MEBP]*Ch<>[] then
                    begin
                      RegReadByInstruction := true;
                      exit
                    end;
                RS_ESI:
                  if [Ch_RESI,Ch_RWESI,Ch_MESI]*Ch<>[] then
                    begin
                      RegReadByInstruction := true;
                      exit
                    end;
                RS_EDI:
                  if [Ch_REDI,Ch_RWEDI,Ch_MEDI]*Ch<>[] then
                    begin
                      RegReadByInstruction := true;
                      exit
                    end;
              end;
              { explicit operands that the opcode reads or modifies }
              if ([CH_RWOP1,CH_ROP1,CH_MOP1]*Ch<>[]) and reginop(reg,p.oper[0]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP2,Ch_ROP2,Ch_MOP2]*Ch<>[]) and reginop(reg,p.oper[1]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              if ([Ch_RWOP3,Ch_ROP3,Ch_MOP3]*Ch<>[]) and reginop(reg,p.oper[2]^) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
              { the flags register }
              if ([Ch_RFlags,Ch_RWFlags]*Ch<>[]) and (reg=NR_DEFAULTFLAGS) then
                begin
                  RegReadByInstruction := true;
                  exit
                end;
            end;
        end;
    end;
  end;
  { Reports whether any operand of p is a memory reference that carries a
    segment register (segment <> NR_NO). }
  function InsContainsSegRef(p: taicpu): boolean;
  var
    idx: longint;
  begin
    result:=false;
    for idx:=0 to p.opercnt-1 do
      if (p.oper[idx]^.typ=top_ref) and
         (p.oper[idx]^.ref^.segment<>NR_NO) then
        begin
          result:=true;
          exit;
        end;
  end;
  { Returns true when p has to be treated as reading the flags:
      - a label always counts as reading them;
      - an instruction counts when its property entry contains Ch_RFlags,
        Ch_RWFlags or Ch_All.
    Every other kind of list item yields false. }
  function InstrReadsFlags(p: tai): boolean;
  begin
    if p.typ = ait_label then
      InstrReadsFlags := true
    else if p.typ = ait_instruction then
      InstrReadsFlags :=
        InsProp[taicpu(p).opcode].Ch*[Ch_RFlags,Ch_RWFlags,Ch_All]<>[]
    else
      InstrReadsFlags := false;
  end;
  { Pre-pass over the instructions between BlockStart and BlockEnd.
    Instructions that contain a segment-prefixed memory operand are skipped.
    The following rewrites are performed:

      imul $1, reg               -> removed
      imul $1, reg1, reg2        -> mov reg1, reg2
      imul $c, reg1[, reg2]      -> lea / lea+add / lea+lea sequences for
                                    c in 3, 5, 9 and (only when optimising
                                    for a 386) 6, 10, 12; skipped when
                                    optimising for size or when the next
                                    instruction is a jo/jno
      shr/sar $c1, x ; shl $c2, x -> shift + and, or just and
      xor reg, reg               -> mov $0, reg (temporary form, see below)

    Fix compared with the previous revision: for "imul 6, reg1, reg2" the
    second LEA now writes reg2, as documented in the comment of that case
    and as done for "imul 12"; it used to write (and thereby clobber) reg1. }
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
  var
    p,hp1: tai;
    l: aint;
    tmpRef: treference;
  begin
    p := BlockStart;
    while (p <> BlockEnd) Do
      begin
        case p.Typ Of
          Ait_Instruction:
            begin
              if InsContainsSegRef(taicpu(p)) then
                begin
                  p := tai(p.next);
                  continue;
                end;
              case taicpu(p).opcode Of
                A_IMUL:
                  {changes certain "imul const, %reg"'s to lea sequences}
                  begin
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) and
                       (taicpu(p).opsize = S_L) then
                      if (taicpu(p).oper[0]^.val = 1) then
                        if (taicpu(p).ops = 2) then
                          {remove "imul $1, reg"}
                          begin
                            hp1 := tai(p.Next);
                            asml.remove(p);
                            p.free;
                            p := hp1;
                            continue;
                          end
                        else
                          {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                          begin
                            hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                            InsertLLItem(p.previous, p.next, hp1);
                            p.free;
                            p := hp1;
                          end
                      else if
                        ((taicpu(p).ops <= 2) or
                         (taicpu(p).oper[2]^.typ = Top_Reg)) and
                        (taicpu(p).oper[0]^.val <= 12) and
                        not(cs_opt_size in current_settings.optimizerswitches) and
                        { a following jo/jno depends on the overflow result of
                          the imul, so the instruction must stay as it is }
                        (not(GetNextInstruction(p, hp1)) or
                         {GetNextInstruction(p, hp1) and}
                         not((tai(hp1).typ = ait_instruction) and
                             ((taicpu(hp1).opcode=A_Jcc) and
                              (taicpu(hp1).condition in [C_O,C_NO])))) then
                        begin
                          reference_reset(tmpref,1,[]);
                          case taicpu(p).oper[0]^.val Of
                            3: begin
                                 {imul 3, reg1, reg2 to
                                    lea (reg1,reg1,2), reg2
                                  imul 3, reg1 to
                                    lea (reg1,reg1,2), reg1}
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 2;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            5: begin
                                 {imul 5, reg1, reg2 to
                                    lea (reg1,reg1,4), reg2
                                  imul 5, reg1 to
                                    lea (reg1,reg1,4), reg1}
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 4;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            6: begin
                                 {imul 6, reg1, reg2 to
                                    lea (,reg1,2), reg2
                                    lea (reg2,reg1,4), reg2
                                  imul 6, reg1 to
                                    lea (reg1,reg1,2), reg1
                                    add reg1, reg1}
                                 { NOTE(review): the three-operand expansion
                                   assumes reg1 <> reg2 - confirm that the
                                   code generator never emits reg1 = reg2 }
                                 if (current_settings.optimizecputype <= cpu_386) then
                                   begin
                                     { build the SECOND instruction first and
                                       link it in behind the imul }
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     if (taicpu(p).ops = 3) then
                                       begin
                                         TmpRef.base := taicpu(p).oper[2]^.reg;
                                         TmpRef.ScaleFactor := 4;
                                         { destination is reg2 (oper[2]); it
                                           used to be oper[1], which clobbered
                                           the source register }
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                       end
                                     else
                                       begin
                                         hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                           taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                       end;
                                     InsertLLItem(p, p.next, hp1);
                                     { now the FIRST instruction, which takes
                                       the place of the imul itself }
                                     reference_reset(tmpref,2,[]);
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     TmpRef.ScaleFactor := 2;
                                     if (taicpu(p).ops = 3) then
                                       begin
                                         TmpRef.base := NR_NO;
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                           taicpu(p).oper[2]^.reg);
                                       end
                                     else
                                       begin
                                         TmpRef.base := taicpu(p).oper[1]^.reg;
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                       end;
                                     InsertLLItem(p.previous, p.next, hp1);
                                     p.free;
                                     p := tai(hp1.next);
                                   end
                               end;
                            9: begin
                                 {imul 9, reg1, reg2 to
                                    lea (reg1,reg1,8), reg2
                                  imul 9, reg1 to
                                    lea (reg1,reg1,8), reg1}
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 8;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            10: begin
                                  {imul 10, reg1, reg2 to
                                     lea (reg1,reg1,4), reg2
                                     add reg2, reg2
                                   imul 10, reg1 to
                                     lea (reg1,reg1,4), reg1
                                     add reg1, reg1}
                                  if (current_settings.optimizecputype <= cpu_386) then
                                    begin
                                      { second instruction first (the add) }
                                      if (taicpu(p).ops = 3) then
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                      else
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                      InsertLLItem(p, p.next, hp1);
                                      { then the lea that replaces the imul }
                                      TmpRef.base := taicpu(p).oper[1]^.reg;
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      TmpRef.ScaleFactor := 4;
                                      if (taicpu(p).ops = 3) then
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                      else
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                      InsertLLItem(p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end
                                end;
                            12: begin
                                  {imul 12, reg1, reg2 to
                                     lea (,reg1,4), reg2
                                     lea (reg2,reg1,8), reg2
                                   imul 12, reg1 to
                                     lea (reg1,reg1,2), reg1
                                     lea (,reg1,4), reg1}
                                  if (current_settings.optimizecputype <= cpu_386)
                                    then
                                    begin
                                      { second instruction first }
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if (taicpu(p).ops = 3) then
                                        begin
                                          TmpRef.base := taicpu(p).oper[2]^.reg;
                                          TmpRef.ScaleFactor := 8;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(p, p.next, hp1);
                                      { then the instruction replacing the imul }
                                      reference_reset(tmpref,2,[]);
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if (taicpu(p).ops = 3) then
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := taicpu(p).oper[1]^.reg;
                                          TmpRef.ScaleFactor := 2;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end
                                end
                          end;
                        end;
                  end;
                A_SAR, A_SHR:
                  {changes the code sequence
                     shr/sar const1, x
                     shl const2, x
                   to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                  begin
                    if GetNextInstruction(p, hp1) and
                       (tai(hp1).typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_SHL) and
                       (taicpu(p).oper[0]^.typ = top_const) and
                       (taicpu(hp1).oper[0]^.typ = top_const) and
                       (taicpu(hp1).opsize = taicpu(p).opsize) and
                       (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                       OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                      if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                         not(cs_opt_size in current_settings.optimizerswitches) then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 > const2 }
                        begin
                          { keep a shift by the difference, then mask away the
                            low const2 bits }
                          taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                          taicpu(hp1).opcode := A_AND;
                          l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                          case taicpu(p).opsize Of
                            S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                            S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                            S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                          end;
                        end
                      else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                              not(cs_opt_size in current_settings.optimizerswitches) then
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 < const2 }
                        begin
                          { mask away the low const1 bits, then shift left by
                            the difference }
                          taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                          taicpu(p).opcode := A_AND;
                          l := (1 shl (taicpu(p).oper[0]^.val))-1;
                          case taicpu(p).opsize Of
                            S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                            S_B: taicpu(p).loadConst(0,l Xor $ff);
                            S_W: taicpu(p).loadConst(0,l Xor $ffff);
                          end;
                        end
                      else
                        { shr/sar const1, %reg
                          shl const2, %reg
                          with const1 = const2 }
                        if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                          begin
                            { a single "and" is enough; the shl is removed }
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize Of
                              S_B: taicpu(p).loadConst(0,l Xor $ff);
                              S_W: taicpu(p).loadConst(0,l Xor $ffff);
                              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                            end;
                            asml.remove(hp1);
                            hp1.free;
                          end;
                  end;
                A_XOR:
                  if (taicpu(p).oper[0]^.typ = top_reg) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                    { temporarily change this to 'mov reg,0' to make it easier }
                    { for the CSE. Will be changed back in pass 2 }
                    begin
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).loadConst(0,0);
                    end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  { Walks forward from hp over every following item whose type is in
    SkipInstr, a label or an alignment.
    On success hp2 receives the first item behind that run and the result is
    true. When the list ends first, hp2 receives the last item reached and
    the result is false. hp and hp2 may refer to the same variable. }
  function SkipLabels(hp: tai; var hp2: tai): boolean;
  var
    follower: tai;
  begin
    follower := tai(hp.next);
    while assigned(follower) and
          (follower.typ in SkipInstr + [ait_label,ait_align]) do
      begin
        hp := follower;
        follower := tai(hp.next);
      end;
    SkipLabels := assigned(follower);
    if assigned(follower) then
      hp2 := follower
    else
      hp2 := hp;
  end;
  575. { First pass of peephole optimizations }
  576. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  { Debugging aid: prints 'Ok' to standard output and always yields True, so
    it can be spliced into a boolean expression to trace evaluation. }
  function WriteOk : Boolean;
  begin
    writeln('Ok');
    WriteOk := True;
  end;
  582. var
  583. l : longint;
  584. p,hp1,hp2 : tai;
  585. hp3,hp4: tai;
  586. v:aint;
  587. TmpRef: TReference;
  588. TmpBool1, TmpBool2: Boolean;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  {traces successive jumps to their final destination and sets it, e.g.
     je l1                je l3
     <code>               <code>
     l1:       becomes    l1:
     je l2                je l3
     <code>               <code>
     l2:                  l2:
     jmp l3               jmp l3

   the level parameter denotes how deep we have already followed the jump,
   to avoid endless loops with constructs such as "l5: ; jmp l5"

   Returns false when the trace was abandoned (more than 20 levels deep, or
   a jump that targets itself); returns true otherwise, also when nothing
   had to be changed.

   Fix compared with the previous revision: the calls inside the
   finaldestdebug blocks used a stale four-argument insertllitem form with an
   unbalanced parenthesis and could not compile with that define enabled;
   they now use the same three-argument call as the rest of this routine. }
  var p1, p2: tai;
      l: tasmlabel;

    { Skips the items in SkipInstr and alignments that follow hp. Returns
      true and stores the label symbol in l when the next item after those
      is a label; otherwise returns false and leaves l untouched. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
    begin
      FindAnyLabel := false;
      while assigned(hp.next) and
            (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
        hp := tai(hp.next);
      if assigned(hp.next) and
         (tai(hp.next).typ = ait_label) then
        begin
          FindAnyLabel := true;
          l := tai_label(hp.next).labsym;
        end
    end;

  begin
    GetfinalDestination := false;
    if level > 20 then
      exit;
    p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
    if assigned(p1) then
      begin
        { move p1 to the first real instruction behind the target label }
        SkipLabels(p1,p1);
        if (tai(p1).typ = ait_instruction) and
           (taicpu(p1).is_jmp) then
          if { the next instruction after the label where the jump hp arrives}
             { is unconditional or of the same type as hp, so continue       }
             (taicpu(p1).condition in [C_None,hp.condition]) or
             { the next instruction after the label where the jump hp arrives}
             { is the opposite of hp (so this one is never taken), but after }
             { that one there is a branch that will be taken, so perform a   }
             { little hack: set p1 equal to this instruction (that's what the}
             { last SkipLabels is for, only works with short bool evaluation)}
             ((taicpu(p1).condition = inverse_cond(hp.condition)) and
              SkipLabels(p1,p2) and
              (p2.typ = ait_instruction) and
              (taicpu(p2).is_jmp) and
              (taicpu(p2).condition in [C_None,hp.condition]) and
              SkipLabels(p1,p1)) then
            begin
              { quick check for loops of the form "l5: ; jmp l5 }
              if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                  tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                exit;
              if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                exit;
              { retarget hp to the destination of p1 and keep the label
                reference counts in sync }
              tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
              hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
              tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
            end
          else
            if (taicpu(p1).condition = inverse_cond(hp.condition)) then
              { the jump at the target is never taken when arriving via hp,
                so hp can jump directly behind it }
              if not FindAnyLabel(p1,l) then
                begin
  {$ifdef finaldestdebug}
                  insertllitem(p1,p1.next,tai_comment.Create(
                    strpnew('previous label inserted')));
  {$endif finaldestdebug}
                  current_asmdata.getjumplabel(l);
                  insertllitem(p1,p1.next,tai_label.Create(l));
                  tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol := l;
                  l.increfs;
                  { this won't work, since the new label isn't in the labeltable }
                  { so it will fail the rangecheck. Labeltable should become a   }
                  { hashtable to support this:                                   }
                  { GetFinalDestination(asml, hp); }
                end
              else
                begin
  {$ifdef finaldestdebug}
                  insertllitem(p1,p1.next,tai_comment.Create(
                    strpnew('next label reused')));
  {$endif finaldestdebug}
                  { NOTE(review): unlike the branch above, the previously
                    referenced label is not decref'ed here - confirm whether
                    that is intentional }
                  l.increfs;
                  hp.oper[0]^.ref^.symbol := l;
                  if not GetFinalDestination(asml, hp,succ(level)) then
                    exit;
                end;
      end;
    GetFinalDestination := true;
  end;
  { Folds the instruction in front of p into p, where p is expected to be a
    "sub const, reg" (the caller guarantees the operand layout):
        dec reg        ; sub c, reg   ->  sub c+1, reg
        sub c1, reg    ; sub c, reg   ->  sub c+c1, reg
        add c1, reg    ; sub c, reg   ->  sub c-c1, reg
    In the add case a resulting constant of 0 makes p itself redundant: it is
    removed, p is moved to the preceding instruction (or the following item
    if there is none) and the result is True. In every other case the result
    is False. Uses the enclosing routine's hp1 as scratch variable. }
  function DoSubAddOpt(var p: tai): Boolean;

    { True when hp1 has the shape "<op> const, reg" with reg being the
      destination register of p. }
    function PrevIsConstOnDest: boolean;
    begin
      PrevIsConstOnDest :=
        (taicpu(hp1).oper[0]^.typ = top_const) and
        (taicpu(hp1).oper[1]^.typ = top_reg) and
        (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg);
    end;

  begin
    DoSubAddOpt := False;
    if not GetLastInstruction(p, hp1) then
      exit;
    if hp1.typ <> ait_instruction then
      exit;
    if taicpu(hp1).opsize <> taicpu(p).opsize then
      exit;

    case taicpu(hp1).opcode Of
      A_DEC:
        if (taicpu(hp1).oper[0]^.typ = top_reg) and
           (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            taicpu(p).loadConst(0, taicpu(p).oper[0]^.val + 1);
            asml.remove(hp1);
            hp1.free;
          end;
      A_SUB:
        if PrevIsConstOnDest then
          begin
            taicpu(p).loadConst(0, taicpu(p).oper[0]^.val + taicpu(hp1).oper[0]^.val);
            asml.remove(hp1);
            hp1.free;
          end;
      A_ADD:
        if PrevIsConstOnDest then
          begin
            taicpu(p).loadConst(0, taicpu(p).oper[0]^.val - taicpu(hp1).oper[0]^.val);
            asml.remove(hp1);
            hp1.free;
            if (taicpu(p).oper[0]^.val = 0) then
              begin
                { "sub $0, reg" does nothing: drop it as well }
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                if not GetLastInstruction(hp1, p) then
                  p := hp1;
                DoSubAddOpt := True;
              end
          end;
    end;
  end;
  726. begin
  727. p := BlockStart;
  728. ClearUsedRegs;
  729. while (p <> BlockEnd) Do
  730. begin
  731. UpDateUsedRegs(UsedRegs, tai(p.next));
  732. case p.Typ Of
  733. ait_instruction:
  734. begin
  735. current_filepos:=taicpu(p).fileinfo;
  736. if InsContainsSegRef(taicpu(p)) then
  737. begin
  738. p := tai(p.next);
  739. continue;
  740. end;
  741. { Handle Jmp Optimizations }
  742. if taicpu(p).is_jmp then
  743. begin
  744. {the following if-block removes all code between a jmp and the next label,
  745. because it can never be executed}
  746. if (taicpu(p).opcode = A_JMP) then
  747. begin
  748. hp2:=p;
  749. while GetNextInstruction(hp2, hp1) and
  750. (hp1.typ <> ait_label) do
  751. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  752. begin
  753. { don't kill start/end of assembler block,
  754. no-line-info-start/end etc }
  755. if hp1.typ<>ait_marker then
  756. begin
  757. asml.remove(hp1);
  758. hp1.free;
  759. end
  760. else
  761. hp2:=hp1;
  762. end
  763. else break;
  764. end;
  765. { remove jumps to a label coming right after them }
  766. if GetNextInstruction(p, hp1) then
  767. begin
  768. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  769. { TODO: FIXME removing the first instruction fails}
  770. (p<>blockstart) then
  771. begin
  772. hp2:=tai(hp1.next);
  773. asml.remove(p);
  774. p.free;
  775. p:=hp2;
  776. continue;
  777. end
  778. else
  779. begin
  780. if hp1.typ = ait_label then
  781. SkipLabels(hp1,hp1);
  782. if (tai(hp1).typ=ait_instruction) and
  783. (taicpu(hp1).opcode=A_JMP) and
  784. GetNextInstruction(hp1, hp2) and
  785. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  786. begin
  787. if taicpu(p).opcode=A_Jcc then
  788. begin
  789. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  790. tai_label(hp2).labsym.decrefs;
  791. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  792. { when freeing hp1, the ref. count isn't decreased, so we don't
  793. increase it (FK)
  794. taicpu(p).oper[0]^.ref^.symbol.increfs;
  795. }
  796. asml.remove(hp1);
  797. hp1.free;
  798. GetFinalDestination(asml, taicpu(p),0);
  799. end
  800. else
  801. begin
  802. GetFinalDestination(asml, taicpu(p),0);
  803. p:=tai(p.next);
  804. continue;
  805. end;
  806. end
  807. else
  808. GetFinalDestination(asml, taicpu(p),0);
  809. end;
  810. end;
  811. end
  812. else
  813. { All other optimizes }
  814. begin
  815. for l := 0 to taicpu(p).ops-1 Do
  816. if (taicpu(p).oper[l]^.typ = top_ref) then
  817. With taicpu(p).oper[l]^.ref^ Do
  818. begin
  819. if (base = NR_NO) and
  820. (index <> NR_NO) and
  821. (scalefactor in [0,1]) then
  822. begin
  823. base := index;
  824. index := NR_NO
  825. end
  826. end;
  827. case taicpu(p).opcode Of
  828. A_AND:
  829. if OptPass1And(p) then
  830. continue;
  831. A_CMP:
  832. begin
  833. { cmp register,$8000 neg register
  834. je target --> jo target
  835. .... only if register is deallocated before jump.}
  836. case Taicpu(p).opsize of
  837. S_B: v:=$80;
  838. S_W: v:=$8000;
  839. S_L: v:=aint($80000000);
  840. else
  841. internalerror(2013112905);
  842. end;
  843. if (taicpu(p).oper[0]^.typ=Top_const) and
  844. (taicpu(p).oper[0]^.val=v) and
  845. (Taicpu(p).oper[1]^.typ=top_reg) and
  846. GetNextInstruction(p, hp1) and
  847. (hp1.typ=ait_instruction) and
  848. (taicpu(hp1).opcode=A_Jcc) and
  849. (Taicpu(hp1).condition in [C_E,C_NE]) and
  850. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  851. begin
  852. Taicpu(p).opcode:=A_NEG;
  853. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  854. Taicpu(p).clearop(1);
  855. Taicpu(p).ops:=1;
  856. if Taicpu(hp1).condition=C_E then
  857. Taicpu(hp1).condition:=C_O
  858. else
  859. Taicpu(hp1).condition:=C_NO;
  860. continue;
  861. end;
  862. {
  863. @@2: @@2:
  864. .... ....
  865. cmp operand1,0
  866. jle/jbe @@1
  867. dec operand1 --> sub operand1,1
  868. jmp @@2 jge/jae @@2
  869. @@1: @@1:
  870. ... ....}
  871. if (taicpu(p).oper[0]^.typ = top_const) and
  872. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  873. (taicpu(p).oper[0]^.val = 0) and
  874. GetNextInstruction(p, hp1) and
  875. (hp1.typ = ait_instruction) and
  876. (taicpu(hp1).is_jmp) and
  877. (taicpu(hp1).opcode=A_Jcc) and
  878. (taicpu(hp1).condition in [C_LE,C_BE]) and
  879. GetNextInstruction(hp1,hp2) and
  880. (hp2.typ = ait_instruction) and
  881. (taicpu(hp2).opcode = A_DEC) and
  882. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  883. GetNextInstruction(hp2, hp3) and
  884. (hp3.typ = ait_instruction) and
  885. (taicpu(hp3).is_jmp) and
  886. (taicpu(hp3).opcode = A_JMP) and
  887. GetNextInstruction(hp3, hp4) and
  888. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  889. begin
  890. taicpu(hp2).Opcode := A_SUB;
  891. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  892. taicpu(hp2).loadConst(0,1);
  893. taicpu(hp2).ops:=2;
  894. taicpu(hp3).Opcode := A_Jcc;
  895. case taicpu(hp1).condition of
  896. C_LE: taicpu(hp3).condition := C_GE;
  897. C_BE: taicpu(hp3).condition := C_AE;
  898. end;
  899. asml.remove(p);
  900. asml.remove(hp1);
  901. p.free;
  902. hp1.free;
  903. p := hp2;
  904. continue;
  905. end
  906. end;
  907. A_FLD:
  908. begin
  909. if (taicpu(p).oper[0]^.typ = top_reg) and
  910. GetNextInstruction(p, hp1) and
  911. (hp1.typ = Ait_Instruction) and
  912. (taicpu(hp1).oper[0]^.typ = top_reg) and
  913. (taicpu(hp1).oper[1]^.typ = top_reg) and
  914. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  915. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  916. { change to
  917. fld reg fxxx reg,st
  918. fxxxp st, st1 (hp1)
  919. Remark: non commutative operations must be reversed!
  920. }
  921. begin
  922. case taicpu(hp1).opcode Of
  923. A_FMULP,A_FADDP,
  924. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  925. begin
  926. case taicpu(hp1).opcode Of
  927. A_FADDP: taicpu(hp1).opcode := A_FADD;
  928. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  929. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  930. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  931. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  932. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  933. end;
  934. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  935. taicpu(hp1).oper[1]^.reg := NR_ST;
  936. asml.remove(p);
  937. p.free;
  938. p := hp1;
  939. continue;
  940. end;
  941. end;
  942. end
  943. else
  944. if (taicpu(p).oper[0]^.typ = top_ref) and
  945. GetNextInstruction(p, hp2) and
  946. (hp2.typ = Ait_Instruction) and
  947. (taicpu(hp2).ops = 2) and
  948. (taicpu(hp2).oper[0]^.typ = top_reg) and
  949. (taicpu(hp2).oper[1]^.typ = top_reg) and
  950. (taicpu(p).opsize in [S_FS, S_FL]) and
  951. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  952. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  953. if GetLastInstruction(p, hp1) and
  954. (hp1.typ = Ait_Instruction) and
  955. ((taicpu(hp1).opcode = A_FLD) or
  956. (taicpu(hp1).opcode = A_FST)) and
  957. (taicpu(hp1).opsize = taicpu(p).opsize) and
  958. (taicpu(hp1).oper[0]^.typ = top_ref) and
  959. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  960. if ((taicpu(hp2).opcode = A_FMULP) or
  961. (taicpu(hp2).opcode = A_FADDP)) then
  962. { change to
  963. fld/fst mem1 (hp1) fld/fst mem1
  964. fld mem1 (p) fadd/
  965. faddp/ fmul st, st
  966. fmulp st, st1 (hp2) }
  967. begin
  968. asml.remove(p);
  969. p.free;
  970. p := hp1;
  971. if (taicpu(hp2).opcode = A_FADDP) then
  972. taicpu(hp2).opcode := A_FADD
  973. else
  974. taicpu(hp2).opcode := A_FMUL;
  975. taicpu(hp2).oper[1]^.reg := NR_ST;
  976. end
  977. else
  978. { change to
  979. fld/fst mem1 (hp1) fld/fst mem1
  980. fld mem1 (p) fld st}
  981. begin
  982. taicpu(p).changeopsize(S_FL);
  983. taicpu(p).loadreg(0,NR_ST);
  984. end
  985. else
  986. begin
  987. case taicpu(hp2).opcode Of
  988. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  989. { change to
  990. fld/fst mem1 (hp1) fld/fst mem1
  991. fld mem2 (p) fxxx mem2
  992. fxxxp st, st1 (hp2) }
  993. begin
  994. case taicpu(hp2).opcode Of
  995. A_FADDP: taicpu(p).opcode := A_FADD;
  996. A_FMULP: taicpu(p).opcode := A_FMUL;
  997. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  998. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  999. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  1000. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  1001. end;
  1002. asml.remove(hp2);
  1003. hp2.free;
  1004. end
  1005. end
  1006. end
  1007. end;
  1008. A_FSTP,A_FISTP:
  1009. if doFpuLoadStoreOpt(p) then
  1010. continue;
  1011. A_LEA:
  1012. begin
  1013. {removes seg register prefixes from LEA operations, as they
  1014. don't do anything}
  1015. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  1016. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  1017. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1018. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  1019. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1020. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  1021. begin
  1022. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  1023. (taicpu(p).oper[0]^.ref^.offset = 0) then
  1024. begin
  1025. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  1026. taicpu(p).oper[1]^.reg);
  1027. InsertLLItem(p.previous,p.next, hp1);
  1028. p.free;
  1029. p := hp1;
  1030. continue;
  1031. end
  1032. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  1033. begin
  1034. hp1 := tai(p.Next);
  1035. asml.remove(p);
  1036. p.free;
  1037. p := hp1;
  1038. continue;
  1039. end
  1040. { continue to use lea to adjust the stack pointer,
  1041. it is the recommended way, but only if not optimizing for size }
  1042. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  1043. (cs_opt_size in current_settings.optimizerswitches) then
  1044. with taicpu(p).oper[0]^.ref^ do
  1045. if (base = taicpu(p).oper[1]^.reg) then
  1046. begin
  1047. l := offset;
  1048. if (l=1) and UseIncDec then
  1049. begin
  1050. taicpu(p).opcode := A_INC;
  1051. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1052. taicpu(p).ops := 1
  1053. end
  1054. else if (l=-1) and UseIncDec then
  1055. begin
  1056. taicpu(p).opcode := A_DEC;
  1057. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  1058. taicpu(p).ops := 1;
  1059. end
  1060. else
  1061. begin
  1062. if (l<0) and (l<>-2147483648) then
  1063. begin
  1064. taicpu(p).opcode := A_SUB;
  1065. taicpu(p).loadConst(0,-l);
  1066. end
  1067. else
  1068. begin
  1069. taicpu(p).opcode := A_ADD;
  1070. taicpu(p).loadConst(0,l);
  1071. end;
  1072. end;
  1073. end;
  1074. end
  1075. (*
  1076. This is unsafe, lea doesn't modify the flags but "add"
  1077. does. This breaks webtbs/tw15694.pp. The above
  1078. transformations are also unsafe, but they don't seem to
  1079. be triggered by code that FPC generates (or that at
  1080. least does not occur in the tests...). This needs to be
  1081. fixed by checking for the liveness of the flags register.
  1082. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  1083. begin
  1084. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  1085. taicpu(p).oper[0]^.ref^.base);
  1086. InsertLLItem(asml,p.previous,p.next, hp1);
  1087. DebugMsg('Peephole Lea2AddBase done',hp1);
  1088. p.free;
  1089. p:=hp1;
  1090. continue;
  1091. end
  1092. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  1093. begin
  1094. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  1095. taicpu(p).oper[0]^.ref^.index);
  1096. InsertLLItem(asml,p.previous,p.next,hp1);
  1097. DebugMsg('Peephole Lea2AddIndex done',hp1);
  1098. p.free;
  1099. p:=hp1;
  1100. continue;
  1101. end
  1102. *)
  1103. end;
  1104. A_MOV:
  1105. begin
  1106. If OptPass1MOV(p) then
  1107. Continue;
  1108. end;
  1109. A_MOVSX,
  1110. A_MOVZX :
  1111. begin
  1112. if (taicpu(p).oper[1]^.typ = top_reg) and
  1113. GetNextInstruction(p,hp1) and
  1114. (hp1.typ = ait_instruction) and
  1115. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1116. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1117. GetNextInstruction(hp1,hp2) and
  1118. MatchInstruction(hp2,A_MOV,[]) and
  1119. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1120. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
  1121. (((taicpu(hp1).ops=2) and
  1122. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[1]^.reg))) or
  1123. ((taicpu(hp1).ops=1) and
  1124. (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp1).oper[0]^.reg)))) and
  1125. not(RegUsedAfterInstruction(taicpu(hp2).oper[0]^.reg,hp2,UsedRegs)) then
  1126. { change movsX/movzX reg/ref, reg2 }
  1127. { add/sub/or/... reg3/$const, reg2 }
  1128. { mov reg2 reg/ref }
  1129. { to add/sub/or/... reg3/$const, reg/ref }
  1130. begin
  1131. { by example:
  1132. movswl %si,%eax movswl %si,%eax p
  1133. decl %eax addl %edx,%eax hp1
  1134. movw %ax,%si movw %ax,%si hp2
  1135. ->
  1136. movswl %si,%eax movswl %si,%eax p
  1137. decw %eax addw %edx,%eax hp1
  1138. movw %ax,%si movw %ax,%si hp2
  1139. }
  1140. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1141. {
  1142. ->
  1143. movswl %si,%eax movswl %si,%eax p
  1144. decw %si addw %dx,%si hp1
  1145. movw %ax,%si movw %ax,%si hp2
  1146. }
  1147. case taicpu(hp1).ops of
  1148. 1:
  1149. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1150. 2:
  1151. begin
  1152. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1153. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1154. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1155. end;
  1156. else
  1157. internalerror(2008042701);
  1158. end;
  1159. {
  1160. ->
  1161. decw %si addw %dx,%si p
  1162. }
  1163. asml.remove(p);
  1164. asml.remove(hp2);
  1165. p.free;
  1166. hp2.free;
  1167. p := hp1
  1168. end
  1169. { removes superfluous And's after movzx's }
  1170. else if taicpu(p).opcode=A_MOVZX then
  1171. begin
  1172. if (taicpu(p).oper[1]^.typ = top_reg) and
  1173. GetNextInstruction(p, hp1) and
  1174. (tai(hp1).typ = ait_instruction) and
  1175. (taicpu(hp1).opcode = A_AND) and
  1176. (taicpu(hp1).oper[0]^.typ = top_const) and
  1177. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1178. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1179. case taicpu(p).opsize Of
  1180. S_BL, S_BW:
  1181. if (taicpu(hp1).oper[0]^.val = $ff) then
  1182. begin
  1183. asml.remove(hp1);
  1184. hp1.free;
  1185. end;
  1186. S_WL:
  1187. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1188. begin
  1189. asml.remove(hp1);
  1190. hp1.free;
  1191. end;
  1192. end;
  1193. {changes some movzx constructs to faster synonyms (all examples
  1194. are given with eax/ax, but are also valid for other registers)}
  1195. if (taicpu(p).oper[1]^.typ = top_reg) then
  1196. if (taicpu(p).oper[0]^.typ = top_reg) then
  1197. case taicpu(p).opsize of
  1198. S_BW:
  1199. begin
  1200. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1201. not(cs_opt_size in current_settings.optimizerswitches) then
  1202. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1203. begin
  1204. taicpu(p).opcode := A_AND;
  1205. taicpu(p).changeopsize(S_W);
  1206. taicpu(p).loadConst(0,$ff);
  1207. end
  1208. else if GetNextInstruction(p, hp1) and
  1209. (tai(hp1).typ = ait_instruction) and
  1210. (taicpu(hp1).opcode = A_AND) and
  1211. (taicpu(hp1).oper[0]^.typ = top_const) and
  1212. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1213. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1214. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1215. to "movw %reg1, reg2; andw $(const1 and $ff), %reg2"}
  1216. begin
  1217. taicpu(p).opcode := A_MOV;
  1218. taicpu(p).changeopsize(S_W);
  1219. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1220. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1221. end;
  1222. end;
  1223. S_BL:
  1224. begin
  1225. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1226. not(cs_opt_size in current_settings.optimizerswitches) then
  1227. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1228. begin
  1229. taicpu(p).opcode := A_AND;
  1230. taicpu(p).changeopsize(S_L);
  1231. taicpu(p).loadConst(0,$ff)
  1232. end
  1233. else if GetNextInstruction(p, hp1) and
  1234. (tai(hp1).typ = ait_instruction) and
  1235. (taicpu(hp1).opcode = A_AND) and
  1236. (taicpu(hp1).oper[0]^.typ = top_const) and
  1237. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1238. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1239. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1240. to "movl %reg1, reg2; andl $(const1 and $ff), %reg2"}
  1241. begin
  1242. taicpu(p).opcode := A_MOV;
  1243. taicpu(p).changeopsize(S_L);
  1244. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1245. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1246. end
  1247. end;
  1248. S_WL:
  1249. begin
  1250. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1251. not(cs_opt_size in current_settings.optimizerswitches) then
  1252. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1253. begin
  1254. taicpu(p).opcode := A_AND;
  1255. taicpu(p).changeopsize(S_L);
  1256. taicpu(p).loadConst(0,$ffff);
  1257. end
  1258. else if GetNextInstruction(p, hp1) and
  1259. (tai(hp1).typ = ait_instruction) and
  1260. (taicpu(hp1).opcode = A_AND) and
  1261. (taicpu(hp1).oper[0]^.typ = top_const) and
  1262. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1263. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1264. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1265. to "movl %reg1, reg2; andl $(const1 and $ffff), %reg2"}
  1266. begin
  1267. taicpu(p).opcode := A_MOV;
  1268. taicpu(p).changeopsize(S_L);
  1269. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1270. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1271. end;
  1272. end;
  1273. end
  1274. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1275. begin
  1276. if GetNextInstruction(p, hp1) and
  1277. (tai(hp1).typ = ait_instruction) and
  1278. (taicpu(hp1).opcode = A_AND) and
  1279. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1280. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1281. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1282. begin
  1283. taicpu(p).opcode := A_MOV;
  1284. case taicpu(p).opsize Of
  1285. S_BL:
  1286. begin
  1287. taicpu(p).changeopsize(S_L);
  1288. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1289. end;
  1290. S_WL:
  1291. begin
  1292. taicpu(p).changeopsize(S_L);
  1293. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1294. end;
  1295. S_BW:
  1296. begin
  1297. taicpu(p).changeopsize(S_W);
  1298. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1299. end;
  1300. end;
  1301. end;
  1302. end;
  1303. end;
  1304. end;
  1305. (* should not be generated anymore by the current code generator
  1306. A_POP:
  1307. begin
  1308. if target_info.system=system_i386_go32v2 then
  1309. begin
  1310. { Transform a series of pop/pop/pop/push/push/push to }
  1311. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1312. { because I'm not sure whether they can cope with }
  1313. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1314. { such a problem when using esp as frame pointer (JM) }
  1315. if (taicpu(p).oper[0]^.typ = top_reg) then
  1316. begin
  1317. hp1 := p;
  1318. hp2 := p;
  1319. l := 0;
  1320. while getNextInstruction(hp1,hp1) and
  1321. (hp1.typ = ait_instruction) and
  1322. (taicpu(hp1).opcode = A_POP) and
  1323. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1324. begin
  1325. hp2 := hp1;
  1326. inc(l,4);
  1327. end;
  1328. getLastInstruction(p,hp3);
  1329. l1 := 0;
  1330. while (hp2 <> hp3) and
  1331. assigned(hp1) and
  1332. (hp1.typ = ait_instruction) and
  1333. (taicpu(hp1).opcode = A_PUSH) and
  1334. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1335. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1336. begin
  1337. { change it to a two op operation }
  1338. taicpu(hp2).oper[1]^.typ:=top_none;
  1339. taicpu(hp2).ops:=2;
  1340. taicpu(hp2).opcode := A_MOV;
  1341. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1342. reference_reset(tmpref);
  1343. tmpRef.base.enum:=R_INTREGISTER;
  1344. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1345. convert_register_to_enum(tmpref.base);
  1346. tmpRef.offset := l;
  1347. taicpu(hp2).loadRef(0,tmpRef);
  1348. hp4 := hp1;
  1349. getNextInstruction(hp1,hp1);
  1350. asml.remove(hp4);
  1351. hp4.free;
  1352. getLastInstruction(hp2,hp2);
  1353. dec(l,4);
  1354. inc(l1);
  1355. end;
  1356. if l <> -4 then
  1357. begin
  1358. inc(l,4);
  1359. for l1 := l1 downto 1 do
  1360. begin
  1361. getNextInstruction(hp2,hp2);
  1362. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1363. end
  1364. end
  1365. end
  1366. end
  1367. else
  1368. begin
  1369. if (taicpu(p).oper[0]^.typ = top_reg) and
  1370. GetNextInstruction(p, hp1) and
  1371. (tai(hp1).typ=ait_instruction) and
  1372. (taicpu(hp1).opcode=A_PUSH) and
  1373. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1374. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1375. begin
  1376. { change it to a two op operation }
  1377. taicpu(p).oper[1]^.typ:=top_none;
  1378. taicpu(p).ops:=2;
  1379. taicpu(p).opcode := A_MOV;
  1380. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1381. reference_reset(tmpref);
  1382. TmpRef.base.enum := R_ESP;
  1383. taicpu(p).loadRef(0,TmpRef);
  1384. asml.remove(hp1);
  1385. hp1.free;
  1386. end;
  1387. end;
  1388. end;
  1389. *)
  1390. A_PUSH:
  1391. begin
  1392. if (taicpu(p).opsize = S_W) and
  1393. (taicpu(p).oper[0]^.typ = Top_Const) and
  1394. GetNextInstruction(p, hp1) and
  1395. (tai(hp1).typ = ait_instruction) and
  1396. (taicpu(hp1).opcode = A_PUSH) and
  1397. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1398. (taicpu(hp1).opsize = S_W) then
  1399. begin
  1400. taicpu(p).changeopsize(S_L);
  1401. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1402. asml.remove(hp1);
  1403. hp1.free;
  1404. end;
  1405. end;
  1406. A_SHL, A_SAL:
  1407. begin
  1408. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1409. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1410. (taicpu(p).opsize = S_L) and
  1411. (taicpu(p).oper[0]^.val <= 3) then
  1412. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1413. begin
  1414. TmpBool1 := True; {should we check the next instruction?}
  1415. TmpBool2 := False; {have we found an add/sub which could be
  1416. integrated in the lea?}
  1417. reference_reset(tmpref,2,[]);
  1418. TmpRef.index := taicpu(p).oper[1]^.reg;
  1419. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1420. while TmpBool1 and
  1421. GetNextInstruction(p, hp1) and
  1422. (tai(hp1).typ = ait_instruction) and
  1423. ((((taicpu(hp1).opcode = A_ADD) or
  1424. (taicpu(hp1).opcode = A_SUB)) and
  1425. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1426. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1427. (((taicpu(hp1).opcode = A_INC) or
  1428. (taicpu(hp1).opcode = A_DEC)) and
  1429. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1430. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1431. (not GetNextInstruction(hp1,hp2) or
  1432. not instrReadsFlags(hp2)) Do
  1433. begin
  1434. TmpBool1 := False;
  1435. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1436. begin
  1437. TmpBool1 := True;
  1438. TmpBool2 := True;
  1439. case taicpu(hp1).opcode of
  1440. A_ADD:
  1441. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1442. A_SUB:
  1443. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1444. end;
  1445. asml.remove(hp1);
  1446. hp1.free;
  1447. end
  1448. else
  1449. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1450. (((taicpu(hp1).opcode = A_ADD) and
  1451. (TmpRef.base = NR_NO)) or
  1452. (taicpu(hp1).opcode = A_INC) or
  1453. (taicpu(hp1).opcode = A_DEC)) then
  1454. begin
  1455. TmpBool1 := True;
  1456. TmpBool2 := True;
  1457. case taicpu(hp1).opcode of
  1458. A_ADD:
  1459. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1460. A_INC:
  1461. inc(TmpRef.offset);
  1462. A_DEC:
  1463. dec(TmpRef.offset);
  1464. end;
  1465. asml.remove(hp1);
  1466. hp1.free;
  1467. end;
  1468. end;
  1469. if TmpBool2 or
  1470. ((current_settings.optimizecputype < cpu_Pentium2) and
  1471. (taicpu(p).oper[0]^.val <= 3) and
  1472. not(cs_opt_size in current_settings.optimizerswitches)) then
  1473. begin
  1474. if not(TmpBool2) and
  1475. (taicpu(p).oper[0]^.val = 1) then
  1476. begin
  1477. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1478. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1479. end
  1480. else
  1481. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1482. taicpu(p).oper[1]^.reg);
  1483. InsertLLItem(p.previous, p.next, hp1);
  1484. p.free;
  1485. p := hp1;
  1486. end;
  1487. end
  1488. else
  1489. if (current_settings.optimizecputype < cpu_Pentium2) and
  1490. (taicpu(p).oper[0]^.typ = top_const) and
  1491. (taicpu(p).oper[1]^.typ = top_reg) then
  1492. if (taicpu(p).oper[0]^.val = 1) then
  1493. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1494. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1495. (unlike shl, which is only pairable in the U pipe)}
  1496. begin
  1497. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1498. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1499. InsertLLItem(p.previous, p.next, hp1);
  1500. p.free;
  1501. p := hp1;
  1502. end
  1503. else if (taicpu(p).opsize = S_L) and
  1504. (taicpu(p).oper[0]^.val<= 3) then
  1505. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1506. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1507. begin
  1508. reference_reset(tmpref,2,[]);
  1509. TmpRef.index := taicpu(p).oper[1]^.reg;
  1510. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1511. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1512. InsertLLItem(p.previous, p.next, hp1);
  1513. p.free;
  1514. p := hp1;
  1515. end
  1516. end;
  1517. A_SETcc :
  1518. { changes
  1519. setcc (funcres) setcc reg
  1520. movb (funcres), reg to leave/ret
  1521. leave/ret }
  1522. begin
  1523. if (taicpu(p).oper[0]^.typ = top_ref) and
  1524. GetNextInstruction(p, hp1) and
  1525. GetNextInstruction(hp1, hp2) and
  1526. IsExitCode(hp2) and
  1527. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1528. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1529. not(assigned(current_procinfo.procdef.funcretsym) and
  1530. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1531. (hp1.typ = ait_instruction) and
  1532. (taicpu(hp1).opcode = A_MOV) and
  1533. (taicpu(hp1).opsize = S_B) and
  1534. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1535. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1536. begin
  1537. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1538. asml.remove(hp1);
  1539. hp1.free;
  1540. end
  1541. end;
  1542. A_SUB:
  1543. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1544. { * change "sub/add const1, reg" or "dec reg" followed by
  1545. "sub const2, reg" to one "sub ..., reg" }
  1546. begin
  1547. if (taicpu(p).oper[0]^.typ = top_const) and
  1548. (taicpu(p).oper[1]^.typ = top_reg) then
  1549. if (taicpu(p).oper[0]^.val = 2) and
  1550. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1551. { Don't do the sub/push optimization if the sub }
  1552. { comes from setting up the stack frame (JM) }
  1553. (not getLastInstruction(p,hp1) or
  1554. (hp1.typ <> ait_instruction) or
  1555. (taicpu(hp1).opcode <> A_MOV) or
  1556. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1557. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1558. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1559. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1560. begin
  1561. hp1 := tai(p.next);
  1562. while Assigned(hp1) and
  1563. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1564. not RegReadByInstruction(NR_ESP,hp1) and
  1565. not RegModifiedByInstruction(NR_ESP,hp1) do
  1566. hp1 := tai(hp1.next);
  1567. if Assigned(hp1) and
  1568. (tai(hp1).typ = ait_instruction) and
  1569. (taicpu(hp1).opcode = A_PUSH) and
  1570. (taicpu(hp1).opsize = S_W) then
  1571. begin
  1572. taicpu(hp1).changeopsize(S_L);
  1573. if taicpu(hp1).oper[0]^.typ=top_reg then
  1574. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1575. hp1 := tai(p.next);
  1576. asml.remove(p);
  1577. p.free;
  1578. p := hp1;
  1579. continue
  1580. end;
  1581. if DoSubAddOpt(p) then
  1582. continue;
  1583. end
  1584. else if DoSubAddOpt(p) then
  1585. continue
  1586. end;
  1587. A_VMOVAPS,
  1588. A_VMOVAPD:
  1589. if OptPass1VMOVAP(p) then
  1590. continue;
  1591. A_VDIVSD,
  1592. A_VDIVSS,
  1593. A_VSUBSD,
  1594. A_VSUBSS,
  1595. A_VMULSD,
  1596. A_VMULSS,
  1597. A_VADDSD,
  1598. A_VADDSS:
  1599. if OptPass1VOP(p) then
  1600. continue;
  1601. end;
  1602. end; { if is_jmp }
  1603. end;
  1604. end;
  1605. updateUsedRegs(UsedRegs,p);
  1606. p:=tai(p.next);
  1607. end;
  1608. end;
  1609. procedure TCPUAsmOptimizer.PeepHoleOptPass2;
  {$ifdef DEBUG_AOPTCPU}
  { Debug build: inserts an assembler comment carrying the text s into the
    list directly before p. }
  procedure DebugMsg(const s: string;p : tai);
    var
      note: tai_comment;
    begin
      note := tai_comment.Create(strpnew(s));
      asml.insertbefore(note, p);
    end;
  {$else DEBUG_AOPTCPU}
  { Non-debug build: same signature, empty body. }
  procedure DebugMsg(const s: string;p : tai);inline;
    begin
    end;
  {$endif DEBUG_AOPTCPU}
  { Reports whether p is a MOV that the pass below may rewrite into a
    CMOVcc: p must be assigned, be an instruction with opcode A_MOV, have
    operand size S_L or S_W, and both operands must be registers. }
  function CanBeCMOV(p : tai) : boolean;
    begin
      CanBeCMOV := false;
      if not assigned(p) then
        exit;
      if p.typ <> ait_instruction then
        exit;
      if taicpu(p).opcode <> A_MOV then
        exit;
      if not (taicpu(p).opsize in [S_L,S_W]) then
        exit;
      { Memory sources are deliberately not accepted: "cmov ref,reg" cannot
        be used because ref could be nil and cmov still throws an exception
        for ref=nil even when the move itself isn't done (FK). The rejected
        alternative was:
          or ((taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
      }
      CanBeCMOV := (taicpu(p).oper[0]^.typ = top_reg) and
                   (taicpu(p).oper[1]^.typ = top_reg);
    end;
  1635. var
  1636. p,hp1,hp2,hp3: tai;
  1637. l : longint;
  1638. condition : tasmcond;
  1639. carryadd_opcode: Tasmop;
  1640. begin
  1641. p := BlockStart;
  1642. ClearUsedRegs;
  1643. while (p <> BlockEnd) Do
  1644. begin
  1645. UpdateUsedRegs(UsedRegs, tai(p.next));
  1646. case p.Typ Of
  1647. Ait_Instruction:
  1648. begin
  1649. if InsContainsSegRef(taicpu(p)) then
  1650. begin
  1651. p := tai(p.next);
  1652. continue;
  1653. end;
  1654. case taicpu(p).opcode Of
  1655. A_Jcc:
  1656. begin
  1657. { jb @@1 cmc
  1658. inc/dec operand --> adc/sbb operand,0
  1659. @@1:
  1660. ... and ...
  1661. jnb @@1
  1662. inc/dec operand --> adc/sbb operand,0
  1663. @@1: }
  1664. if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
  1665. GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
  1666. (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
  1667. begin
  1668. carryadd_opcode:=A_NONE;
  1669. if Taicpu(p).condition in [C_NAE,C_B] then
  1670. begin
  1671. if Taicpu(hp1).opcode=A_INC then
  1672. carryadd_opcode:=A_ADC;
  1673. if Taicpu(hp1).opcode=A_DEC then
  1674. carryadd_opcode:=A_SBB;
  1675. if carryadd_opcode<>A_NONE then
  1676. begin
  1677. Taicpu(p).clearop(0);
  1678. Taicpu(p).ops:=0;
  1679. Taicpu(p).is_jmp:=false;
  1680. Taicpu(p).opcode:=A_CMC;
  1681. Taicpu(p).condition:=C_NONE;
  1682. Taicpu(hp1).ops:=2;
  1683. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1684. Taicpu(hp1).loadconst(0,0);
  1685. Taicpu(hp1).opcode:=carryadd_opcode;
  1686. continue;
  1687. end;
  1688. end;
  1689. if Taicpu(p).condition in [C_AE,C_NB] then
  1690. begin
  1691. if Taicpu(hp1).opcode=A_INC then
  1692. carryadd_opcode:=A_ADC;
  1693. if Taicpu(hp1).opcode=A_DEC then
  1694. carryadd_opcode:=A_SBB;
  1695. if carryadd_opcode<>A_NONE then
  1696. begin
  1697. asml.remove(p);
  1698. p.free;
  1699. Taicpu(hp1).ops:=2;
  1700. Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
  1701. Taicpu(hp1).loadconst(0,0);
  1702. Taicpu(hp1).opcode:=carryadd_opcode;
  1703. p:=hp1;
  1704. continue;
  1705. end;
  1706. end;
  1707. end;
  1708. if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
  1709. begin
  1710. { check for
  1711. jCC xxx
  1712. <several movs>
  1713. xxx:
  1714. }
  1715. l:=0;
  1716. GetNextInstruction(p, hp1);
  1717. while assigned(hp1) and
  1718. CanBeCMOV(hp1) and
  1719. { stop on labels }
  1720. not(hp1.typ=ait_label) do
  1721. begin
  1722. inc(l);
  1723. GetNextInstruction(hp1,hp1);
  1724. end;
  1725. if assigned(hp1) then
  1726. begin
  1727. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1728. begin
  1729. if (l<=4) and (l>0) then
  1730. begin
  1731. condition:=inverse_cond(taicpu(p).condition);
  1732. hp2:=p;
  1733. GetNextInstruction(p,hp1);
  1734. p:=hp1;
  1735. repeat
  1736. taicpu(hp1).opcode:=A_CMOVcc;
  1737. taicpu(hp1).condition:=condition;
  1738. GetNextInstruction(hp1,hp1);
  1739. until not(assigned(hp1)) or
  1740. not(CanBeCMOV(hp1));
  1741. { wait with removing else GetNextInstruction could
  1742. ignore the label if it was the only usage in the
  1743. jump moved away }
  1744. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1745. asml.remove(hp2);
  1746. hp2.free;
  1747. continue;
  1748. end;
  1749. end
  1750. else
  1751. begin
  1752. { check further for
  1753. jCC xxx
  1754. <several movs 1>
  1755. jmp yyy
  1756. xxx:
  1757. <several movs 2>
  1758. yyy:
  1759. }
  1760. { hp2 points to jmp yyy }
  1761. hp2:=hp1;
  1762. { skip hp1 to xxx }
  1763. GetNextInstruction(hp1, hp1);
  1764. if assigned(hp2) and
  1765. assigned(hp1) and
  1766. (l<=3) and
  1767. (hp2.typ=ait_instruction) and
  1768. (taicpu(hp2).is_jmp) and
  1769. (taicpu(hp2).condition=C_None) and
  1770. { real label and jump, no further references to the
  1771. label are allowed }
  1772. (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=1) and
  1773. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
  1774. begin
  1775. l:=0;
  1776. { skip hp1 to <several moves 2> }
  1777. GetNextInstruction(hp1, hp1);
  1778. while assigned(hp1) and
  1779. CanBeCMOV(hp1) do
  1780. begin
  1781. inc(l);
  1782. GetNextInstruction(hp1, hp1);
  1783. end;
  1784. { hp1 points to yyy: }
  1785. if assigned(hp1) and
  1786. FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
  1787. begin
  1788. condition:=inverse_cond(taicpu(p).condition);
  1789. GetNextInstruction(p,hp1);
  1790. hp3:=p;
  1791. p:=hp1;
  1792. repeat
  1793. taicpu(hp1).opcode:=A_CMOVcc;
  1794. taicpu(hp1).condition:=condition;
  1795. GetNextInstruction(hp1,hp1);
  1796. until not(assigned(hp1)) or
  1797. not(CanBeCMOV(hp1));
  1798. { hp2 is still at jmp yyy }
  1799. GetNextInstruction(hp2,hp1);
  1800. { hp2 is now at xxx: }
  1801. condition:=inverse_cond(condition);
  1802. GetNextInstruction(hp1,hp1);
  1803. { hp1 is now at <several movs 2> }
  1804. repeat
  1805. taicpu(hp1).opcode:=A_CMOVcc;
  1806. taicpu(hp1).condition:=condition;
  1807. GetNextInstruction(hp1,hp1);
  1808. until not(assigned(hp1)) or
  1809. not(CanBeCMOV(hp1));
  1810. {
  1811. asml.remove(hp1.next)
  1812. hp1.next.free;
  1813. asml.remove(hp1);
  1814. hp1.free;
  1815. }
  1816. { remove jCC }
  1817. tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
  1818. asml.remove(hp3);
  1819. hp3.free;
  1820. { remove jmp }
  1821. tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
  1822. asml.remove(hp2);
  1823. hp2.free;
  1824. continue;
  1825. end;
  1826. end;
  1827. end;
  1828. end;
  1829. end;
  1830. end;
  1831. A_FSTP,A_FISTP:
  1832. if DoFpuLoadStoreOpt(p) then
  1833. continue;
  1834. A_IMUL:
  1835. if OptPass2Imul(p) then
  1836. continue;
  1837. A_JMP:
  1838. {
  1839. change
  1840. jmp .L1
  1841. ...
  1842. .L1:
  1843. ret
  1844. into
  1845. ret
  1846. }
  1847. if (taicpu(p).oper[0]^.typ=top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full) then
  1848. begin
  1849. hp1:=getlabelwithsym(tasmlabel(taicpu(p).oper[0]^.ref^.symbol));
  1850. if assigned(hp1) and SkipLabels(hp1,hp1) and (hp1.typ=ait_instruction) and (taicpu(hp1).opcode=A_RET) and (taicpu(p).condition=C_None) then
  1851. begin
  1852. tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
  1853. taicpu(p).opcode:=A_RET;
  1854. taicpu(p).is_jmp:=false;
  1855. taicpu(p).ops:=taicpu(hp1).ops;
  1856. case taicpu(hp1).ops of
  1857. 0:
  1858. taicpu(p).clearop(0);
  1859. 1:
  1860. taicpu(p).loadconst(0,taicpu(hp1).oper[0]^.val);
  1861. else
  1862. internalerror(2016041301);
  1863. end;
  1864. continue;
  1865. end;
  1866. end;
  1867. A_MOV:
  1868. if OptPass2MOV(p) then
  1869. continue;
  1870. end;
  1871. end;
  1872. end;
  1873. p := tai(p.next)
  1874. end;
  1875. end;
  { Last peephole pass over the instructions between BlockStart and BlockEnd.

    For every instruction that does not contain a segment reference it applies
    the following rewrites:
      - CALL  : "call x; jmp y" becomes "push y; jmp x" (pre-Pentium2 targets,
                no PIC), or "call x; ret" becomes "jmp x" (level 4, no PIC)
      - CMP   : "cmp $0,%reg" becomes "test %reg,%reg"
      - MOV   : delegated to PostPeepholeOptMov
      - MOVZX : "movzbl src,%reg32" becomes "xorl %reg32,%reg32; movb src,%reg8"
                when optimizing for the Pentium and not for size
      - TEST/OR : removes a redundant "test/or %y,%y" or "test $-1,%y" when the
                preceding instruction already set the flags that the following
                SETcc/Jcc/CMOVcc consumes

    The procedure takes no parameters; it works on the optimizer's current
    block and edits the instruction list (asml) in place. }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1,hp2: tai;
      IsTestConstX: boolean;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                { instructions with segment references are left untouched }
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_CALL:
                    begin
                      { don't do this on modern CPUs, this really hurts them due to
                        broken call/ret pairing }
                      if (current_settings.optimizecputype < cpu_Pentium2) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_JMP) and
                         ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                        begin
                          { "call x; jmp y" -> "push y; jmp x": the pushed jump
                            target takes the place of the return address }
                          hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                          InsertLLItem(p.previous, p, hp2);
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end
                      { replace
                          call   procname
                          ret
                        by
                          jmp    procname

                        this should never hurt except when pic is used, not sure
                        how to handle it then

                        but do it only on level 4 because it destroys stack back traces
                      }
                      else if (cs_opt_level4 in current_settings.optimizerswitches) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_RET) and
                         (taicpu(hp1).ops=0) then
                        begin
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        {change "cmp $0, %reg" to "test %reg, %reg"}
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { p is not advanced, so the new TEST is looked at again
                            by the A_TEST branch below }
                          continue;
                        end;
                    end;
                  A_MOV:
                    PostPeepholeOptMov(p);
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg)
                            then
                              case taicpu(p).opsize of
                                S_BL:
                                  begin
                                    if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                       { the source must not be part of the destination:
                                         for "movzbl %al,%eax" the inserted xor would
                                         clear the source before the mov reads it }
                                       (getsupreg(taicpu(p).oper[0]^.reg) <> getsupreg(taicpu(p).oper[1]^.reg)) and
                                       not(cs_opt_size in current_settings.optimizerswitches) and
                                       (current_settings.optimizecputype = cpu_Pentium) then
                                      {Change "movzbl %reg1, %reg2" to
                                       "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                       PentiumMMX}
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                                    taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(p.previous, p, hp1);
                                        taicpu(p).opcode := A_MOV;
                                        taicpu(p).changeopsize(S_B);
                                        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                      end;
                                  end;
                              end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                              { the address must not depend on the register that
                                the inserted xor clears }
                              (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                              (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                              not(cs_opt_size in current_settings.optimizerswitches) and
                              IsGP32Reg(taicpu(p).oper[1]^.reg) and
                              (current_settings.optimizecputype = cpu_Pentium) and
                              (taicpu(p).opsize = S_BL) then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                             Pentium and PentiumMMX}
                            begin
                              hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                          taicpu(p).oper[1]^.reg);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                              InsertLLItem(p.previous, p, hp1);
                            end;
                      end;
                  A_TEST, A_OR:
                    {removes the line marked with (x) from the sequence
                     and/or/xor/add/sub/... $x, %y
                     test/or %y, %y | test $-1, %y    (x)
                     j(n)z _Label
                        as the first instruction already adjusts the ZF
                        %y operand may also be a reference }
                    begin
                      IsTestConstX:=(taicpu(p).opcode=A_TEST) and
                        MatchOperand(taicpu(p).oper[0]^,-1);
                      { hp1 = instruction before p, hp2 = flag consumer after p }
                      if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
                         GetLastInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         GetNextInstruction(p,hp2) and
                         MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
                        case taicpu(hp1).opcode Of
                          A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                { and in case of carry for A(E)/B(E)/C/NC                  }
                                 ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                  ((taicpu(hp1).opcode <> A_ADD) and
                                   (taicpu(hp1).opcode <> A_SUB))) then
                                begin
                                  { drop p and continue with its successor }
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_SHL, A_SAL, A_SHR, A_SAR:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                                { therefore, it's only safe to do this optimization for     }
                                { shifts by a (nonzero) constant                            }
                                 (taicpu(hp1).oper[0]^.typ = top_const) and
                                 (taicpu(hp1).oper[0]^.val <> 0) and
                                { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_DEC, A_INC, A_NEG:
                            begin
                              if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                                { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  case taicpu(hp1).opcode Of
                                    A_DEC, A_INC:
                                      {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                      begin
                                        case taicpu(hp1).opcode Of
                                          A_DEC: taicpu(hp1).opcode := A_SUB;
                                          A_INC: taicpu(hp1).opcode := A_ADD;
                                        end;
                                        { the old single operand becomes the destination,
                                          the constant 1 becomes the source }
                                        taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                        taicpu(hp1).loadConst(0,1);
                                        taicpu(hp1).ops:=2;
                                      end
                                  end;
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end
                          else
                            { change "test $-1,%reg" into "test %reg,%reg" }
                            if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                              taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                        end { case }
                      else
                        { change "test $-1,%reg" into "test %reg,%reg" }
                        if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Runs the peephole optimizer over the whole instruction list (asml).

    The list is processed block by block: pass_1 is called for the block
    starting at BlockStart, the peephole passes are run on it, and any
    assembler blocks (delimited by mark_AsmBlockStart / mark_AsmBlockEnd
    markers) that follow are skipped.  Without cs_opt_level3 the list is
    processed once; with it the whole procedure is repeated until the pass
    counter ends the loop.  PostPeepHoleOpts only runs in the final round. }
  Procedure TCpuAsmOptimizer.Optimize;
    Var
      NextIns: Tai;
      PassNo: longint;
      MultiPass, Changed, FinalPass: boolean;
    Begin
      MultiPass := (cs_opt_level3 in current_settings.optimizerswitches);
      PassNo := 0;
      Changed := false;
      repeat
        { decide up front whether this round is the last one; the upper bound
          on the pass counter prevents endless loops }
        FinalPass :=
          (not MultiPass) or
          ((PassNo > 2) and not Changed) or
          (PassNo = 4);
        Changed := false;
        { Setup labeltable, always necessary }
        BlockStart := tai(asml.first);
        pass_1;
        { BlockEnd is now either an ait_marker of kind mark_AsmBlockStart or nil }
        While Assigned(BlockStart) Do
          Begin
            if (cs_opt_peephole in current_settings.optimizerswitches) then
              begin
                { the pre-peephole step belongs to the very first round only }
                if (PassNo = 0) then
                  PrePeepHoleOpts;
                { first-level peephole optimizations; run twice in round 0 }
                PeepHoleOptPass1;
                if (PassNo = 0) then
                  PeepHoleOptPass1;
                { second-level peephole optimizations }
                PeepHoleOptPass2;
                if FinalPass then
                  PostPeepHoleOpts;
              end;
            { resume at the end of the block just handled: either the start
              of an assembler block or nil }
            BlockStart := BlockEnd;
            While Assigned(BlockStart) And
                  (BlockStart.typ = ait_Marker) And
                  (Tai_Marker(BlockStart).Kind = mark_AsmBlockStart) Do
              Begin
                { step over the assembler block up to its end marker }
                Repeat
                  BlockStart := Tai(BlockStart.Next);
                Until (BlockStart.Typ = Ait_Marker) And
                      (Tai_Marker(BlockStart).Kind = mark_AsmBlockEnd);
                { BlockStart is now the mark_AsmBlockEnd marker }
                If (not GetNextInstruction(BlockStart, NextIns)) Or
                   ((NextIns.typ = ait_Marker) And
                    (Tai_Marker(NextIns).Kind = mark_AsmBlockStart)) Then
                  { nothing follows, or another assembler block starts right
                    away: continue from there and let the loop skip it }
                  BlockStart := NextIns
                else
                  { ordinary instructions follow, so prepare the next block }
                  pass_1;
              End;
          End;
        inc(PassNo);
      until FinalPass;
      dfa.free;
    End;
  { Unit initialization: sets casmoptimizer to this unit's optimizer class. }
  begin
    CAsmOptimizer := TCpuAsmOptimizer;
  end.