popt386.pas 115 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit popt386;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses Aasmbase,aasmtai,aasmdata,aasmcpu,verbose;
  21. procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  22. procedure PeepHoleOptPass1(asml: TAsmList; BlockStart, BlockEnd: tai);
  23. procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
  24. procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
  25. implementation
  26. uses
  27. globtype,systems,
  28. globals,cgbase,procinfo,
  29. symsym,
  30. {$ifdef finaldestdebug}
  31. cobjects,
  32. {$endif finaldestdebug}
  33. cpuinfo,cpubase,cgutils,daopt386;
  function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
    { Reports whether hp1 is an arithmetic/logic instruction that operates
      on "reg" in one of the following shapes:
        - add/sub/or/xor/and/shl/shr/sar whose first operand is a constant
          or a register other than "reg", and whose second operand is the
          register "reg";
        - inc/dec whose only operand is the register "reg".
      Every other opcode yields false. }
    var
      srcIsIndependent: boolean;
    begin
      result := false;
      case hp1.opcode of
        A_INC, A_DEC:
          result := (hp1.oper[0]^.typ = top_reg) and
                    (hp1.oper[0]^.reg = reg);
        A_ADD, A_SUB, A_OR, A_XOR, A_AND, A_SHL, A_SHR, A_SAR:
          begin
            { operand 0 is inspected first, so operand 1 is never touched
              when operand 0 already disqualifies the instruction }
            srcIsIndependent :=
              (hp1.oper[0]^.typ = top_const) or
              ((hp1.oper[0]^.typ = top_reg) and
               (hp1.oper[0]^.reg <> reg));
            if srcIsIndependent then
              result := (hp1.oper[1]^.typ = top_reg) and
                        (hp1.oper[1]^.reg = reg);
          end;
      end;
    end;
  function RegUsedAfterInstruction(reg: Tregister; p: tai; var UsedRegs: TRegSet): Boolean;
    { Updates UsedRegs with the entries following p and then reports whether
      the super register of "reg" is still in use after p.  The register
      counts as used when it is in UsedRegs and either there is no next
      instruction, or the next instruction does not load it with a new
      value (as decided by regLoadedWithNewValue). }
    var
      superReg: tsuperregister;
      following: tai;
    begin
      superReg := getsupreg(reg);
      UpdateUsedRegs(UsedRegs, tai(p.Next));
      result := false;
      if not (superReg in UsedRegs) then
        exit;
      if not getNextInstruction(p, following) then
        result := true
      else
        result := not regLoadedWithNewValue(superReg, false, following);
    end;
  function doFpuLoadStoreOpt(asmL: TAsmList; var p: tai): boolean;
    { Looks for "fstp mem; fld mem" or "fistp mem; fild mem" pairs that use
      the same operand size and the same memory reference.

      Such a pair is deleted completely when all of the following hold:
        - the operand size is S_FX;
        - the instruction after the pair is a leave or ret;
        - the reference is based on the frame pointer and has no index;
        - it is not the case that a function result symbol exists and the
          reference offset lies below that symbol's offset.
      In that case p is moved to the leave/ret instruction,
      removeLastDeallocForFuncRes is called for it, and true is returned.

      returns true if a "continue" should be done after this optimization

      The alternative of turning "fstp f; fld f" into "fst f" for the other
      operand sizes is intentionally not performed: the original authors
      noted that it can't be done because the store operation rounds. }
    var
      reload, follower: tai;
      isStoreReloadPair: boolean;
    begin
      result := false;

      { --- recognise the store/reload pair --- }
      if taicpu(p).oper[0]^.typ <> top_ref then
        exit;
      if not getNextInstruction(p, reload) then
        exit;
      if reload.typ <> ait_instruction then
        exit;
      isStoreReloadPair :=
        ((taicpu(reload).opcode = A_FLD) and (taicpu(p).opcode = A_FSTP)) or
        ((taicpu(p).opcode = A_FISTP) and (taicpu(reload).opcode = A_FILD));
      if not isStoreReloadPair then
        exit;
      if taicpu(reload).oper[0]^.typ <> top_ref then
        exit;
      if taicpu(reload).opsize <> taicpu(p).opsize then
        exit;
      if not refsEqual(taicpu(p).oper[0]^.ref^, taicpu(reload).oper[0]^.ref^) then
        exit;

      { --- check that the pair may be dropped altogether --- }
      if taicpu(p).opsize <> S_FX then
        exit;
      if not getNextInstruction(reload, follower) then
        exit;
      if follower.typ <> ait_instruction then
        exit;
      if (taicpu(follower).opcode <> A_LEAVE) and
         (taicpu(follower).opcode <> A_RET) then
        exit;
      if taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer then
        exit;
      if assigned(current_procinfo.procdef.funcretsym) and
         (taicpu(p).oper[0]^.ref^.offset <
          tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
        exit;
      if taicpu(p).oper[0]^.ref^.index <> NR_NO then
        exit;

      { --- delete both instructions and continue at the leave/ret --- }
      asml.remove(p);
      asml.remove(reload);
      p.free;
      reload.free;
      p := follower;
      removeLastDeallocForFuncRes(asmL, p);
      result := true;
    end;
  { returns true if p contains a memory operand with a segment set }
  function InsContainsSegRef(p: taicpu): boolean;
    var
      opIdx: longint;
    begin
      result := false;
      { stop at the first reference operand that names a segment register }
      for opIdx := 0 to p.opercnt-1 do
        if (p.oper[opIdx]^.typ = top_ref) and
           (p.oper[opIdx]^.ref^.segment <> NR_NO) then
          begin
            result := true;
            break;
          end;
    end;
  procedure PrePeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
    { Pre-pass of the peephole optimizer.  Walks the entries from BlockStart
      up to (not including) BlockEnd and rewrites the following patterns in
      place.  Instructions for which InsContainsSegRef is true are skipped.

        imul $1, reg            -> removed
        imul $1, reg1, reg2     -> mov reg1, reg2
        imul $3/5/6/9/10/12 ... -> lea/add sequences (32 bit operands only,
                                   not when optimizing for size, not for
                                   Pentium II or newer targets, and not when
                                   the next instruction is jo/jno; the two
                                   instruction sequences for 6, 10 and 12 are
                                   only generated for cpu_386 and below)
        shr/sar $c1, x
        shl $c2, x              -> a shorter shift/and combination
        xor reg, reg            -> mov $0, reg (the original comment states
                                   this is changed back in pass 2)

      asml       : list that owns the instructions being rewritten
      BlockStart : first entry to examine
      BlockEnd   : entry at which the walk stops }
    var
      p,hp1: tai;
      l: aint;
      tmpRef: treference;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_IMUL:
                    {changes certain "imul const, %reg"'s to lea sequences}
                    begin
                      if (taicpu(p).oper[0]^.typ = Top_Const) and
                         (taicpu(p).oper[1]^.typ = Top_Reg) and
                         (taicpu(p).opsize = S_L) then
                        if (taicpu(p).oper[0]^.val = 1) then
                          if (taicpu(p).ops = 2) then
                            {remove "imul $1, reg"}
                            begin
                              hp1 := tai(p.Next);
                              asml.remove(p);
                              p.free;
                              p := hp1;
                              continue;
                            end
                          else
                            {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
                            begin
                              hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                              { linking hp1 between p's neighbours unlinks p }
                              InsertLLItem(asml, p.previous, p.next, hp1);
                              p.free;
                              p := hp1;
                            end
                        else if
                          ((taicpu(p).ops <= 2) or
                           (taicpu(p).oper[2]^.typ = Top_Reg)) and
                          (current_settings.optimizecputype < cpu_Pentium2) and
                          (taicpu(p).oper[0]^.val <= 12) and
                          not(cs_opt_size in current_settings.optimizerswitches) and
                          (not(GetNextInstruction(p, hp1)) or
                           {GetNextInstruction(p, hp1) and}
                           { lea/add do not set the overflow flag the way imul
                             does, so keep imul when jo/jno follows }
                           not((tai(hp1).typ = ait_instruction) and
                               ((taicpu(hp1).opcode=A_Jcc) and
                                (taicpu(hp1).condition in [C_O,C_NO])))) then
                          begin
                            reference_reset(tmpref,1);
                            case taicpu(p).oper[0]^.val Of
                              3: begin
                                   {imul 3, reg1, reg2 to
                                      lea (reg1,reg1,2), reg2
                                    imul 3, reg1 to
                                      lea (reg1,reg1,2), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 2;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              5: begin
                                   {imul 5, reg1, reg2 to
                                      lea (reg1,reg1,4), reg2
                                    imul 5, reg1 to
                                      lea (reg1,reg1,4), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 4;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              6: begin
                                   {imul 6, reg1, reg2 to
                                      lea (,reg1,2), reg2
                                      lea (reg2,reg1,4), reg2
                                    imul 6, reg1 to
                                      lea (reg1,reg1,2), reg1
                                      add reg1, reg1}
                                   { NOTE(review): the three operand form reads
                                     reg1 again after reg2 was written, so it is
                                     only correct when reg1 <> reg2 - confirm the
                                     code generator never emits reg1 = reg2 here }
                                   if (current_settings.optimizecputype <= cpu_386) then
                                     begin
                                       { build the SECOND instruction of the
                                         sequence first and link it after p }
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := taicpu(p).oper[2]^.reg;
                                           TmpRef.ScaleFactor := 4;
                                           { the result must land in reg2 (oper[2]);
                                             writing to reg1 clobbered the source
                                             and left reg2 = 2*reg1 }
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                             taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(asml,p, p.next, hp1);
                                       { now build the FIRST instruction and let
                                         it take p's place }
                                       reference_reset(tmpref,2);
                                       TmpRef.index := taicpu(p).oper[1]^.reg;
                                       TmpRef.ScaleFactor := 2;
                                       if (taicpu(p).ops = 3) then
                                         begin
                                           TmpRef.base := NR_NO;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                             taicpu(p).oper[2]^.reg);
                                         end
                                       else
                                         begin
                                           TmpRef.base := taicpu(p).oper[1]^.reg;
                                           hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                         end;
                                       InsertLLItem(asml,p.previous, p.next, hp1);
                                       p.free;
                                       { continue behind the two new instructions }
                                       p := tai(hp1.next);
                                     end
                                 end;
                              9: begin
                                   {imul 9, reg1, reg2 to
                                      lea (reg1,reg1,8), reg2
                                    imul 9, reg1 to
                                      lea (reg1,reg1,8), reg1}
                                   TmpRef.base := taicpu(p).oper[1]^.reg;
                                   TmpRef.index := taicpu(p).oper[1]^.reg;
                                   TmpRef.ScaleFactor := 8;
                                   if (taicpu(p).ops = 2) then
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                   else
                                     hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                   InsertLLItem(asml,p.previous, p.next, hp1);
                                   p.free;
                                   p := hp1;
                                 end;
                              10: begin
                                    {imul 10, reg1, reg2 to
                                       lea (reg1,reg1,4), reg2
                                       add reg2, reg2
                                     imul 10, reg1 to
                                       lea (reg1,reg1,4), reg1
                                       add reg1, reg1}
                                    if (current_settings.optimizecputype <= cpu_386) then
                                      begin
                                        { second instruction (add) goes after p }
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                            taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p, p.next, hp1);
                                        { first instruction (lea) replaces p }
                                        TmpRef.base := taicpu(p).oper[1]^.reg;
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        TmpRef.ScaleFactor := 4;
                                        if (taicpu(p).ops = 3) then
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                        else
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end;
                              12: begin
                                    {imul 12, reg1, reg2 to
                                       lea (,reg1,4), reg2
                                       lea (reg2,reg1,8), reg2
                                     imul 12, reg1 to
                                       lea (reg1,reg1,2), reg1
                                       lea (,reg1,4), reg1}
                                    { NOTE(review): as for 6, the three operand
                                      form is only correct when reg1 <> reg2 }
                                    if (current_settings.optimizecputype <= cpu_386)
                                      then
                                      begin
                                        { second instruction goes after p }
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        if (taicpu(p).ops = 3) then
                                          begin
                                            TmpRef.base := taicpu(p).oper[2]^.reg;
                                            TmpRef.ScaleFactor := 8;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            TmpRef.base := NR_NO;
                                            TmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(asml,p, p.next, hp1);
                                        { first instruction replaces p }
                                        reference_reset(tmpref,2);
                                        TmpRef.index := taicpu(p).oper[1]^.reg;
                                        if (taicpu(p).ops = 3) then
                                          begin
                                            TmpRef.base := NR_NO;
                                            TmpRef.ScaleFactor := 4;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                          end
                                        else
                                          begin
                                            TmpRef.base := taicpu(p).oper[1]^.reg;
                                            TmpRef.ScaleFactor := 2;
                                            hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                          end;
                                        InsertLLItem(asml,p.previous, p.next, hp1);
                                        p.free;
                                        p := tai(hp1.next);
                                      end
                                  end
                            end;
                          end;
                    end;
                  A_SAR, A_SHR:
                    {changes the code sequence
                       shr/sar const1, x
                       shl const2, x
                     to either "sar/and", "shl/and" or just "and" depending on const1 and const2}
                    begin
                      if GetNextInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_SHL) and
                         (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(hp1).oper[0]^.typ = top_const) and
                         (taicpu(hp1).opsize = taicpu(p).opsize) and
                         (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[1]^.typ) and
                         OpsEqual(taicpu(hp1).oper[1]^, taicpu(p).oper[1]^) then
                        if (taicpu(p).oper[0]^.val > taicpu(hp1).oper[0]^.val) and
                           not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl     const2, %reg
                            with const1 > const2 }
                          begin
                            { shift right by the difference, then clear the
                              low const2 bits with an and }
                            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
                            taicpu(hp1).opcode := A_AND;
                            l := (1 shl (taicpu(hp1).oper[0]^.val)) - 1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(hp1).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(hp1).loadConst(0,l Xor $ff);
                              S_W: taicpu(hp1).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else if (taicpu(p).oper[0]^.val<taicpu(hp1).oper[0]^.val) and
                                not(cs_opt_size in current_settings.optimizerswitches) then
                          { shr/sar const1, %reg
                            shl     const2, %reg
                            with const1 < const2 }
                          begin
                            { clear the low const1 bits with an and, then shift
                              left by the difference }
                            taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val-taicpu(p).oper[0]^.val);
                            taicpu(p).opcode := A_AND;
                            l := (1 shl (taicpu(p).oper[0]^.val))-1;
                            case taicpu(p).opsize Of
                              S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              S_B: taicpu(p).loadConst(0,l Xor $ff);
                              S_W: taicpu(p).loadConst(0,l Xor $ffff);
                            end;
                          end
                        else
                          { shr/sar const1, %reg
                            shl     const2, %reg
                            with const1 = const2 }
                          if (taicpu(p).oper[0]^.val = taicpu(hp1).oper[0]^.val) then
                            begin
                              { a single and that clears the low bits suffices }
                              taicpu(p).opcode := A_AND;
                              l := (1 shl (taicpu(p).oper[0]^.val))-1;
                              case taicpu(p).opsize Of
                                S_B: taicpu(p).loadConst(0,l Xor $ff);
                                S_W: taicpu(p).loadConst(0,l Xor $ffff);
                                S_L: taicpu(p).loadConst(0,l Xor aint($ffffffff));
                              end;
                              asml.remove(hp1);
                              hp1.free;
                            end;
                    end;
                  A_XOR:
                    if (taicpu(p).oper[0]^.typ = top_reg) and
                       (taicpu(p).oper[1]^.typ = top_reg) and
                       (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                      { temporarily change this to 'mov reg,0' to make it easier }
                      { for the CSE. Will be changed back in pass 2              }
                      begin
                        taicpu(p).opcode := A_MOV;
                        taicpu(p).loadConst(0,0);
                      end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  420. procedure PeepHoleOptPass1(Asml: TAsmList; BlockStart, BlockEnd: tai);
  421. {First pass of peepholeoptimizations}
  422. var
  423. l : longint;
  424. p,hp1,hp2 : tai;
  425. hp3,hp4: tai;
  426. v:aint;
  427. TmpRef: TReference;
  428. UsedRegs, TmpUsedRegs: TRegSet;
  429. TmpBool1, TmpBool2: Boolean;
  function SkipLabels(hp: tai; var hp2: tai): boolean;
    { Starting at hp, steps over every following entry whose type is in
      SkipInstr, or is a label or an align.  If an entry remains after the
      skipped run, it is returned in hp2 and the result is true; otherwise
      hp2 receives the last entry reached and the result is false. }
    var
      cursor: tai;
    begin
      cursor := hp;
      while assigned(cursor.next) and
            (tai(cursor.next).typ in (SkipInstr + [ait_label,ait_align])) do
        cursor := tai(cursor.next);
      SkipLabels := assigned(cursor.next);
      if assigned(cursor.next) then
        hp2 := tai(cursor.next)
      else
        hp2 := cursor;
    end;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
    {traces successive jumps to their final destination and sets it, e.g.
       je l1                je l3
       <code>               <code>
       l1:       becomes    l1:
       je l2                je l3
       <code>               <code>
       l2:                  l2:
       jmp l3               jmp l3

     the level parameter denotes how deep we have already followed the jump,
     to avoid endless loops with constructs such as "l5: ; jmp l5"

     asml  : list into which a fresh label is inserted when needed
     hp    : the jump whose target symbol (operand 0) is updated in place
     level : current recursion depth; the search gives up above 20

     returns false when the depth limit is exceeded, when the jump at the
     destination targets the same label number as hp, or when a recursive
     call returned false; returns true in all other cases, including when
     nothing was changed.

     Whenever the target symbol of hp is replaced, the reference count of
     the old label is decreased and that of the new label is increased. }
    var p1, p2: tai;
        l: tasmlabel;

      function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
        { skips entries in SkipInstr and aligns after hp; if the entry that
          follows is a label, returns true and its symbol in l }
        begin
          FindAnyLabel := false;
          while assigned(hp.next) and
                (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
            hp := tai(hp.next);
          if assigned(hp.next) and
             (tai(hp.next).typ = ait_label) then
            begin
              FindAnyLabel := true;
              l := tai_label(hp.next).labsym;
            end
        end;

    begin
      GetfinalDestination := false;
      if level > 20 then
        exit;
      p1 := dfa.getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
      if assigned(p1) then
        begin
          SkipLabels(p1,p1);
          if (tai(p1).typ = ait_instruction) and
             (taicpu(p1).is_jmp) then
            if { the next instruction after the label where the jump hp arrives}
               { is unconditional or of the same type as hp, so continue       }
               (taicpu(p1).condition in [C_None,hp.condition]) or
               { the next instruction after the label where the jump hp arrives}
               { is the opposite of hp (so this one is never taken), but after }
               { that one there is a branch that will be taken, so perform a   }
               { little hack: set p1 equal to this instruction (that's what the}
               { last SkipLabels is for, only works with short bool evaluation)}
               ((taicpu(p1).condition = inverse_cond(hp.condition)) and
                SkipLabels(p1,p2) and
                (p2.typ = ait_instruction) and
                (taicpu(p2).is_jmp) and
                (taicpu(p2).condition in [C_None,hp.condition]) and
                SkipLabels(p1,p1)) then
              begin
                { quick check for loops of the form "l5: ; jmp l5 }
                if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                    tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                  exit;
                if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                  exit;
                tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
                tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
              end
            else
              if (taicpu(p1).condition = inverse_cond(hp.condition)) then
                { the jump at the destination is never taken when reached via
                  hp, so hp can go directly to whatever follows it }
                if not FindAnyLabel(p1,l) then
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('previous label inserted')));
  {$endif finaldestdebug}
                    current_asmdata.getjumplabel(l);
                    insertllitem(asml,p1,p1.next,tai_label.Create(l));
                    tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    l.increfs;
                    { this won't work, since the new label isn't in the labeltable }
                    { so it will fail the rangecheck. Labeltable should become a   }
                    { hashtable to support this:                                   }
                    { GetFinalDestination(asml, hp);                               }
                  end
                else
                  begin
  {$ifdef finaldestdebug}
                    insertllitem(asml,p1,p1.next,tai_comment.Create(
                      strpnew('next label reused')));
  {$endif finaldestdebug}
                    { take the new reference before dropping the old one, and
                      release the old target like the other branches do so its
                      reference count does not leak }
                    l.increfs;
                    tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                    hp.oper[0]^.ref^.symbol := l;
                    if not GetFinalDestination(asml, hp,succ(level)) then
                      exit;
                  end;
        end;
      GetFinalDestination := true;
    end;
  function DoSubAddOpt(var p: tai): Boolean;
    { Tries to merge the instruction preceding p into p.  The code reads
      p's operand 0 as a constant and operand 1 as a register, so p is
      presumably a "sub const, reg" - verify against the caller.

      When the preceding instruction has the same operand size and works on
      the same register, it is removed and p's constant is adjusted:
        dec reg        -> constant + 1
        sub c, reg     -> constant + c
        add c, reg     -> constant - c
      If the add case leaves a constant of 0, p itself is removed as well;
      p is then set to the instruction before the removed one (or to the
      entry after it when there is none) and true is returned.  In every
      other case the result is false.

      Uses hp1 and asml of the enclosing procedure. }
    begin
      DoSubAddOpt := False;
      if not GetLastInstruction(p, hp1) then
        exit;
      if hp1.typ <> ait_instruction then
        exit;
      if taicpu(hp1).opsize <> taicpu(p).opsize then
        exit;

      case taicpu(hp1).opcode Of
        A_DEC:
          begin
            if (taicpu(hp1).oper[0]^.typ = top_reg) and
               (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
              begin
                taicpu(p).loadConst(0, taicpu(p).oper[0]^.val + 1);
                asml.remove(hp1);
                hp1.free;
              end;
          end;
        A_SUB:
          begin
            if (taicpu(hp1).oper[0]^.typ = top_const) and
               (taicpu(hp1).oper[1]^.typ = top_reg) and
               (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
              begin
                taicpu(p).loadConst(0,
                  taicpu(p).oper[0]^.val + taicpu(hp1).oper[0]^.val);
                asml.remove(hp1);
                hp1.free;
              end;
          end;
        A_ADD:
          begin
            if (taicpu(hp1).oper[0]^.typ = top_const) and
               (taicpu(hp1).oper[1]^.typ = top_reg) and
               (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
              begin
                taicpu(p).loadConst(0,
                  taicpu(p).oper[0]^.val - taicpu(hp1).oper[0]^.val);
                asml.remove(hp1);
                hp1.free;
                { the add and the sub cancelled each other out completely }
                if (taicpu(p).oper[0]^.val = 0) then
                  begin
                    hp1 := tai(p.next);
                    asml.remove(p);
                    p.free;
                    if not GetLastInstruction(hp1, p) then
                      p := hp1;
                    DoSubAddOpt := True;
                  end;
              end;
          end;
      end;
    end;
  584. begin
  585. p := BlockStart;
  586. UsedRegs := [];
  587. while (p <> BlockEnd) Do
  588. begin
  589. UpDateUsedRegs(UsedRegs, tai(p.next));
  590. case p.Typ Of
  591. ait_instruction:
  592. begin
  593. if InsContainsSegRef(taicpu(p)) then
  594. begin
  595. p := tai(p.next);
  596. continue;
  597. end;
  598. { Handle Jmp Optimizations }
  599. if taicpu(p).is_jmp then
  600. begin
  601. {the following if-block removes all code between a jmp and the next label,
  602. because it can never be executed}
  603. if (taicpu(p).opcode = A_JMP) then
  604. begin
  605. while GetNextInstruction(p, hp1) and
  606. (hp1.typ <> ait_label) do
  607. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  608. begin
  609. asml.remove(hp1);
  610. hp1.free;
  611. end
  612. else break;
  613. end;
  614. { remove jumps to a label coming right after them }
  615. if GetNextInstruction(p, hp1) then
  616. begin
  617. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  618. { TODO: FIXME removing the first instruction fails}
  619. (p<>blockstart) then
  620. begin
  621. hp2:=tai(hp1.next);
  622. asml.remove(p);
  623. p.free;
  624. p:=hp2;
  625. continue;
  626. end
  627. else
  628. begin
  629. if hp1.typ = ait_label then
  630. SkipLabels(hp1,hp1);
  631. if (tai(hp1).typ=ait_instruction) and
  632. (taicpu(hp1).opcode=A_JMP) and
  633. GetNextInstruction(hp1, hp2) and
  634. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  635. begin
  636. if taicpu(p).opcode=A_Jcc then
  637. begin
  638. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  639. tai_label(hp2).labsym.decrefs;
  640. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  641. { when free'ing hp1, the ref. isn't decresed, so we don't
  642. increase it (FK)
  643. taicpu(p).oper[0]^.ref^.symbol.increfs;
  644. }
  645. asml.remove(hp1);
  646. hp1.free;
  647. GetFinalDestination(asml, taicpu(p),0);
  648. end
  649. else
  650. begin
  651. GetFinalDestination(asml, taicpu(p),0);
  652. p:=tai(p.next);
  653. continue;
  654. end;
  655. end
  656. else
  657. GetFinalDestination(asml, taicpu(p),0);
  658. end;
  659. end;
  660. end
  661. else
  662. { All other optimizes }
  663. begin
  664. for l := 0 to taicpu(p).ops-1 Do
  665. if (taicpu(p).oper[l]^.typ = top_ref) then
  666. With taicpu(p).oper[l]^.ref^ Do
  667. begin
  668. if (base = NR_NO) and
  669. (index <> NR_NO) and
  670. (scalefactor in [0,1]) then
  671. begin
  672. base := index;
  673. index := NR_NO
  674. end
  675. end;
  676. case taicpu(p).opcode Of
  677. A_AND:
  678. begin
  679. if (taicpu(p).oper[0]^.typ = top_const) and
  680. (taicpu(p).oper[1]^.typ = top_reg) and
  681. GetNextInstruction(p, hp1) and
  682. (tai(hp1).typ = ait_instruction) and
  683. (taicpu(hp1).opcode = A_AND) and
  684. (taicpu(hp1).oper[0]^.typ = top_const) and
  685. (taicpu(hp1).oper[1]^.typ = top_reg) and
  686. (taicpu(p).oper[1]^.reg = taicpu(hp1).oper[1]^.reg) then
  687. {change "and const1, reg; and const2, reg" to "and (const1 and const2), reg"}
  688. begin
  689. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val and taicpu(hp1).oper[0]^.val);
  690. asml.remove(hp1);
  691. hp1.free;
  692. end
  693. else
  694. {change "and x, reg; jxx" to "test x, reg", if reg is deallocated before the
  695. jump, but only if it's a conditional jump (PFV) }
  696. if (taicpu(p).oper[1]^.typ = top_reg) and
  697. GetNextInstruction(p, hp1) and
  698. (hp1.typ = ait_instruction) and
  699. (taicpu(hp1).is_jmp) and
  700. (taicpu(hp1).opcode<>A_JMP) and
  701. not(getsupreg(taicpu(p).oper[1]^.reg) in UsedRegs) then
  702. taicpu(p).opcode := A_TEST;
  703. end;
  704. A_CMP:
  705. begin
  706. { cmp register,$8000 neg register
  707. je target --> jo target
  708. .... only if register is deallocated before jump.}
  709. case Taicpu(p).opsize of
  710. S_B: v:=$80;
  711. S_W: v:=$8000;
  712. S_L: v:=aint($80000000);
  713. end;
  714. if (taicpu(p).oper[0]^.typ=Top_const) and
  715. (taicpu(p).oper[0]^.val=v) and
  716. (Taicpu(p).oper[1]^.typ=top_reg) and
  717. GetNextInstruction(p, hp1) and
  718. (hp1.typ=ait_instruction) and
  719. (taicpu(hp1).opcode=A_Jcc) and
  720. (Taicpu(hp1).condition in [C_E,C_NE]) and
  721. not(getsupreg(Taicpu(p).oper[1]^.reg) in usedregs) then
  722. begin
  723. Taicpu(p).opcode:=A_NEG;
  724. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  725. Taicpu(p).clearop(1);
  726. Taicpu(p).ops:=1;
  727. if Taicpu(hp1).condition=C_E then
  728. Taicpu(hp1).condition:=C_O
  729. else
  730. Taicpu(hp1).condition:=C_NO;
  731. continue;
  732. end;
  733. {
  734. @@2: @@2:
  735. .... ....
  736. cmp operand1,0
  737. jle/jbe @@1
  738. dec operand1 --> sub operand1,1
  739. jmp @@2 jge/jae @@2
  740. @@1: @@1:
  741. ... ....}
  742. if (taicpu(p).oper[0]^.typ = top_const) and
  743. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  744. (taicpu(p).oper[0]^.val = 0) and
  745. GetNextInstruction(p, hp1) and
  746. (hp1.typ = ait_instruction) and
  747. (taicpu(hp1).is_jmp) and
  748. (taicpu(hp1).opcode=A_Jcc) and
  749. (taicpu(hp1).condition in [C_LE,C_BE]) and
  750. GetNextInstruction(hp1,hp2) and
  751. (hp2.typ = ait_instruction) and
  752. (taicpu(hp2).opcode = A_DEC) and
  753. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  754. GetNextInstruction(hp2, hp3) and
  755. (hp3.typ = ait_instruction) and
  756. (taicpu(hp3).is_jmp) and
  757. (taicpu(hp3).opcode = A_JMP) and
  758. GetNextInstruction(hp3, hp4) and
  759. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  760. begin
  761. taicpu(hp2).Opcode := A_SUB;
  762. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  763. taicpu(hp2).loadConst(0,1);
  764. taicpu(hp2).ops:=2;
  765. taicpu(hp3).Opcode := A_Jcc;
  766. case taicpu(hp1).condition of
  767. C_LE: taicpu(hp3).condition := C_GE;
  768. C_BE: taicpu(hp3).condition := C_AE;
  769. end;
  770. asml.remove(p);
  771. asml.remove(hp1);
  772. p.free;
  773. hp1.free;
  774. p := hp2;
  775. continue;
  776. end
  777. end;
  778. A_FLD:
  779. begin
  780. if (taicpu(p).oper[0]^.typ = top_reg) and
  781. GetNextInstruction(p, hp1) and
  782. (hp1.typ = Ait_Instruction) and
  783. (taicpu(hp1).oper[0]^.typ = top_reg) and
  784. (taicpu(hp1).oper[1]^.typ = top_reg) and
  785. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  786. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  787. { change to
  788. fld reg fxxx reg,st
  789. fxxxp st, st1 (hp1)
  790. Remark: non commutative operations must be reversed!
  791. }
  792. begin
  793. case taicpu(hp1).opcode Of
  794. A_FMULP,A_FADDP,
  795. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  796. begin
  797. case taicpu(hp1).opcode Of
  798. A_FADDP: taicpu(hp1).opcode := A_FADD;
  799. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  800. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  801. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  802. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  803. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  804. end;
  805. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  806. taicpu(hp1).oper[1]^.reg := NR_ST;
  807. asml.remove(p);
  808. p.free;
  809. p := hp1;
  810. continue;
  811. end;
  812. end;
  813. end
  814. else
  815. if (taicpu(p).oper[0]^.typ = top_ref) and
  816. GetNextInstruction(p, hp2) and
  817. (hp2.typ = Ait_Instruction) and
  818. (taicpu(hp2).ops = 2) and
  819. (taicpu(hp2).oper[0]^.typ = top_reg) and
  820. (taicpu(hp2).oper[1]^.typ = top_reg) and
  821. (taicpu(p).opsize in [S_FS, S_FL]) and
  822. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  823. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  824. if GetLastInstruction(p, hp1) and
  825. (hp1.typ = Ait_Instruction) and
  826. ((taicpu(hp1).opcode = A_FLD) or
  827. (taicpu(hp1).opcode = A_FST)) and
  828. (taicpu(hp1).opsize = taicpu(p).opsize) and
  829. (taicpu(hp1).oper[0]^.typ = top_ref) and
  830. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  831. if ((taicpu(hp2).opcode = A_FMULP) or
  832. (taicpu(hp2).opcode = A_FADDP)) then
  833. { change to
  834. fld/fst mem1 (hp1) fld/fst mem1
  835. fld mem1 (p) fadd/
  836. faddp/ fmul st, st
  837. fmulp st, st1 (hp2) }
  838. begin
  839. asml.remove(p);
  840. p.free;
  841. p := hp1;
  842. if (taicpu(hp2).opcode = A_FADDP) then
  843. taicpu(hp2).opcode := A_FADD
  844. else
  845. taicpu(hp2).opcode := A_FMUL;
  846. taicpu(hp2).oper[1]^.reg := NR_ST;
  847. end
  848. else
  849. { change to
  850. fld/fst mem1 (hp1) fld/fst mem1
  851. fld mem1 (p) fld st}
  852. begin
  853. taicpu(p).changeopsize(S_FL);
  854. taicpu(p).loadreg(0,NR_ST);
  855. end
  856. else
  857. begin
  858. case taicpu(hp2).opcode Of
  859. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  860. { change to
  861. fld/fst mem1 (hp1) fld/fst mem1
  862. fld mem2 (p) fxxx mem2
  863. fxxxp st, st1 (hp2) }
  864. begin
  865. case taicpu(hp2).opcode Of
  866. A_FADDP: taicpu(p).opcode := A_FADD;
  867. A_FMULP: taicpu(p).opcode := A_FMUL;
  868. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  869. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  870. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  871. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  872. end;
  873. asml.remove(hp2);
  874. hp2.free;
  875. end
  876. end
  877. end
  878. end;
  879. A_FSTP,A_FISTP:
  880. if doFpuLoadStoreOpt(asmL,p) then
  881. continue;
  882. A_LEA:
  883. begin
  884. {removes seg register prefixes from LEA operations, as they
  885. don't do anything}
  886. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  887. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  888. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  889. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  890. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  891. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  892. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  893. (taicpu(p).oper[0]^.ref^.offset = 0) then
  894. begin
  895. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  896. taicpu(p).oper[1]^.reg);
  897. InsertLLItem(asml,p.previous,p.next, hp1);
  898. p.free;
  899. p := hp1;
  900. continue;
  901. end
  902. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  903. begin
  904. hp1 := tai(p.Next);
  905. asml.remove(p);
  906. p.free;
  907. p := hp1;
  908. continue;
  909. end
  910. else
  911. with taicpu(p).oper[0]^.ref^ do
  912. if (base = taicpu(p).oper[1]^.reg) then
  913. begin
  914. l := offset;
  915. if (l=1) then
  916. begin
  917. taicpu(p).opcode := A_INC;
  918. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  919. taicpu(p).ops := 1
  920. end
  921. else if (l=-1) then
  922. begin
  923. taicpu(p).opcode := A_DEC;
  924. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  925. taicpu(p).ops := 1;
  926. end
  927. else
  928. begin
  929. taicpu(p).opcode := A_ADD;
  930. taicpu(p).loadConst(0,l);
  931. end;
  932. end;
  933. end;
  934. A_MOV:
  935. begin
  936. TmpUsedRegs := UsedRegs;
  937. if (taicpu(p).oper[1]^.typ = top_reg) and
  938. (getsupreg(taicpu(p).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX, RS_ESI, RS_EDI]) and
  939. GetNextInstruction(p, hp1) and
  940. (tai(hp1).typ = ait_instruction) and
  941. (taicpu(hp1).opcode = A_MOV) and
  942. (taicpu(hp1).oper[0]^.typ = top_reg) and
  943. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  944. begin
  945. {we have "mov x, %treg; mov %treg, y"}
  946. if not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg, hp1, TmpUsedRegs)) then
  947. {we've got "mov x, %treg; mov %treg, y" where %treg is not used afterwards }
  948. case taicpu(p).oper[0]^.typ Of
  949. top_reg:
  950. begin
  951. { change "mov %reg, %treg; mov %treg, y"
  952. to "mov %reg, y" }
  953. taicpu(p).loadOper(1,taicpu(hp1).oper[1]^);
  954. asml.remove(hp1);
  955. hp1.free;
  956. continue;
  957. end;
  958. top_ref:
  959. if (taicpu(hp1).oper[1]^.typ = top_reg) then
  960. begin
  961. { change "mov mem, %treg; mov %treg, %reg"
  962. to "mov mem, %reg" }
  963. taicpu(p).loadoper(1,taicpu(hp1).oper[1]^);
  964. asml.remove(hp1);
  965. hp1.free;
  966. continue;
  967. end;
  968. end
  969. end
  970. else
  971. {Change "mov %reg1, %reg2; xxx %reg2, ???" to
  972. "mov %reg1, %reg2; xxx %reg1, ???" to avoid a write/read
  973. penalty}
  974. if (taicpu(p).oper[0]^.typ = top_reg) and
  975. (taicpu(p).oper[1]^.typ = top_reg) and
  976. GetNextInstruction(p,hp1) and
  977. (tai(hp1).typ = ait_instruction) and
  978. (taicpu(hp1).ops >= 1) and
  979. (taicpu(hp1).oper[0]^.typ = top_reg) and
  980. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
  981. {we have "mov %reg1, %reg2; XXX %reg2, ???"}
  982. begin
  983. if ((taicpu(hp1).opcode = A_OR) or
  984. (taicpu(hp1).opcode = A_TEST)) and
  985. (taicpu(hp1).oper[1]^.typ = top_reg) and
  986. (taicpu(hp1).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) then
  987. {we have "mov %reg1, %reg2; test/or %reg2, %reg2"}
  988. begin
  989. TmpUsedRegs := UsedRegs;
  990. { reg1 will be used after the first instruction, }
  991. { so update the allocation info }
  992. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  993. if GetNextInstruction(hp1, hp2) and
  994. (hp2.typ = ait_instruction) and
  995. taicpu(hp2).is_jmp and
  996. not(RegUsedAfterInstruction(taicpu(hp1).oper[0]^.reg, hp1, TmpUsedRegs)) then
  997. { change "mov %reg1, %reg2; test/or %reg2, %reg2; jxx" to
  998. "test %reg1, %reg1; jxx" }
  999. begin
  1000. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1001. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1002. asml.remove(p);
  1003. p.free;
  1004. p := hp1;
  1005. continue
  1006. end
  1007. else
  1008. {change "mov %reg1, %reg2; test/or %reg2, %reg2" to
  1009. "mov %reg1, %reg2; test/or %reg1, %reg1"}
  1010. begin
  1011. taicpu(hp1).loadoper(0,taicpu(p).oper[0]^);
  1012. taicpu(hp1).loadoper(1,taicpu(p).oper[0]^);
  1013. end;
  1014. end
  1015. { else
  1016. if (taicpu(p.next)^.opcode
  1017. in [A_PUSH, A_OR, A_XOR, A_AND, A_TEST])}
  1018. {change "mov %reg1, %reg2; push/or/xor/... %reg2, ???" to
  1019. "mov %reg1, %reg2; push/or/xor/... %reg1, ???"}
  1020. end
  1021. else
  1022. {leave out the mov from "mov reg, x(%frame_pointer); leave/ret" (with
  1023. x >= RetOffset) as it doesn't do anything (it writes either to a
  1024. parameter or to the temporary storage room for the function
  1025. result)}
  1026. if GetNextInstruction(p, hp1) and
  1027. (tai(hp1).typ = ait_instruction) then
  1028. if ((taicpu(hp1).opcode = A_LEAVE) or
  1029. (taicpu(hp1).opcode = A_RET)) and
  1030. (taicpu(p).oper[1]^.typ = top_ref) and
  1031. (taicpu(p).oper[1]^.ref^.base = current_procinfo.FramePointer) and
  1032. not(assigned(current_procinfo.procdef.funcretsym) and
  1033. (taicpu(p).oper[1]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1034. (taicpu(p).oper[1]^.ref^.index = NR_NO) and
  1035. (taicpu(p).oper[0]^.typ = top_reg) then
  1036. begin
  1037. asml.remove(p);
  1038. p.free;
  1039. p := hp1;
  1040. RemoveLastDeallocForFuncRes(asmL,p);
  1041. end
  1042. else
  1043. if (taicpu(p).oper[0]^.typ = top_reg) and
  1044. (taicpu(p).oper[1]^.typ = top_ref) and
  1045. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1046. (taicpu(hp1).opcode = A_CMP) and
  1047. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1048. RefsEqual(taicpu(p).oper[1]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1049. {change "mov reg1, mem1; cmp x, mem1" to "mov reg1, mem1; cmp x, reg1"}
  1050. begin
  1051. taicpu(hp1).loadreg(1,taicpu(p).oper[0]^.reg);
  1052. allocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1053. end;
  1054. { Next instruction is also a MOV ? }
  1055. if GetNextInstruction(p, hp1) and
  1056. (tai(hp1).typ = ait_instruction) and
  1057. (taicpu(hp1).opcode = A_MOV) and
  1058. (taicpu(hp1).opsize = taicpu(p).opsize) then
  1059. begin
  1060. if (taicpu(hp1).oper[0]^.typ = taicpu(p).oper[1]^.typ) and
  1061. (taicpu(hp1).oper[1]^.typ = taicpu(p).oper[0]^.typ) then
  1062. {mov reg1, mem1 or mov mem1, reg1
  1063. mov mem2, reg2 mov reg2, mem2}
  1064. begin
  1065. if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) then
  1066. {mov reg1, mem1 or mov mem1, reg1
  1067. mov mem2, reg1 mov reg2, mem1}
  1068. begin
  1069. if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) then
  1070. { Removes the second statement from
  1071. mov reg1, mem1/reg2
  1072. mov mem1/reg2, reg1 }
  1073. begin
  1074. if (taicpu(p).oper[0]^.typ = top_reg) then
  1075. AllocRegBetween(asmL,taicpu(p).oper[0]^.reg,p,hp1,usedregs);
  1076. asml.remove(hp1);
  1077. hp1.free;
  1078. end
  1079. else
  1080. begin
  1081. TmpUsedRegs := UsedRegs;
  1082. UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
  1083. if (taicpu(p).oper[1]^.typ = top_ref) and
  1084. { mov reg1, mem1
  1085. mov mem2, reg1 }
  1086. (taicpu(hp1).oper[0]^.ref^.refaddr = addr_no) and
  1087. GetNextInstruction(hp1, hp2) and
  1088. (hp2.typ = ait_instruction) and
  1089. (taicpu(hp2).opcode = A_CMP) and
  1090. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1091. (taicpu(hp2).oper[0]^.typ = TOp_Ref) and
  1092. (taicpu(hp2).oper[1]^.typ = TOp_Reg) and
  1093. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(p).oper[1]^.ref^) and
  1094. (taicpu(hp2).oper[1]^.reg= taicpu(p).oper[0]^.reg) and
  1095. not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, hp2, TmpUsedRegs)) then
  1096. { change to
  1097. mov reg1, mem1 mov reg1, mem1
  1098. mov mem2, reg1 cmp reg1, mem2
  1099. cmp mem1, reg1 }
  1100. begin
  1101. asml.remove(hp2);
  1102. hp2.free;
  1103. taicpu(hp1).opcode := A_CMP;
  1104. taicpu(hp1).loadref(1,taicpu(hp1).oper[0]^.ref^);
  1105. taicpu(hp1).loadreg(0,taicpu(p).oper[0]^.reg);
  1106. end;
  1107. end;
  1108. end
  1109. else
  1110. begin
  1111. tmpUsedRegs := UsedRegs;
  1112. if GetNextInstruction(hp1, hp2) and
  1113. (taicpu(p).oper[0]^.typ = top_ref) and
  1114. (taicpu(p).oper[1]^.typ = top_reg) and
  1115. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1116. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
  1117. (taicpu(hp1).oper[1]^.typ = top_ref) and
  1118. (tai(hp2).typ = ait_instruction) and
  1119. (taicpu(hp2).opcode = A_MOV) and
  1120. (taicpu(hp2).opsize = taicpu(p).opsize) and
  1121. (taicpu(hp2).oper[1]^.typ = top_reg) and
  1122. (taicpu(hp2).oper[0]^.typ = top_ref) and
  1123. RefsEqual(taicpu(hp2).oper[0]^.ref^, taicpu(hp1).oper[1]^.ref^) then
  1124. if not regInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^) and
  1125. not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,hp1,tmpUsedRegs)) then
  1126. { mov mem1, %reg1
  1127. mov %reg1, mem2
  1128. mov mem2, reg2
  1129. to:
  1130. mov mem1, reg2
  1131. mov reg2, mem2}
  1132. begin
  1133. AllocRegBetween(asmL,taicpu(hp2).oper[1]^.reg,p,hp2,usedregs);
  1134. taicpu(p).loadoper(1,taicpu(hp2).oper[1]^);
  1135. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1136. asml.remove(hp2);
  1137. hp2.free;
  1138. end
  1139. else
  1140. if (taicpu(p).oper[1]^.reg <> taicpu(hp2).oper[1]^.reg) and
  1141. not(RegInRef(getsupreg(taicpu(p).oper[1]^.reg),taicpu(p).oper[0]^.ref^)) and
  1142. not(RegInRef(getsupreg(taicpu(hp2).oper[1]^.reg),taicpu(hp2).oper[0]^.ref^)) then
  1143. { mov mem1, reg1 mov mem1, reg1
  1144. mov reg1, mem2 mov reg1, mem2
  1145. mov mem2, reg2 mov mem2, reg1
  1146. to: to:
  1147. mov mem1, reg1 mov mem1, reg1
  1148. mov mem1, reg2 mov reg1, mem2
  1149. mov reg1, mem2
  1150. or (if mem1 depends on reg1
  1151. and/or if mem2 depends on reg2)
  1152. to:
  1153. mov mem1, reg1
  1154. mov reg1, mem2
  1155. mov reg1, reg2
  1156. }
  1157. begin
  1158. taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^);
  1159. taicpu(hp1).loadReg(1,taicpu(hp2).oper[1]^.reg);
  1160. taicpu(hp2).loadRef(1,taicpu(hp2).oper[0]^.ref^);
  1161. taicpu(hp2).loadReg(0,taicpu(p).oper[1]^.reg);
  1162. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1163. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  1164. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1165. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.base,p,hp2,usedregs);
  1166. if (taicpu(p).oper[0]^.ref^.index <> NR_NO) and
  1167. (getsupreg(taicpu(p).oper[0]^.ref^.index) in [RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]) then
  1168. allocRegBetween(asmL,taicpu(p).oper[0]^.ref^.index,p,hp2,usedregs);
  1169. end
  1170. else
  1171. if (taicpu(hp1).Oper[0]^.reg <> taicpu(hp2).Oper[1]^.reg) then
  1172. begin
  1173. taicpu(hp2).loadReg(0,taicpu(hp1).Oper[0]^.reg);
  1174. allocRegBetween(asmL,taicpu(p).oper[1]^.reg,p,hp2,usedregs);
  1175. end
  1176. else
  1177. begin
  1178. asml.remove(hp2);
  1179. hp2.free;
  1180. end
  1181. end
  1182. end
  1183. else
  1184. (* {movl [mem1],reg1
  1185. movl [mem1],reg2
  1186. to:
  1187. movl [mem1],reg1
  1188. movl reg1,reg2 }
  1189. if (taicpu(p).oper[0]^.typ = top_ref) and
  1190. (taicpu(p).oper[1]^.typ = top_reg) and
  1191. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1192. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1193. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1194. RefsEqual(TReference(taicpu(p).oper[0]^^),taicpu(hp1).oper[0]^^.ref^) and
  1195. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.base) and
  1196. (taicpu(p).oper[1]^.reg<>taicpu(hp1).oper[0]^^.ref^.index) then
  1197. taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg)
  1198. else*)
  1199. { movl const1,[mem1]
  1200. movl [mem1],reg1
  1201. to:
  1202. movl const1,reg1
  1203. movl reg1,[mem1] }
  1204. if (taicpu(p).oper[0]^.typ = top_const) and
  1205. (taicpu(p).oper[1]^.typ = top_ref) and
  1206. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1207. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1208. (taicpu(p).opsize = taicpu(hp1).opsize) and
  1209. RefsEqual(taicpu(hp1).oper[0]^.ref^,taicpu(p).oper[1]^.ref^) and
  1210. not(reginref(getsupreg(taicpu(hp1).oper[1]^.reg),taicpu(hp1).oper[0]^.ref^)) then
  1211. begin
  1212. allocregbetween(asml,taicpu(hp1).oper[1]^.reg,p,hp1,usedregs);
  1213. taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1214. taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
  1215. taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
  1216. end
  1217. end;
  1218. if GetNextInstruction(p, hp1) and
  1219. (Tai(hp1).typ = ait_instruction) and
  1220. ((Taicpu(hp1).opcode = A_BTS) or (Taicpu(hp1).opcode = A_BTR)) and
  1221. (Taicpu(hp1).opsize = Taicpu(p).opsize) and
  1222. GetNextInstruction(hp1, hp2) and
  1223. (Tai(hp2).typ = ait_instruction) and
  1224. (Taicpu(hp2).opcode = A_OR) and
  1225. (Taicpu(hp1).opsize = Taicpu(p).opsize) and
  1226. (Taicpu(hp2).opsize = Taicpu(p).opsize) and
  1227. (Taicpu(p).oper[0]^.typ = top_const) and (Taicpu(p).oper[0]^.val=0) and
  1228. (Taicpu(p).oper[1]^.typ = top_reg) and
  1229. (Taicpu(hp1).oper[1]^.typ = top_reg) and
  1230. (Taicpu(p).oper[1]^.reg=Taicpu(hp1).oper[1]^.reg) and
  1231. (Taicpu(hp2).oper[1]^.typ = top_reg) and
  1232. (Taicpu(p).oper[1]^.reg=Taicpu(hp2).oper[1]^.reg) then
  1233. {mov reg1,0
  1234. bts reg1,operand1 --> mov reg1,operand2
  1235. or reg1,operand2 bts reg1,operand1}
  1236. begin
  1237. Taicpu(hp2).opcode:=A_MOV;
  1238. asml.remove(hp1);
  1239. insertllitem(asml,hp2,hp2.next,hp1);
  1240. asml.remove(p);
  1241. p.free;
  1242. end;
  1243. end;
  1244. A_MOVSX,
  1245. A_MOVZX :
  1246. begin
  1247. if (taicpu(p).oper[1]^.typ = top_reg) and
  1248. GetNextInstruction(p,hp1) and
  1249. (hp1.typ = ait_instruction) and
  1250. IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
  1251. (getsupreg(taicpu(hp1).oper[0]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) and
  1252. GetNextInstruction(hp1,hp2) and
  1253. (hp2.typ = ait_instruction) and
  1254. (taicpu(hp2).opcode = A_MOV) and
  1255. (taicpu(hp2).oper[0]^.typ = top_reg) and
  1256. OpsEqual(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) then
  1257. { change movsX/movzX reg/ref, reg2 }
  1258. { add/sub/or/... reg3/$const, reg2 }
  1259. { mov reg2 reg/ref }
  1260. { to add/sub/or/... reg3/$const, reg/ref }
  1261. begin
  1262. { by example:
  1263. movswl %si,%eax movswl %si,%eax p
  1264. decl %eax addl %edx,%eax hp1
  1265. movw %ax,%si movw %ax,%si hp2
  1266. ->
  1267. movswl %si,%eax movswl %si,%eax p
  1268. decw %eax addw %edx,%eax hp1
  1269. movw %ax,%si movw %ax,%si hp2
  1270. }
  1271. taicpu(hp1).changeopsize(taicpu(hp2).opsize);
  1272. {
  1273. ->
  1274. movswl %si,%eax movswl %si,%eax p
  1275. decw %si addw %dx,%si hp1
  1276. movw %ax,%si movw %ax,%si hp2
  1277. }
  1278. case taicpu(hp1).ops of
  1279. 1:
  1280. taicpu(hp1).loadoper(0,taicpu(hp2).oper[1]^);
  1281. 2:
  1282. begin
  1283. taicpu(hp1).loadoper(1,taicpu(hp2).oper[1]^);
  1284. if (taicpu(hp1).oper[0]^.typ = top_reg) then
  1285. setsubreg(taicpu(hp1).oper[0]^.reg,getsubreg(taicpu(hp2).oper[0]^.reg));
  1286. end;
  1287. else
  1288. internalerror(2008042701);
  1289. end;
  1290. {
  1291. ->
  1292. decw %si addw %dx,%si p
  1293. }
  1294. asml.remove(p);
  1295. asml.remove(hp2);
  1296. p.free;
  1297. hp2.free;
  1298. p := hp1
  1299. end
  1300. { removes superfluous And's after movzx's }
  1301. else if taicpu(p).opcode=A_MOVZX then
  1302. begin
  1303. if (taicpu(p).oper[1]^.typ = top_reg) and
  1304. GetNextInstruction(p, hp1) and
  1305. (tai(hp1).typ = ait_instruction) and
  1306. (taicpu(hp1).opcode = A_AND) and
  1307. (taicpu(hp1).oper[0]^.typ = top_const) and
  1308. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1309. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1310. case taicpu(p).opsize Of
  1311. S_BL, S_BW:
  1312. if (taicpu(hp1).oper[0]^.val = $ff) then
  1313. begin
  1314. asml.remove(hp1);
  1315. hp1.free;
  1316. end;
  1317. S_WL:
  1318. if (taicpu(hp1).oper[0]^.val = $ffff) then
  1319. begin
  1320. asml.remove(hp1);
  1321. hp1.free;
  1322. end;
  1323. end;
  1324. {changes some movzx constructs to faster synonyms (all examples
  1325. are given with eax/ax, but are also valid for other registers)}
  1326. if (taicpu(p).oper[1]^.typ = top_reg) then
  1327. if (taicpu(p).oper[0]^.typ = top_reg) then
  1328. case taicpu(p).opsize of
  1329. S_BW:
  1330. begin
  1331. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1332. not(cs_opt_size in current_settings.optimizerswitches) then
  1333. {Change "movzbw %al, %ax" to "andw $0x0ffh, %ax"}
  1334. begin
  1335. taicpu(p).opcode := A_AND;
  1336. taicpu(p).changeopsize(S_W);
  1337. taicpu(p).loadConst(0,$ff);
  1338. end
  1339. else if GetNextInstruction(p, hp1) and
  1340. (tai(hp1).typ = ait_instruction) and
  1341. (taicpu(hp1).opcode = A_AND) and
  1342. (taicpu(hp1).oper[0]^.typ = top_const) and
  1343. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1344. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1345. {Change "movzbw %reg1, %reg2; andw $const, %reg2"
  1346. to "movw %reg1, %reg2; andw $(const and $ff), %reg2"}
  1347. begin
  1348. taicpu(p).opcode := A_MOV;
  1349. taicpu(p).changeopsize(S_W);
  1350. setsubreg(taicpu(p).oper[0]^.reg,R_SUBW);
  1351. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1352. end;
  1353. end;
  1354. S_BL:
  1355. begin
  1356. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1357. not(cs_opt_size in current_settings.optimizerswitches) then
  1358. {Change "movzbl %al, %eax" to "andl $0x0ffh, %eax"}
  1359. begin
  1360. taicpu(p).opcode := A_AND;
  1361. taicpu(p).changeopsize(S_L);
  1362. taicpu(p).loadConst(0,$ff)
  1363. end
  1364. else if GetNextInstruction(p, hp1) and
  1365. (tai(hp1).typ = ait_instruction) and
  1366. (taicpu(hp1).opcode = A_AND) and
  1367. (taicpu(hp1).oper[0]^.typ = top_const) and
  1368. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1369. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1370. {Change "movzbl %reg1, %reg2; andl $const, %reg2"
  1371. to "movl %reg1, %reg2; andl $(const and $ff), %reg2"}
  1372. begin
  1373. taicpu(p).opcode := A_MOV;
  1374. taicpu(p).changeopsize(S_L);
  1375. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1376. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1377. end
  1378. end;
  1379. S_WL:
  1380. begin
  1381. if (getsupreg(taicpu(p).oper[0]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)) and
  1382. not(cs_opt_size in current_settings.optimizerswitches) then
  1383. {Change "movzwl %ax, %eax" to "andl $0x0ffffh, %eax"}
  1384. begin
  1385. taicpu(p).opcode := A_AND;
  1386. taicpu(p).changeopsize(S_L);
  1387. taicpu(p).loadConst(0,$ffff);
  1388. end
  1389. else if GetNextInstruction(p, hp1) and
  1390. (tai(hp1).typ = ait_instruction) and
  1391. (taicpu(hp1).opcode = A_AND) and
  1392. (taicpu(hp1).oper[0]^.typ = top_const) and
  1393. (taicpu(hp1).oper[1]^.typ = top_reg) and
  1394. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1395. {Change "movzwl %reg1, %reg2; andl $const, %reg2"
  1396. to "movl %reg1, %reg2; andl $(const and $ffff), %reg2"}
  1397. begin
  1398. taicpu(p).opcode := A_MOV;
  1399. taicpu(p).changeopsize(S_L);
  1400. setsubreg(taicpu(p).oper[0]^.reg,R_SUBWHOLE);
  1401. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1402. end;
  1403. end;
  1404. end
  1405. else if (taicpu(p).oper[0]^.typ = top_ref) then
  1406. begin
  1407. if GetNextInstruction(p, hp1) and
  1408. (tai(hp1).typ = ait_instruction) and
  1409. (taicpu(hp1).opcode = A_AND) and
  1410. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1411. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1412. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
  1413. begin
  1414. taicpu(p).opcode := A_MOV;
  1415. case taicpu(p).opsize Of
  1416. S_BL:
  1417. begin
  1418. taicpu(p).changeopsize(S_L);
  1419. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1420. end;
  1421. S_WL:
  1422. begin
  1423. taicpu(p).changeopsize(S_L);
  1424. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ffff);
  1425. end;
  1426. S_BW:
  1427. begin
  1428. taicpu(p).changeopsize(S_W);
  1429. taicpu(hp1).loadConst(0,taicpu(hp1).oper[0]^.val and $ff);
  1430. end;
  1431. end;
  1432. end;
  1433. end;
  1434. end;
  1435. end;
  1436. (* should not be generated anymore by the current code generator
  1437. A_POP:
  1438. begin
  1439. if target_info.system=system_i386_go32v2 then
  1440. begin
  1441. { Transform a series of pop/pop/pop/push/push/push to }
  1442. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  1443. { because I'm not sure whether they can cope with }
  1444. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  1445. { such a problem when using esp as frame pointer (JM) }
  1446. if (taicpu(p).oper[0]^.typ = top_reg) then
  1447. begin
  1448. hp1 := p;
  1449. hp2 := p;
  1450. l := 0;
  1451. while getNextInstruction(hp1,hp1) and
  1452. (hp1.typ = ait_instruction) and
  1453. (taicpu(hp1).opcode = A_POP) and
  1454. (taicpu(hp1).oper[0]^.typ = top_reg) do
  1455. begin
  1456. hp2 := hp1;
  1457. inc(l,4);
  1458. end;
  1459. getLastInstruction(p,hp3);
  1460. l1 := 0;
  1461. while (hp2 <> hp3) and
  1462. assigned(hp1) and
  1463. (hp1.typ = ait_instruction) and
  1464. (taicpu(hp1).opcode = A_PUSH) and
  1465. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1466. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  1467. begin
  1468. { change it to a two op operation }
  1469. taicpu(hp2).oper[1]^.typ:=top_none;
  1470. taicpu(hp2).ops:=2;
  1471. taicpu(hp2).opcode := A_MOV;
  1472. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  1473. reference_reset(tmpref);
  1474. tmpRef.base.enum:=R_INTREGISTER;
  1475. tmpRef.base.number:=NR_STACK_POINTER_REG;
  1476. convert_register_to_enum(tmpref.base);
  1477. tmpRef.offset := l;
  1478. taicpu(hp2).loadRef(0,tmpRef);
  1479. hp4 := hp1;
  1480. getNextInstruction(hp1,hp1);
  1481. asml.remove(hp4);
  1482. hp4.free;
  1483. getLastInstruction(hp2,hp2);
  1484. dec(l,4);
  1485. inc(l1);
  1486. end;
  1487. if l <> -4 then
  1488. begin
  1489. inc(l,4);
  1490. for l1 := l1 downto 1 do
  1491. begin
  1492. getNextInstruction(hp2,hp2);
  1493. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  1494. end
  1495. end
  1496. end
  1497. end
  1498. else
  1499. begin
  1500. if (taicpu(p).oper[0]^.typ = top_reg) and
  1501. GetNextInstruction(p, hp1) and
  1502. (tai(hp1).typ=ait_instruction) and
  1503. (taicpu(hp1).opcode=A_PUSH) and
  1504. (taicpu(hp1).oper[0]^.typ = top_reg) and
  1505. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  1506. begin
  1507. { change it to a two op operation }
  1508. taicpu(p).oper[1]^.typ:=top_none;
  1509. taicpu(p).ops:=2;
  1510. taicpu(p).opcode := A_MOV;
  1511. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1512. reference_reset(tmpref);
  1513. TmpRef.base.enum := R_ESP;
  1514. taicpu(p).loadRef(0,TmpRef);
  1515. asml.remove(hp1);
  1516. hp1.free;
  1517. end;
  1518. end;
  1519. end;
  1520. *)
  1521. A_PUSH:
  1522. begin
  1523. if (taicpu(p).opsize = S_W) and
  1524. (taicpu(p).oper[0]^.typ = Top_Const) and
  1525. GetNextInstruction(p, hp1) and
  1526. (tai(hp1).typ = ait_instruction) and
  1527. (taicpu(hp1).opcode = A_PUSH) and
  1528. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1529. (taicpu(hp1).opsize = S_W) then
  1530. begin
  1531. taicpu(p).changeopsize(S_L);
  1532. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1533. asml.remove(hp1);
  1534. hp1.free;
  1535. end;
  1536. end;
  1537. A_SHL, A_SAL:
  1538. begin
  1539. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1540. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1541. (taicpu(p).opsize = S_L) and
  1542. (taicpu(p).oper[0]^.val <= 3) then
  1543. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1544. begin
  1545. TmpBool1 := True; {should we check the next instruction?}
  1546. TmpBool2 := False; {have we found an add/sub which could be
  1547. integrated in the lea?}
  1548. reference_reset(tmpref,2);
  1549. TmpRef.index := taicpu(p).oper[1]^.reg;
  1550. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1551. while TmpBool1 and
  1552. GetNextInstruction(p, hp1) and
  1553. (tai(hp1).typ = ait_instruction) and
  1554. ((((taicpu(hp1).opcode = A_ADD) or
  1555. (taicpu(hp1).opcode = A_SUB)) and
  1556. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1557. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1558. (((taicpu(hp1).opcode = A_INC) or
  1559. (taicpu(hp1).opcode = A_DEC)) and
  1560. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1561. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1562. (not GetNextInstruction(hp1,hp2) or
  1563. not instrReadsFlags(hp2)) Do
  1564. begin
  1565. TmpBool1 := False;
  1566. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1567. begin
  1568. TmpBool1 := True;
  1569. TmpBool2 := True;
  1570. case taicpu(hp1).opcode of
  1571. A_ADD:
  1572. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1573. A_SUB:
  1574. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1575. end;
  1576. asml.remove(hp1);
  1577. hp1.free;
  1578. end
  1579. else
  1580. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1581. (((taicpu(hp1).opcode = A_ADD) and
  1582. (TmpRef.base = NR_NO)) or
  1583. (taicpu(hp1).opcode = A_INC) or
  1584. (taicpu(hp1).opcode = A_DEC)) then
  1585. begin
  1586. TmpBool1 := True;
  1587. TmpBool2 := True;
  1588. case taicpu(hp1).opcode of
  1589. A_ADD:
  1590. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1591. A_INC:
  1592. inc(TmpRef.offset);
  1593. A_DEC:
  1594. dec(TmpRef.offset);
  1595. end;
  1596. asml.remove(hp1);
  1597. hp1.free;
  1598. end;
  1599. end;
  1600. if TmpBool2 or
  1601. ((current_settings.optimizecputype < cpu_Pentium2) and
  1602. (taicpu(p).oper[0]^.val <= 3) and
  1603. not(cs_opt_size in current_settings.optimizerswitches)) then
  1604. begin
  1605. if not(TmpBool2) and
  1606. (taicpu(p).oper[0]^.val = 1) then
  1607. begin
  1608. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1609. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1610. end
  1611. else
  1612. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1613. taicpu(p).oper[1]^.reg);
  1614. InsertLLItem(asml,p.previous, p.next, hp1);
  1615. p.free;
  1616. p := hp1;
  1617. end;
  1618. end
  1619. else
  1620. if (current_settings.optimizecputype < cpu_Pentium2) and
  1621. (taicpu(p).oper[0]^.typ = top_const) and
  1622. (taicpu(p).oper[1]^.typ = top_reg) then
  1623. if (taicpu(p).oper[0]^.val = 1) then
  1624. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1625. but faster on a 486, and pairable in both U and V pipes on the Pentium
  1626. (unlike shl, which is only pairable in the U pipe)}
  1627. begin
  1628. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1629. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1630. InsertLLItem(asml,p.previous, p.next, hp1);
  1631. p.free;
  1632. p := hp1;
  1633. end
  1634. else if (taicpu(p).opsize = S_L) and
  1635. (taicpu(p).oper[0]^.val<= 3) then
  1636. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1637. "shl $3, %reg" to "lea (,%reg,8), %reg"}
  1638. begin
  1639. reference_reset(tmpref,2);
  1640. TmpRef.index := taicpu(p).oper[1]^.reg;
  1641. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1642. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1643. InsertLLItem(asml,p.previous, p.next, hp1);
  1644. p.free;
  1645. p := hp1;
  1646. end
  1647. end;
  1648. A_SETcc :
  1649. { changes
  1650. setcc (funcres) setcc reg
  1651. movb (funcres), reg to leave/ret
  1652. leave/ret }
  1653. begin
  1654. if (taicpu(p).oper[0]^.typ = top_ref) and
  1655. GetNextInstruction(p, hp1) and
  1656. GetNextInstruction(hp1, hp2) and
  1657. (hp2.typ = ait_instruction) and
  1658. ((taicpu(hp2).opcode = A_LEAVE) or
  1659. (taicpu(hp2).opcode = A_RET)) and
  1660. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1661. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1662. not(assigned(current_procinfo.procdef.funcretsym) and
  1663. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1664. (hp1.typ = ait_instruction) and
  1665. (taicpu(hp1).opcode = A_MOV) and
  1666. (taicpu(hp1).opsize = S_B) and
  1667. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1668. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1669. begin
  1670. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1671. asml.remove(hp1);
  1672. hp1.free;
  1673. end
  1674. end;
  1675. A_SUB:
  1676. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1677. { * change "sub/add const1, reg" or "dec reg" followed by
  1678. "sub const2, reg" to one "sub ..., reg" }
  1679. begin
  1680. if (taicpu(p).oper[0]^.typ = top_const) and
  1681. (taicpu(p).oper[1]^.typ = top_reg) then
  1682. if (taicpu(p).oper[0]^.val = 2) and
  1683. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1684. { Don't do the sub/push optimization if the sub }
  1685. { comes from setting up the stack frame (JM) }
  1686. (not getLastInstruction(p,hp1) or
  1687. (hp1.typ <> ait_instruction) or
  1688. (taicpu(hp1).opcode <> A_MOV) or
  1689. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1690. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1691. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1692. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1693. begin
  1694. hp1 := tai(p.next);
  1695. while Assigned(hp1) and
  1696. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1697. not regReadByInstruction(RS_ESP,hp1) and
  1698. not regModifiedByInstruction(RS_ESP,hp1) do
  1699. hp1 := tai(hp1.next);
  1700. if Assigned(hp1) and
  1701. (tai(hp1).typ = ait_instruction) and
  1702. (taicpu(hp1).opcode = A_PUSH) and
  1703. (taicpu(hp1).opsize = S_W) then
  1704. begin
  1705. taicpu(hp1).changeopsize(S_L);
  1706. if taicpu(hp1).oper[0]^.typ=top_reg then
  1707. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1708. hp1 := tai(p.next);
  1709. asml.remove(p);
  1710. p.free;
  1711. p := hp1;
  1712. continue
  1713. end;
  1714. if DoSubAddOpt(p) then
  1715. continue;
  1716. end
  1717. else if DoSubAddOpt(p) then
  1718. continue
  1719. end;
  1720. end;
  1721. end; { if is_jmp }
  1722. end;
  1723. end;
  1724. updateUsedRegs(UsedRegs,p);
  1725. p:=tai(p.next);
  1726. end;
  1727. end;
  { Second peephole pass: walks the list entries from BlockStart up to (but not
    including) BlockEnd and rewrites the following instruction patterns in asml:
      * jb/jnb over a single inc/dec   -> (cmc +) adc/sbb operand,0
      * jCC over a short run of register movs (optionally with an else-run
        behind an unconditional jmp) -> CMOVcc, when cputype >= Pentium2
      * fstp/fistp                     -> delegated to doFpuLoadStoreOpt
      * mov reg1,reg2; imul y,reg2     -> imul y,reg1,reg2
      * mov reg1,reg2; mov/zx/sx (reg2,..),reg2 -> mov/zx/sx (reg1,..),reg2
      * mov (ref),reg; <arith> x,reg; mov reg,(ref) -> <arith> x,(ref)
    Instructions for which InsContainsSegRef returns true are skipped. }
  procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);

    { Returns true if p is a 16 or 32 bit register-to-register mov, the only
      form this pass turns into a CMOVcc. }
    function CanBeCMOV(p : tai) : boolean;
      begin
        CanBeCMOV:=assigned(p) and (p.typ=ait_instruction) and
          (taicpu(p).opcode=A_MOV) and
          (taicpu(p).opsize in [S_L,S_W]) and
          ((taicpu(p).oper[0]^.typ = top_reg)
            { we can't use cmov ref,reg because
              ref could be nil and cmov still throws an exception
              if ref=nil but the mov isn't done (FK)
              or ((taicpu(p).oper[0]^.typ = top_ref) and
              (taicpu(p).oper[0]^.ref^.refaddr = addr_no))
            }
          ) and
          (taicpu(p).oper[1]^.typ in [top_reg]);
      end;

    var
      p,hp1,hp2: tai;
      l : longint;
      condition : tasmcond;
      hp3: tai;
      UsedRegs, TmpUsedRegs: TRegSet;
      carryadd_opcode: Tasmop;
    begin
      p := BlockStart;
      UsedRegs := [];
      while (p <> BlockEnd) Do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_Jcc:
                    begin
                      { jb @@1                            cmc
                        inc/dec operand           -->     adc/sbb operand,0
                        @@1:
                        ... and ...
                        jnb @@1
                        inc/dec operand           -->     adc/sbb operand,0
                        @@1: }
                      if GetNextInstruction(p,hp1) and (hp1.typ=ait_instruction) and
                         GetNextInstruction(hp1,hp2) and (hp2.typ=ait_label) and
                         (Tasmlabel(Taicpu(p).oper[0]^.ref^.symbol)=Tai_label(hp2).labsym) then
                        begin
                          carryadd_opcode:=A_NONE;
                          if Taicpu(p).condition in [C_NAE,C_B] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { the jump itself is recycled as the cmc }
                                  Taicpu(p).clearop(0);
                                  Taicpu(p).ops:=0;
                                  Taicpu(p).is_jmp:=false;
                                  Taicpu(p).opcode:=A_CMC;
                                  Taicpu(p).condition:=C_NONE;
                                  { inc/dec operand becomes adc/sbb $0,operand }
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  { p is not advanced: it is looked at again as a cmc }
                                  continue;
                                end;
                            end;
                          if Taicpu(p).condition in [C_AE,C_NB] then
                            begin
                              if Taicpu(hp1).opcode=A_INC then
                                carryadd_opcode:=A_ADC;
                              if Taicpu(hp1).opcode=A_DEC then
                                carryadd_opcode:=A_SBB;
                              if carryadd_opcode<>A_NONE then
                                begin
                                  { no cmc needed here, the jump is simply dropped }
                                  asml.remove(p);
                                  p.free;
                                  Taicpu(hp1).ops:=2;
                                  Taicpu(hp1).loadoper(1,Taicpu(hp1).oper[0]^);
                                  Taicpu(hp1).loadconst(0,0);
                                  Taicpu(hp1).opcode:=carryadd_opcode;
                                  p:=hp1;
                                  continue;
                                end;
                            end;
                        end;
                      if (current_settings.cputype>=cpu_Pentium2) then
                        begin
                          { check for
                                 jCC   xxx
                                 <several movs>
                              xxx:
                          }
                          { l counts the convertible movs directly behind the jCC }
                          l:=0;
                          GetNextInstruction(p, hp1);
                          while assigned(hp1) and
                            CanBeCMOV(hp1) and
                            { stop on labels }
                            not(hp1.typ=ait_label) do
                            begin
                              inc(l);
                              GetNextInstruction(hp1,hp1);
                            end;
                          if assigned(hp1) then
                            begin
                              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                begin
                                  if (l<=4) and (l>0) then
                                    begin
                                      { the movs are executed when the jump is NOT taken }
                                      condition:=inverse_cond(taicpu(p).condition);
                                      hp2:=p;
                                      GetNextInstruction(p,hp1);
                                      p:=hp1;
                                      repeat
                                        taicpu(hp1).opcode:=A_CMOVcc;
                                        taicpu(hp1).condition:=condition;
                                        GetNextInstruction(hp1,hp1);
                                      until not(assigned(hp1)) or
                                        not(CanBeCMOV(hp1));
                                      { wait with removing else GetNextInstruction could
                                        ignore the label if it was the only usage in the
                                        jump moved away }
                                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                      asml.remove(hp2);
                                      hp2.free;
                                      continue;
                                    end;
                                end
                              else
                                begin
                                  { check further for
                                         jCC   xxx
                                         <several movs 1>
                                         jmp   yyy
                                     xxx:
                                         <several movs 2>
                                     yyy:
                                  }
                                  { hp2 points to jmp yyy }
                                  hp2:=hp1;
                                  { skip hp1 to xxx }
                                  GetNextInstruction(hp1, hp1);
                                  if assigned(hp2) and
                                    assigned(hp1) and
                                    { with an empty <several movs 1> the instruction
                                      following the jCC is the jmp itself; p would be
                                      set to it below and the jmp is freed afterwards,
                                      leaving p dangling }
                                    (l>0) and
                                    (l<=3) and
                                    (hp2.typ=ait_instruction) and
                                    (taicpu(hp2).is_jmp) and
                                    (taicpu(hp2).condition=C_None) and
                                    { real label and jump, no further references to the
                                      label are allowed }
                                    (tasmlabel(taicpu(p).oper[0]^.ref^.symbol).getrefs=2) and
                                    FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
                                    begin
                                      l:=0;
                                      { skip hp1 to <several moves 2> }
                                      GetNextInstruction(hp1, hp1);
                                      while assigned(hp1) and
                                        CanBeCMOV(hp1) do
                                        begin
                                          inc(l);
                                          GetNextInstruction(hp1, hp1);
                                        end;
                                      { hp1 points to yyy: }
                                      if assigned(hp1) and
                                        FindLabel(tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol),hp1) then
                                        begin
                                          { <several movs 1> run when the jCC is not taken }
                                          condition:=inverse_cond(taicpu(p).condition);
                                          GetNextInstruction(p,hp1);
                                          hp3:=p;
                                          p:=hp1;
                                          repeat
                                            taicpu(hp1).opcode:=A_CMOVcc;
                                            taicpu(hp1).condition:=condition;
                                            GetNextInstruction(hp1,hp1);
                                          until not(assigned(hp1)) or
                                            not(CanBeCMOV(hp1));
                                          { hp2 is still at jmp yyy }
                                          GetNextInstruction(hp2,hp1);
                                          { hp1 is now at xxx: }
                                          { <several movs 2> run when the jCC is taken }
                                          condition:=inverse_cond(condition);
                                          GetNextInstruction(hp1,hp1);
                                          { hp1 is now at <several movs 2>; this run may be
                                            empty, so test before touching the entry instead
                                            of blindly rewriting whatever follows xxx: }
                                          while assigned(hp1) and
                                            CanBeCMOV(hp1) do
                                            begin
                                              taicpu(hp1).opcode:=A_CMOVcc;
                                              taicpu(hp1).condition:=condition;
                                              GetNextInstruction(hp1,hp1);
                                            end;
                                          {
                                          asml.remove(hp1.next)
                                          hp1.next.free;
                                          asml.remove(hp1);
                                          hp1.free;
                                          }
                                          { remove jCC }
                                          tasmlabel(taicpu(hp3).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp3);
                                          hp3.free;
                                          { remove jmp }
                                          tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
                                          asml.remove(hp2);
                                          hp2.free;
                                          continue;
                                        end;
                                    end;
                                end;
                            end;
                        end;
                    end;
                  A_FSTP,A_FISTP:
                    if doFpuLoadStoreOpt(asmL,p) then
                      continue;
                  A_IMUL:
                    begin
                      if (taicpu(p).ops >= 2) and
                         ((taicpu(p).oper[0]^.typ = top_const) or
                          ((taicpu(p).oper[0]^.typ = top_ref) and (taicpu(p).oper[0]^.ref^.refaddr=addr_full))) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         ((taicpu(p).ops = 2) or
                          ((taicpu(p).oper[2]^.typ = top_reg) and
                           (taicpu(p).oper[2]^.reg = taicpu(p).oper[1]^.reg))) and
                         getLastInstruction(p,hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_MOV) and
                         (taicpu(hp1).oper[0]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
                        { change "mov reg1,reg2; imul y,reg2" to "imul y,reg1,reg2" }
                        begin
                          taicpu(p).ops := 3;
                          taicpu(p).loadreg(1,taicpu(hp1).oper[0]^.reg);
                          taicpu(p).loadreg(2,taicpu(hp1).oper[1]^.reg);
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_MOV:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_reg) and
                         (taicpu(p).oper[1]^.typ = top_reg) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_Instruction) and
                         ((taicpu(hp1).opcode = A_MOV) or
                          (taicpu(hp1).opcode = A_MOVZX) or
                          (taicpu(hp1).opcode = A_MOVSX)) and
                         (taicpu(hp1).oper[0]^.typ = top_ref) and
                         (taicpu(hp1).oper[1]^.typ = top_reg) and
                         ((taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) or
                          (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg)) and
                         (getsupreg(taicpu(hp1).oper[1]^.reg) = getsupreg(taicpu(p).oper[1]^.reg)) then
                        {mov reg1, reg2
                         mov/zx/sx (reg2, ..), reg2      to   mov/zx/sx (reg1, ..), reg2}
                        begin
                          if (taicpu(hp1).oper[0]^.ref^.base = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.base := taicpu(p).oper[0]^.reg;
                          if (taicpu(hp1).oper[0]^.ref^.index = taicpu(p).oper[1]^.reg) then
                            taicpu(hp1).oper[0]^.ref^.index := taicpu(p).oper[0]^.reg;
                          asml.remove(p);
                          p.free;
                          p := hp1;
                          continue;
                        end
                      else if (taicpu(p).oper[0]^.typ = top_ref) and
                        GetNextInstruction(p,hp1) and
                        (hp1.typ = ait_instruction) and
                        IsFoldableArithOp(taicpu(hp1),taicpu(p).oper[1]^.reg) and
                        GetNextInstruction(hp1,hp2) and
                        (hp2.typ = ait_instruction) and
                        (taicpu(hp2).opcode = A_MOV) and
                        (taicpu(hp2).oper[0]^.typ = top_reg) and
                        (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
                        (taicpu(hp2).oper[1]^.typ = top_ref) then
                        begin
                          { work on a copy so the running UsedRegs set is not disturbed }
                          TmpUsedRegs := UsedRegs;
                          UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                          if (RefsEqual(taicpu(hp2).oper[1]^.ref^, taicpu(p).oper[0]^.ref^) and
                              not(RegUsedAfterInstruction(taicpu(p).oper[1]^.reg,
                                    hp2, TmpUsedRegs))) then
                            { change   mov            (ref), reg            }
                            {          add/sub/or/... reg2/$const, reg      }
                            {          mov            reg, (ref)            }
                            {          # release reg                        }
                            { to       add/sub/or/... reg2/$const, (ref)    }
                            begin
                              case taicpu(hp1).opcode of
                                A_INC,A_DEC:
                                  { single-operand forms keep the target in operand 0 }
                                  taicpu(hp1).loadRef(0,taicpu(p).oper[0]^.ref^)
                                else
                                  taicpu(hp1).loadRef(1,taicpu(p).oper[0]^.ref^);
                              end;
                              asml.remove(p);
                              asml.remove(hp2);
                              p.free;
                              hp2.free;
                              p := hp1
                            end;
                        end
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Final peephole pass: walks the list entries from BlockStart up to (but not
    including) BlockEnd and applies these rewrites in asml:
      * call x; jmp y            -> push y; jmp x   (optimizecputype < Pentium2, no PIC)
      * cmp $0,%reg              -> test %reg,%reg
      * movzbl src,%reg32        -> xorl %reg32,%reg32; movb src,%reg8
                                    (optimizecputype = Pentium, not optimizing
                                     for size, register variables off)
      * test/or %y,%y behind an arithmetic instruction on %y and in front of a
        setcc/jcc/cmovcc         -> removed
    Instructions for which InsContainsSegRef returns true are skipped. }
  procedure PostPeepHoleOpts(asml: TAsmList; BlockStart, BlockEnd: tai);
    var
      p,hp1,hp2: tai;
    begin
      p := BlockStart;
      while (p <> BlockEnd) Do
        begin
          case p.Typ Of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode Of
                  A_CALL:
                    if (current_settings.optimizecputype < cpu_Pentium2) and
                       not(cs_create_pic in current_settings.moduleswitches) and
                       GetNextInstruction(p, hp1) and
                       (hp1.typ = ait_instruction) and
                       (taicpu(hp1).opcode = A_JMP) and
                       ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                      begin
                        { push the jmp target in front of the call, then turn the
                          call itself into a jmp and drop the original jmp }
                        hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                        InsertLLItem(asml, p.previous, p, hp2);
                        taicpu(p).opcode := A_JMP;
                        taicpu(p).is_jmp := true;
                        asml.remove(hp1);
                        hp1.free;
                      end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        {change "cmp $0, %reg" to "test %reg, %reg"}
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { p is not advanced, so the new test is examined by the
                            A_TEST case on the next iteration }
                          continue;
                        end;
                    end;
                  (*
                  Optimization is not safe; xor clears the carry flag.
                  See test/tgadint64 in the test suite.
                  A_MOV:
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[0]^.val = 0) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) then
                      { change "mov $0, %reg" into "xor %reg, %reg" }
                      begin
                        taicpu(p).opcode := A_XOR;
                        taicpu(p).loadReg(0,taicpu(p).oper[1]^.reg);
                      end;
                  *)
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg)
                            then
                              case taicpu(p).opsize of
                                S_BL:
                                  begin
                                    if IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                                       { the inserted xor would wipe the source if it
                                         is part of the destination register, e.g.
                                         "movzbl %al,%eax"; the memory variant below
                                         has the equivalent base/index check }
                                       (getsupreg(taicpu(p).oper[0]^.reg) <> getsupreg(taicpu(p).oper[1]^.reg)) and
                                       not(cs_opt_size in current_settings.optimizerswitches) and
                                       (current_settings.optimizecputype = cpu_Pentium) then
                                      {Change "movzbl %reg1, %reg2" to
                                       "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                       PentiumMMX}
                                      begin
                                        hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                                    taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                        InsertLLItem(asml,p.previous, p, hp1);
                                        taicpu(p).opcode := A_MOV;
                                        taicpu(p).changeopsize(S_B);
                                        setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                      end;
                                  end;
                              end
                            else if (taicpu(p).oper[0]^.typ = top_ref) and
                                (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                                (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                                not(cs_opt_size in current_settings.optimizerswitches) and
                                IsGP32Reg(getsupreg(taicpu(p).oper[1]^.reg)) and
                                (current_settings.optimizecputype = cpu_Pentium) and
                                (taicpu(p).opsize = S_BL) then
                              {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                               Pentium and PentiumMMX}
                              begin
                                hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                            taicpu(p).oper[1]^.reg);
                                taicpu(p).opcode := A_MOV;
                                taicpu(p).changeopsize(S_B);
                                setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                InsertLLItem(asml,p.previous, p, hp1);
                              end;
                      end;
                  A_TEST, A_OR:
                    {removes the line marked with (x) from the sequence
                     and/or/xor/add/sub/... $x, %y
                     test/or %y, %y   (x)
                     j(n)z _Label
                        as the first instruction already adjusts the ZF}
                    begin
                      if OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then
                        if GetLastInstruction(p, hp1) and
                           (tai(hp1).typ = ait_instruction) and
                           GetNextInstruction(p,hp2) and
                           (hp2.typ = ait_instruction) and
                           ((taicpu(hp2).opcode = A_SETcc) or
                            (taicpu(hp2).opcode = A_Jcc) or
                            (taicpu(hp2).opcode = A_CMOVcc)) then
                          case taicpu(hp1).opcode Of
                            A_ADD, A_SUB, A_OR, A_XOR, A_AND{, A_SHL, A_SHR}:
                              begin
                                if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[0]^) and
                                   { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                   { and in case of carry for A(E)/B(E)/C/NC                  }
                                   ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                    ((taicpu(hp1).opcode <> A_ADD) and
                                     (taicpu(hp1).opcode <> A_SUB))) then
                                  begin
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end;
                            A_DEC, A_INC, A_NEG:
                              begin
                                if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
                                   { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                   { and in case of carry for A(E)/B(E)/C/NC                  }
                                   (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                  begin
                                    case taicpu(hp1).opcode Of
                                      A_DEC, A_INC:
                                        {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                        begin
                                          case taicpu(hp1).opcode Of
                                            A_DEC: taicpu(hp1).opcode := A_SUB;
                                            A_INC: taicpu(hp1).opcode := A_ADD;
                                          end;
                                          taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                          taicpu(hp1).loadConst(0,1);
                                          taicpu(hp1).ops:=2;
                                        end
                                    end;
                                    { drop the now redundant test/or }
                                    hp1 := tai(p.next);
                                    asml.remove(p);
                                    p.free;
                                    p := tai(hp1);
                                    continue
                                  end;
                              end
                          end
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  2207. end.