aoptcpu.pas 73 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl and Jonas Maebe
  3. This unit contains the peephole optimizer for i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit aoptcpu;
  18. {$i fpcdefs.inc}
  19. { $define DEBUG_AOPTCPU}
  20. Interface
  21. uses
  22. cgbase,
  23. cpubase, aopt, aoptx86,
  24. Aasmbase,aasmtai,aasmdata;
  Type
    { CPU-specific assembler optimizer of this unit (the unit header describes
      it as the peephole optimizer for i386). It derives from TX86AsmOptimizer
      and overrides the optimizer entry points listed below. }
    TCpuAsmOptimizer = class(TX86AsmOptimizer)
      procedure Optimize; override;
      { rewrites some imul/sar/shr/xor forms before the peephole passes }
      procedure PrePeepHoleOpts; override;
      { first pass of peephole optimizations }
      procedure PeepHoleOptPass1; override;
      procedure PeepHoleOptPass2; override;
      procedure PostPeepHoleOpts; override;
      { store/reload pair removal for fstp/fld and fistp/fild; returns true
        if the caller should "continue" its loop afterwards }
      function DoFpuLoadStoreOpt(var p : tai) : boolean;
    end;
  34. Var
  35. AsmOptimizer : TCpuAsmOptimizer;
  36. Implementation
  37. uses
  38. verbose,globtype,globals,
  39. cpuinfo,
  40. aasmcpu,
  41. aoptutils,
  42. procinfo,
  43. cgutils,cgx86,
  44. { units we should get rid of: }
  45. symsym,symconst;
  { Looks at a store instruction p (the caller dispatches fstp/fistp here) and
    the instruction following it. If the pair is "fstp mem; fld mem" or
    "fistp mem; fild mem" with identical memory reference and operand size, the
    operand size is S_FX, the pair is directly followed by exit code and the
    reference is frame-pointer based without index register (and not below the
    function result's local offset, if there is a function result), both
    instructions are removed.

    Returns true if a "continue" should be done after this optimization; p then
    points to the instruction that followed the removed pair. }
  function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
  var
    reloadIns, exitIns: tai;
  begin
    result := false;

    { p must store to memory and be followed by a real instruction }
    if (taicpu(p).oper[0]^.typ <> top_ref) or
       not getNextInstruction(p, reloadIns) or
       (reloadIns.typ <> ait_instruction) then
      exit;

    { only the pairs fstp/fld and fistp/fild qualify }
    if not (((taicpu(reloadIns).opcode = A_FLD) and
             (taicpu(p).opcode = A_FSTP)) or
            ((taicpu(p).opcode = A_FISTP) and
             (taicpu(reloadIns).opcode = A_FILD))) then
      exit;

    { the reload must read exactly what was just stored }
    if (taicpu(reloadIns).oper[0]^.typ <> top_ref) or
       (taicpu(reloadIns).opsize <> taicpu(p).opsize) or
       not RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(reloadIns).oper[0]^.ref^) then
      exit;

    { replacing fstp f;fld f by fst f is only valid for extended because of
      rounding, and the pair is only dropped right in front of the exit code }
    if (taicpu(p).opsize <> S_FX) or
       not getNextInstruction(reloadIns, exitIns) or
       (exitIns.typ <> ait_instruction) or
       not IsExitCode(exitIns) or
       (taicpu(p).oper[0]^.ref^.base <> current_procinfo.FramePointer) then
      exit;

    { references below the function result's local offset are left alone }
    if assigned(current_procinfo.procdef.funcretsym) and
       (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset) then
      exit;

    if taicpu(p).oper[0]^.ref^.index <> NR_NO then
      exit;

    asml.remove(p);
    asml.remove(reloadIns);
    p.free;
    reloadIns.free;
    p := exitIns;
    removeLastDeallocForFuncRes(p);
    result := true;

    { Not done for the remaining operand sizes (turning fstp/fistp into
      fst/fist and dropping the reload): can't be done because the store
      operation rounds, and fst can't store an extended value. }
  end;
  { Converts a tinschange value that names a specific register into the
    matching super register.

    Values up to CH_REDI index the lookup table directly. Values in the three
    following groups (up to CH_WEDI, CH_RWEDI and CH_MEDI) are rebased by
    subtracting the ordinal of the last value of the preceding group before the
    lookup. Anything beyond CH_MEDI raises InternalError(2016041901). }
  function tch2reg(ch: tinschange): tsuperregister;
  const
    ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
  var
    rebase: longint;
  begin
    if ch <= CH_REDI then
      rebase := 0
    else if ch <= CH_WEDI then
      rebase := ord(CH_REDI)
    else if ch <= CH_RWEDI then
      rebase := ord(CH_WEDI)
    else if ch <= CH_MEDI then
      rebase := ord(CH_RWEDI)
    else
      begin
        InternalError(2016041901);
        exit;
      end;
    tch2reg := ch2reg[tinschange(ord(ch) - rebase)];
  end;
  { Checks if the register is a 32 bit general purpose register.
    What is actually tested: reg has register type R_INTREGISTER and its super
    register lies in the range RS_EAX..RS_EBX.
    NOTE(review): the sub-register (size) part of reg is not inspected here -
    confirm that callers only pass 32 bit registers. }
  function isgp32reg(reg: TRegister): boolean;
  begin
    { warnings are switched off around the range comparison, as in the
      original code }
{$push}{$warnings off}
    if getregtype(reg) = R_INTREGISTER then
      isgp32reg := (getsupreg(reg) >= RS_EAX) and (getsupreg(reg) <= RS_EBX)
    else
      isgp32reg := false;
{$pop}
  end;
  { Returns true if p contains a memory operand with a segment set, i.e. if
    any of its opercnt operands is a reference whose segment is not NR_NO. }
  function InsContainsSegRef(p: taicpu): boolean;
  var
    opIdx: longint;
  begin
    result := false;
    for opIdx := 0 to p.opercnt-1 do
      if (p.oper[opIdx]^.typ = top_ref) and
         (p.oper[opIdx]^.ref^.segment <> NR_NO) then
        begin
          { first hit decides, no need to look at the remaining operands }
          result := true;
          exit;
        end;
  end;
  { Tells whether p has to be treated as reading the CPU flags.

    - instruction: true if its InsProp change set contains any of the
      "read" or "read/write" flag entries, Ch_RFlags, Ch_RWFlags,
      Ch_RFLAGScc or Ch_All
    - label: always true
    - anything else: false }
  function InstrReadsFlags(p: tai): boolean;
  begin
    case p.typ of
      ait_instruction:
        InstrReadsFlags :=
          (InsProp[taicpu(p).opcode].Ch*
           [Ch_RCarryFlag,Ch_RParityFlag,Ch_RAuxiliaryFlag,Ch_RZeroFlag,Ch_RSignFlag,Ch_ROverflowFlag,
            Ch_RWCarryFlag,Ch_RWParityFlag,Ch_RWAuxiliaryFlag,Ch_RWZeroFlag,Ch_RWSignFlag,Ch_RWOverflowFlag,
            Ch_RFlags,Ch_RWFlags,Ch_RFLAGScc,Ch_All]) <> [];
      ait_label:
        InstrReadsFlags := true;
      else
        InstrReadsFlags := false;
    end;
  end;
  { Walks the current block (BlockStart up to BlockEnd) and rewrites a few
    instruction forms before the peephole passes run:

      - 32 bit "imul $const, reg" / "imul $const, reg1, reg2":
          const = 1            -> removed, or replaced by "mov reg1, reg2"
          const = 3, 5, 9      -> a single lea
          const = 6, 10, 12    -> lea/add or lea/lea pair, only when
                                  optimizecputype <= cpu_386
        The lea forms are skipped when optimizing for size or when the
        instruction is directly followed by a jo/jno (presumably because the
        replacement does not produce imul's overflow flag).
      - sar/shr are handed to PrePeepholeOptSxx
      - "xor reg, reg" is temporarily turned into "mov $0, reg"

    Instructions that contain a segment-qualified memory operand are skipped.

    NOTE(review): the three-operand sequences for 6 and 12 read reg1 after
    reg2 has been written, so they assume reg1 <> reg2 - confirm that the
    three-operand form is never emitted with identical registers. }
  procedure TCPUAsmOptimizer.PrePeepHoleOpts;
  var
    p,hp1: tai;
    tmpRef: treference;
  begin
    p := BlockStart;
    while (p <> BlockEnd) Do
      begin
        case p.Typ Of
          Ait_Instruction:
            begin
              if InsContainsSegRef(taicpu(p)) then
                begin
                  p := tai(p.next);
                  continue;
                end;
              case taicpu(p).opcode Of
                A_IMUL:
                  { changes certain "imul const, %reg"'s to lea sequences }
                  begin
                    if (taicpu(p).oper[0]^.typ = Top_Const) and
                       (taicpu(p).oper[1]^.typ = Top_Reg) and
                       (taicpu(p).opsize = S_L) then
                      if (taicpu(p).oper[0]^.val = 1) then
                        if (taicpu(p).ops = 2) then
                          { remove "imul $1, reg" }
                          begin
                            hp1 := tai(p.Next);
                            asml.remove(p);
                            p.free;
                            p := hp1;
                            continue;
                          end
                        else
                          { change "imul $1, reg1, reg2" to "mov reg1, reg2" }
                          begin
                            hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
                            InsertLLItem(p.previous, p.next, hp1);
                            p.free;
                            p := hp1;
                          end
                      else if
                       ((taicpu(p).ops <= 2) or
                        (taicpu(p).oper[2]^.typ = Top_Reg)) and
                       (taicpu(p).oper[0]^.val <= 12) and
                       not(cs_opt_size in current_settings.optimizerswitches) and
                       { do not touch an imul whose overflow result is tested }
                       (not(GetNextInstruction(p, hp1)) or
                        not((tai(hp1).typ = ait_instruction) and
                            ((taicpu(hp1).opcode=A_Jcc) and
                             (taicpu(hp1).condition in [C_O,C_NO])))) then
                        begin
                          reference_reset(tmpref,1,[]);
                          case taicpu(p).oper[0]^.val Of
                            3: begin
                                 { imul 3, reg1, reg2 to
                                     lea (reg1,reg1,2), reg2
                                   imul 3, reg1 to
                                     lea (reg1,reg1,2), reg1 }
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 2;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            5: begin
                                 { imul 5, reg1, reg2 to
                                     lea (reg1,reg1,4), reg2
                                   imul 5, reg1 to
                                     lea (reg1,reg1,4), reg1 }
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 4;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            6: begin
                                 { imul 6, reg1, reg2 to
                                     lea (,reg1,2), reg2
                                     lea (reg2,reg1,4), reg2
                                   imul 6, reg1 to
                                     lea (reg1,reg1,2), reg1
                                     add reg1, reg1 }
                                 if (current_settings.optimizecputype <= cpu_386) then
                                   begin
                                     { second instruction of the pair, inserted
                                       behind p first }
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     if (taicpu(p).ops = 3) then
                                       begin
                                         TmpRef.base := taicpu(p).oper[2]^.reg;
                                         TmpRef.ScaleFactor := 4;
                                         { the destination has to be reg2: the
                                           result belongs there and reg1 must
                                           not be clobbered }
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                       end
                                     else
                                       begin
                                         hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                           taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                       end;
                                     InsertLLItem(p, p.next, hp1);
                                     { first instruction of the pair, replaces p }
                                     reference_reset(tmpref,2,[]);
                                     TmpRef.index := taicpu(p).oper[1]^.reg;
                                     TmpRef.ScaleFactor := 2;
                                     if (taicpu(p).ops = 3) then
                                       begin
                                         TmpRef.base := NR_NO;
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
                                           taicpu(p).oper[2]^.reg);
                                       end
                                     else
                                       begin
                                         TmpRef.base := taicpu(p).oper[1]^.reg;
                                         hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                       end;
                                     InsertLLItem(p.previous, p.next, hp1);
                                     p.free;
                                     { continue behind the pair }
                                     p := tai(hp1.next);
                                   end
                               end;
                            9: begin
                                 { imul 9, reg1, reg2 to
                                     lea (reg1,reg1,8), reg2
                                   imul 9, reg1 to
                                     lea (reg1,reg1,8), reg1 }
                                 TmpRef.base := taicpu(p).oper[1]^.reg;
                                 TmpRef.index := taicpu(p).oper[1]^.reg;
                                 TmpRef.ScaleFactor := 8;
                                 if (taicpu(p).ops = 2) then
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
                                 else
                                   hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                 InsertLLItem(p.previous, p.next, hp1);
                                 p.free;
                                 p := hp1;
                               end;
                            10: begin
                                  { imul 10, reg1, reg2 to
                                      lea (reg1,reg1,4), reg2
                                      add reg2, reg2
                                    imul 10, reg1 to
                                      lea (reg1,reg1,4), reg1
                                      add reg1, reg1 }
                                  if (current_settings.optimizecputype <= cpu_386) then
                                    begin
                                      if (taicpu(p).ops = 3) then
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
                                      else
                                        hp1 := taicpu.op_reg_reg(A_ADD, S_L,
                                          taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
                                      InsertLLItem(p, p.next, hp1);
                                      TmpRef.base := taicpu(p).oper[1]^.reg;
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      TmpRef.ScaleFactor := 4;
                                      if (taicpu(p).ops = 3) then
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
                                      else
                                        hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                      InsertLLItem(p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end
                                end;
                            12: begin
                                  { imul 12, reg1, reg2 to
                                      lea (,reg1,4), reg2
                                      lea (reg2,reg1,8), reg2
                                    imul 12, reg1 to
                                      lea (reg1,reg1,2), reg1
                                      lea (,reg1,4), reg1 }
                                  if (current_settings.optimizecputype <= cpu_386)
                                    then
                                    begin
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if (taicpu(p).ops = 3) then
                                        begin
                                          TmpRef.base := taicpu(p).oper[2]^.reg;
                                          TmpRef.ScaleFactor := 8;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(p, p.next, hp1);
                                      reference_reset(tmpref,2,[]);
                                      TmpRef.index := taicpu(p).oper[1]^.reg;
                                      if (taicpu(p).ops = 3) then
                                        begin
                                          TmpRef.base := NR_NO;
                                          TmpRef.ScaleFactor := 4;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
                                        end
                                      else
                                        begin
                                          TmpRef.base := taicpu(p).oper[1]^.reg;
                                          TmpRef.ScaleFactor := 2;
                                          hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
                                        end;
                                      InsertLLItem(p.previous, p.next, hp1);
                                      p.free;
                                      p := tai(hp1.next);
                                    end
                                end
                          end;
                        end;
                  end;
                A_SAR,A_SHR:
                  if PrePeepholeOptSxx(p) then
                    continue;
                A_XOR:
                  if (taicpu(p).oper[0]^.typ = top_reg) and
                     (taicpu(p).oper[1]^.typ = top_reg) and
                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
                    { temporarily change this to 'mov reg,0' to make it easier }
                    { for the CSE. Will be changed back in pass 2 }
                    begin
                      taicpu(p).opcode := A_MOV;
                      taicpu(p).loadConst(0,0);
                    end;
              end;
            end;
        end;
        p := tai(p.next)
      end;
  end;
  384. { First pass of peephole optimizations }
  385. procedure TCPUAsmOPtimizer.PeepHoleOptPass1;
  { Debug helper: prints "Ok" on standard output and always yields True, so it
    can be spliced into a boolean condition chain. }
  function WriteOk : Boolean;
  begin
    WriteOk := True;
    writeln('Ok');
  end;
  391. var
  392. l : longint;
  393. p,hp1,hp2 : tai;
  394. hp3,hp4: tai;
  395. v:aint;
  396. TmpRef: TReference;
  397. TmpBool1, TmpBool2: Boolean;
  function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
  { traces successive jumps to their final destination and sets it, e.g.
      je l1                je l3
      <code>               <code>
      l1:       becomes    l1:
      je l2                je l3
      <code>               <code>
      l2:                  l2:
      jmp l3               jmp l3

    the level parameter denotes how deep we have already followed the jump,
    to avoid endless loops with constructs such as "l5: ; jmp l5"

    hp is the jump to retarget; its operand 0 is read as a reference to a
    label symbol. Label reference counts are kept in step: whenever hp is
    pointed at a different label, the old label is decref'd and the new one
    incref'd.

    Returns false if level exceeds 20, if the jump at the destination targets
    the same label as hp, or if a recursive trace returned false; returns true
    otherwise (also when the label of hp cannot be found). }
  var p1, p2: tai;
      l: tasmlabel;

    { Skips the items in SkipInstr and ait_align following hp; if the item
      after that is a label, returns true and stores its symbol in l. }
    function FindAnyLabel(hp: tai; var l: tasmlabel): Boolean;
    begin
      FindAnyLabel := false;
      while assigned(hp.next) and
            (tai(hp.next).typ in (SkipInstr+[ait_align])) Do
        hp := tai(hp.next);
      if assigned(hp.next) and
         (tai(hp.next).typ = ait_label) then
        begin
          FindAnyLabel := true;
          l := tai_label(hp.next).labsym;
        end
    end;

  begin
    GetfinalDestination := false;
    if level > 20 then
      exit;
    p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
    if assigned(p1) then
      begin
        SkipLabels(p1,p1);
        if (tai(p1).typ = ait_instruction) and
           (taicpu(p1).is_jmp) then
          if { the next instruction after the label where the jump hp arrives}
             { is unconditional or of the same type as hp, so continue       }
             (taicpu(p1).condition in [C_None,hp.condition]) or
             { the next instruction after the label where the jump hp arrives}
             { is the opposite of hp (so this one is never taken), but after }
             { that one there is a branch that will be taken, so perform a   }
             { little hack: set p1 equal to this instruction (that's what the}
             { last SkipLabels is for, only works with short bool evaluation)}
             ((taicpu(p1).condition = inverse_cond(hp.condition)) and
              SkipLabels(p1,p2) and
              (p2.typ = ait_instruction) and
              (taicpu(p2).is_jmp) and
              (taicpu(p2).condition in [C_None,hp.condition]) and
              SkipLabels(p1,p1)) then
            begin
              { quick check for loops of the form "l5: ; jmp l5 }
              if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
                  tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
                exit;
              if not GetFinalDestination(asml, taicpu(p1),succ(level)) then
                exit;
              tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
              hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
              tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
            end
          else
            { the jump at the destination is the inverse of hp and therefore
              never taken when reached through hp: jump behind it instead }
            if (taicpu(p1).condition = inverse_cond(hp.condition)) then
              if not FindAnyLabel(p1,l) then
                begin
{$ifdef finaldestdebug}
                  insertllitem(asml,p1,p1.next,tai_comment.Create(
                    strpnew('previous label inserted'))));
{$endif finaldestdebug}
                  current_asmdata.getjumplabel(l);
                  insertllitem(p1,p1.next,tai_label.Create(l));
                  tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol := l;
                  l.increfs;
                  { this won't work, since the new label isn't in the labeltable }
                  { so it will fail the rangecheck. Labeltable should become a   }
                  { hashtable to support this:                                   }
                  { GetFinalDestination(asml, hp);                               }
                end
              else
                begin
{$ifdef finaldestdebug}
                  insertllitem(asml,p1,p1.next,tai_comment.Create(
                    strpnew('next label reused'))));
{$endif finaldestdebug}
                  l.increfs;
                  { hp no longer refers to its old label, so release that
                    reference like the other retargeting branches do }
                  tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
                  hp.oper[0]^.ref^.symbol := l;
                  if not GetFinalDestination(asml, hp,succ(level)) then
                    exit;
                end;
      end;
    GetFinalDestination := true;
  end;
  { Merges the instruction preceding p into p's immediate operand when that
    instruction works on the same register and has the same operand size:

      dec reg        -> p's constant is increased by 1
      sub $c, reg    -> p's constant is increased by c
      add $c, reg    -> p's constant is decreased by c; if it becomes 0, p is
                        removed as well

    The preceding instruction is removed in all three cases. The enclosing
    procedure's hp1 is used as scratch variable. Returns True only when p
    itself was removed; p then points to the instruction before the removed
    one, or to the one after it if there is no previous instruction.

    NOTE(review): p's operand 0 is read as a constant and operand 1 as a
    register without checking - presumably p is a "sub $const, reg"; confirm
    at the call site. }
  function DoSubAddOpt(var p: tai): Boolean;
  begin
    Result := False;

    if not GetLastInstruction(p, hp1) or
       (hp1.typ <> ait_instruction) or
       (taicpu(hp1).opsize <> taicpu(p).opsize) then
      exit;

    case taicpu(hp1).opcode Of
      A_DEC:
        if (taicpu(hp1).oper[0]^.typ = top_reg) and
           (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+1);
            asml.remove(hp1);
            hp1.free;
          end;
      A_SUB:
        if (taicpu(hp1).oper[0]^.typ = top_const) and
           (taicpu(hp1).oper[1]^.typ = top_reg) and
           (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val+taicpu(hp1).oper[0]^.val);
            asml.remove(hp1);
            hp1.free;
          end;
      A_ADD:
        if (taicpu(hp1).oper[0]^.typ = top_const) and
           (taicpu(hp1).oper[1]^.typ = top_reg) and
           (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
          begin
            taicpu(p).loadConst(0,taicpu(p).oper[0]^.val-taicpu(hp1).oper[0]^.val);
            asml.remove(hp1);
            hp1.free;
            { both instructions cancelled each other out completely }
            if (taicpu(p).oper[0]^.val = 0) then
              begin
                hp1 := tai(p.next);
                asml.remove(p);
                p.free;
                if not GetLastInstruction(hp1, p) then
                  p := hp1;
                Result := True;
              end
          end;
    end;
  end;
  535. begin
  536. p := BlockStart;
  537. ClearUsedRegs;
  538. while (p <> BlockEnd) Do
  539. begin
  540. UpDateUsedRegs(UsedRegs, tai(p.next));
  541. case p.Typ Of
  542. ait_instruction:
  543. begin
  544. current_filepos:=taicpu(p).fileinfo;
  545. if InsContainsSegRef(taicpu(p)) then
  546. begin
  547. p := tai(p.next);
  548. continue;
  549. end;
  550. { Handle Jmp Optimizations }
  551. if taicpu(p).is_jmp then
  552. begin
  553. {the following if-block removes all code between a jmp and the next label,
  554. because it can never be executed}
  555. if (taicpu(p).opcode = A_JMP) then
  556. begin
  557. hp2:=p;
  558. while GetNextInstruction(hp2, hp1) and
  559. (hp1.typ <> ait_label) do
  560. if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
  561. begin
  562. { don't kill start/end of assembler block,
  563. no-line-info-start/end etc }
  564. if hp1.typ<>ait_marker then
  565. begin
  566. asml.remove(hp1);
  567. hp1.free;
  568. end
  569. else
  570. hp2:=hp1;
  571. end
  572. else break;
  573. end;
  574. { remove jumps to a label coming right after them }
  575. if GetNextInstruction(p, hp1) then
  576. begin
  577. if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
  578. { TODO: FIXME removing the first instruction fails}
  579. (p<>blockstart) then
  580. begin
  581. hp2:=tai(hp1.next);
  582. asml.remove(p);
  583. p.free;
  584. p:=hp2;
  585. continue;
  586. end
  587. else
  588. begin
  589. if hp1.typ = ait_label then
  590. SkipLabels(hp1,hp1);
  591. if (tai(hp1).typ=ait_instruction) and
  592. (taicpu(hp1).opcode=A_JMP) and
  593. GetNextInstruction(hp1, hp2) and
  594. FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
  595. begin
  596. if taicpu(p).opcode=A_Jcc then
  597. begin
  598. taicpu(p).condition:=inverse_cond(taicpu(p).condition);
  599. tai_label(hp2).labsym.decrefs;
  600. taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
  601. { when free'ing hp1, the ref. isn't decresed, so we don't
  602. increase it (FK)
  603. taicpu(p).oper[0]^.ref^.symbol.increfs;
  604. }
  605. asml.remove(hp1);
  606. hp1.free;
  607. GetFinalDestination(asml, taicpu(p),0);
  608. end
  609. else
  610. begin
  611. GetFinalDestination(asml, taicpu(p),0);
  612. p:=tai(p.next);
  613. continue;
  614. end;
  615. end
  616. else
  617. GetFinalDestination(asml, taicpu(p),0);
  618. end;
  619. end;
  620. end
  621. else
  622. { All other optimizes }
  623. begin
  624. for l := 0 to taicpu(p).ops-1 Do
  625. if (taicpu(p).oper[l]^.typ = top_ref) then
  626. With taicpu(p).oper[l]^.ref^ Do
  627. begin
  628. if (base = NR_NO) and
  629. (index <> NR_NO) and
  630. (scalefactor in [0,1]) then
  631. begin
  632. base := index;
  633. index := NR_NO
  634. end
  635. end;
  636. case taicpu(p).opcode Of
  637. A_AND:
  638. if OptPass1And(p) then
  639. continue;
  640. A_CMP:
  641. begin
  642. { cmp register,$8000 neg register
  643. je target --> jo target
  644. .... only if register is deallocated before jump.}
  645. case Taicpu(p).opsize of
  646. S_B: v:=$80;
  647. S_W: v:=$8000;
  648. S_L: v:=aint($80000000);
  649. else
  650. internalerror(2013112905);
  651. end;
  652. if (taicpu(p).oper[0]^.typ=Top_const) and
  653. (taicpu(p).oper[0]^.val=v) and
  654. (Taicpu(p).oper[1]^.typ=top_reg) and
  655. GetNextInstruction(p, hp1) and
  656. (hp1.typ=ait_instruction) and
  657. (taicpu(hp1).opcode=A_Jcc) and
  658. (Taicpu(hp1).condition in [C_E,C_NE]) and
  659. not(RegInUsedRegs(Taicpu(p).oper[1]^.reg, UsedRegs)) then
  660. begin
  661. Taicpu(p).opcode:=A_NEG;
  662. Taicpu(p).loadoper(0,Taicpu(p).oper[1]^);
  663. Taicpu(p).clearop(1);
  664. Taicpu(p).ops:=1;
  665. if Taicpu(hp1).condition=C_E then
  666. Taicpu(hp1).condition:=C_O
  667. else
  668. Taicpu(hp1).condition:=C_NO;
  669. continue;
  670. end;
  671. {
  672. @@2: @@2:
  673. .... ....
  674. cmp operand1,0
  675. jle/jbe @@1
  676. dec operand1 --> sub operand1,1
  677. jmp @@2 jge/jae @@2
  678. @@1: @@1:
  679. ... ....}
  680. if (taicpu(p).oper[0]^.typ = top_const) and
  681. (taicpu(p).oper[1]^.typ in [top_reg,top_ref]) and
  682. (taicpu(p).oper[0]^.val = 0) and
  683. GetNextInstruction(p, hp1) and
  684. (hp1.typ = ait_instruction) and
  685. (taicpu(hp1).is_jmp) and
  686. (taicpu(hp1).opcode=A_Jcc) and
  687. (taicpu(hp1).condition in [C_LE,C_BE]) and
  688. GetNextInstruction(hp1,hp2) and
  689. (hp2.typ = ait_instruction) and
  690. (taicpu(hp2).opcode = A_DEC) and
  691. OpsEqual(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) and
  692. GetNextInstruction(hp2, hp3) and
  693. (hp3.typ = ait_instruction) and
  694. (taicpu(hp3).is_jmp) and
  695. (taicpu(hp3).opcode = A_JMP) and
  696. GetNextInstruction(hp3, hp4) and
  697. FindLabel(tasmlabel(taicpu(hp1).oper[0]^.ref^.symbol),hp4) then
  698. begin
  699. taicpu(hp2).Opcode := A_SUB;
  700. taicpu(hp2).loadoper(1,taicpu(hp2).oper[0]^);
  701. taicpu(hp2).loadConst(0,1);
  702. taicpu(hp2).ops:=2;
  703. taicpu(hp3).Opcode := A_Jcc;
  704. case taicpu(hp1).condition of
  705. C_LE: taicpu(hp3).condition := C_GE;
  706. C_BE: taicpu(hp3).condition := C_AE;
  707. end;
  708. asml.remove(p);
  709. asml.remove(hp1);
  710. p.free;
  711. hp1.free;
  712. p := hp2;
  713. continue;
  714. end
  715. end;
  716. A_FLD:
  717. begin
  718. if (taicpu(p).oper[0]^.typ = top_reg) and
  719. GetNextInstruction(p, hp1) and
  720. (hp1.typ = Ait_Instruction) and
  721. (taicpu(hp1).oper[0]^.typ = top_reg) and
  722. (taicpu(hp1).oper[1]^.typ = top_reg) and
  723. (taicpu(hp1).oper[0]^.reg = NR_ST) and
  724. (taicpu(hp1).oper[1]^.reg = NR_ST1) then
  725. { change to
  726. fld reg fxxx reg,st
  727. fxxxp st, st1 (hp1)
  728. Remark: non commutative operations must be reversed!
  729. }
  730. begin
  731. case taicpu(hp1).opcode Of
  732. A_FMULP,A_FADDP,
  733. A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  734. begin
  735. case taicpu(hp1).opcode Of
  736. A_FADDP: taicpu(hp1).opcode := A_FADD;
  737. A_FMULP: taicpu(hp1).opcode := A_FMUL;
  738. A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
  739. A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
  740. A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
  741. A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
  742. end;
  743. taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
  744. taicpu(hp1).oper[1]^.reg := NR_ST;
  745. asml.remove(p);
  746. p.free;
  747. p := hp1;
  748. continue;
  749. end;
  750. end;
  751. end
  752. else
  753. if (taicpu(p).oper[0]^.typ = top_ref) and
  754. GetNextInstruction(p, hp2) and
  755. (hp2.typ = Ait_Instruction) and
  756. (taicpu(hp2).ops = 2) and
  757. (taicpu(hp2).oper[0]^.typ = top_reg) and
  758. (taicpu(hp2).oper[1]^.typ = top_reg) and
  759. (taicpu(p).opsize in [S_FS, S_FL]) and
  760. (taicpu(hp2).oper[0]^.reg = NR_ST) and
  761. (taicpu(hp2).oper[1]^.reg = NR_ST1) then
  762. if GetLastInstruction(p, hp1) and
  763. (hp1.typ = Ait_Instruction) and
  764. ((taicpu(hp1).opcode = A_FLD) or
  765. (taicpu(hp1).opcode = A_FST)) and
  766. (taicpu(hp1).opsize = taicpu(p).opsize) and
  767. (taicpu(hp1).oper[0]^.typ = top_ref) and
  768. RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
  769. if ((taicpu(hp2).opcode = A_FMULP) or
  770. (taicpu(hp2).opcode = A_FADDP)) then
  771. { change to
  772. fld/fst mem1 (hp1) fld/fst mem1
  773. fld mem1 (p) fadd/
  774. faddp/ fmul st, st
  775. fmulp st, st1 (hp2) }
  776. begin
  777. asml.remove(p);
  778. p.free;
  779. p := hp1;
  780. if (taicpu(hp2).opcode = A_FADDP) then
  781. taicpu(hp2).opcode := A_FADD
  782. else
  783. taicpu(hp2).opcode := A_FMUL;
  784. taicpu(hp2).oper[1]^.reg := NR_ST;
  785. end
  786. else
  787. { change to
  788. fld/fst mem1 (hp1) fld/fst mem1
  789. fld mem1 (p) fld st}
  790. begin
  791. taicpu(p).changeopsize(S_FL);
  792. taicpu(p).loadreg(0,NR_ST);
  793. end
  794. else
  795. begin
  796. case taicpu(hp2).opcode Of
  797. A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
  798. { change to
  799. fld/fst mem1 (hp1) fld/fst mem1
  800. fld mem2 (p) fxxx mem2
  801. fxxxp st, st1 (hp2) }
  802. begin
  803. case taicpu(hp2).opcode Of
  804. A_FADDP: taicpu(p).opcode := A_FADD;
  805. A_FMULP: taicpu(p).opcode := A_FMUL;
  806. A_FSUBP: taicpu(p).opcode := A_FSUBR;
  807. A_FSUBRP: taicpu(p).opcode := A_FSUB;
  808. A_FDIVP: taicpu(p).opcode := A_FDIVR;
  809. A_FDIVRP: taicpu(p).opcode := A_FDIV;
  810. end;
  811. asml.remove(hp2);
  812. hp2.free;
  813. end
  814. end
  815. end
  816. end;
  817. A_FSTP,A_FISTP:
  818. if doFpuLoadStoreOpt(p) then
  819. continue;
  820. A_LEA:
  821. begin
  822. {removes seg register prefixes from LEA operations, as they
  823. don't do anything}
  824. taicpu(p).oper[0]^.ref^.Segment := NR_NO;
  825. {changes "lea (%reg1), %reg2" into "mov %reg1, %reg2"}
  826. if (taicpu(p).oper[0]^.ref^.base <> NR_NO) and
  827. (getsupreg(taicpu(p).oper[0]^.ref^.base) in [RS_EAX..RS_ESP]) and
  828. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  829. (not(Assigned(taicpu(p).oper[0]^.ref^.Symbol))) then
  830. begin
  831. if (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
  832. (taicpu(p).oper[0]^.ref^.offset = 0) then
  833. begin
  834. hp1 := taicpu.op_reg_reg(A_MOV, S_L,taicpu(p).oper[0]^.ref^.base,
  835. taicpu(p).oper[1]^.reg);
  836. InsertLLItem(p.previous,p.next, hp1);
  837. p.free;
  838. p := hp1;
  839. continue;
  840. end
  841. else if (taicpu(p).oper[0]^.ref^.offset = 0) then
  842. begin
  843. hp1 := tai(p.Next);
  844. asml.remove(p);
  845. p.free;
  846. p := hp1;
  847. continue;
  848. end
  849. { continue to use lea to adjust the stack pointer,
  850. it is the recommended way, but only if not optimizing for size }
  851. else if (taicpu(p).oper[1]^.reg<>NR_STACK_POINTER_REG) or
  852. (cs_opt_size in current_settings.optimizerswitches) then
  853. with taicpu(p).oper[0]^.ref^ do
  854. if (base = taicpu(p).oper[1]^.reg) then
  855. begin
  856. l := offset;
  857. if (l=1) and UseIncDec then
  858. begin
  859. taicpu(p).opcode := A_INC;
  860. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  861. taicpu(p).ops := 1
  862. end
  863. else if (l=-1) and UseIncDec then
  864. begin
  865. taicpu(p).opcode := A_DEC;
  866. taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
  867. taicpu(p).ops := 1;
  868. end
  869. else
  870. begin
  871. if (l<0) and (l<>-2147483648) then
  872. begin
  873. taicpu(p).opcode := A_SUB;
  874. taicpu(p).loadConst(0,-l);
  875. end
  876. else
  877. begin
  878. taicpu(p).opcode := A_ADD;
  879. taicpu(p).loadConst(0,l);
  880. end;
  881. end;
  882. end;
  883. end
  884. (*
  885. This is unsafe, lea doesn't modify the flags but "add"
  886. does. This breaks webtbs/tw15694.pp. The above
  887. transformations are also unsafe, but they don't seem to
  888. be triggered by code that FPC generators (or that at
  889. least does not occur in the tests...). This needs to be
  890. fixed by checking for the liveness of the flags register.
  891. else if MatchReference(taicpu(p).oper[0]^.ref^,taicpu(p).oper[1]^.reg,NR_INVALID) then
  892. begin
  893. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.index,
  894. taicpu(p).oper[0]^.ref^.base);
  895. InsertLLItem(asml,p.previous,p.next, hp1);
  896. DebugMsg('Peephole Lea2AddBase done',hp1);
  897. p.free;
  898. p:=hp1;
  899. continue;
  900. end
  901. else if MatchReference(taicpu(p).oper[0]^.ref^,NR_INVALID,taicpu(p).oper[1]^.reg) then
  902. begin
  903. hp1:=taicpu.op_reg_reg(A_ADD,S_L,taicpu(p).oper[0]^.ref^.base,
  904. taicpu(p).oper[0]^.ref^.index);
  905. InsertLLItem(asml,p.previous,p.next,hp1);
  906. DebugMsg('Peephole Lea2AddIndex done',hp1);
  907. p.free;
  908. p:=hp1;
  909. continue;
  910. end
  911. *)
  912. end;
  913. A_MOV:
  914. begin
  915. If OptPass1MOV(p) then
  916. Continue;
  917. end;
  918. A_MOVSX,
  919. A_MOVZX :
  920. begin
  921. If OptPass1Movx(p) then
  922. Continue
  923. end;
  924. (* should not be generated anymore by the current code generator
  925. A_POP:
  926. begin
  927. if target_info.system=system_i386_go32v2 then
  928. begin
  929. { Transform a series of pop/pop/pop/push/push/push to }
  930. { 'movl x(%esp),%reg' for go32v2 (not for the rest, }
  931. { because I'm not sure whether they can cope with }
  932. { 'movl x(%esp),%reg' with x > 0, I believe we had }
  933. { such a problem when using esp as frame pointer (JM) }
  934. if (taicpu(p).oper[0]^.typ = top_reg) then
  935. begin
  936. hp1 := p;
  937. hp2 := p;
  938. l := 0;
  939. while getNextInstruction(hp1,hp1) and
  940. (hp1.typ = ait_instruction) and
  941. (taicpu(hp1).opcode = A_POP) and
  942. (taicpu(hp1).oper[0]^.typ = top_reg) do
  943. begin
  944. hp2 := hp1;
  945. inc(l,4);
  946. end;
  947. getLastInstruction(p,hp3);
  948. l1 := 0;
  949. while (hp2 <> hp3) and
  950. assigned(hp1) and
  951. (hp1.typ = ait_instruction) and
  952. (taicpu(hp1).opcode = A_PUSH) and
  953. (taicpu(hp1).oper[0]^.typ = top_reg) and
  954. (taicpu(hp1).oper[0]^.reg.enum = taicpu(hp2).oper[0]^.reg.enum) do
  955. begin
  956. { change it to a two op operation }
  957. taicpu(hp2).oper[1]^.typ:=top_none;
  958. taicpu(hp2).ops:=2;
  959. taicpu(hp2).opcode := A_MOV;
  960. taicpu(hp2).loadoper(1,taicpu(hp1).oper[0]^);
  961. reference_reset(tmpref);
  962. tmpRef.base.enum:=R_INTREGISTER;
  963. tmpRef.base.number:=NR_STACK_POINTER_REG;
  964. convert_register_to_enum(tmpref.base);
  965. tmpRef.offset := l;
  966. taicpu(hp2).loadRef(0,tmpRef);
  967. hp4 := hp1;
  968. getNextInstruction(hp1,hp1);
  969. asml.remove(hp4);
  970. hp4.free;
  971. getLastInstruction(hp2,hp2);
  972. dec(l,4);
  973. inc(l1);
  974. end;
  975. if l <> -4 then
  976. begin
  977. inc(l,4);
  978. for l1 := l1 downto 1 do
  979. begin
  980. getNextInstruction(hp2,hp2);
  981. dec(taicpu(hp2).oper[0]^.ref^.offset,l);
  982. end
  983. end
  984. end
  985. end
  986. else
  987. begin
  988. if (taicpu(p).oper[0]^.typ = top_reg) and
  989. GetNextInstruction(p, hp1) and
  990. (tai(hp1).typ=ait_instruction) and
  991. (taicpu(hp1).opcode=A_PUSH) and
  992. (taicpu(hp1).oper[0]^.typ = top_reg) and
  993. (taicpu(hp1).oper[0]^.reg.enum=taicpu(p).oper[0]^.reg.enum) then
  994. begin
  995. { change it to a two op operation }
  996. taicpu(p).oper[1]^.typ:=top_none;
  997. taicpu(p).ops:=2;
  998. taicpu(p).opcode := A_MOV;
  999. taicpu(p).loadoper(1,taicpu(p).oper[0]^);
  1000. reference_reset(tmpref);
  1001. TmpRef.base.enum := R_ESP;
  1002. taicpu(p).loadRef(0,TmpRef);
  1003. asml.remove(hp1);
  1004. hp1.free;
  1005. end;
  1006. end;
  1007. end;
  1008. *)
  1009. A_PUSH:
  1010. begin
  1011. if (taicpu(p).opsize = S_W) and
  1012. (taicpu(p).oper[0]^.typ = Top_Const) and
  1013. GetNextInstruction(p, hp1) and
  1014. (tai(hp1).typ = ait_instruction) and
  1015. (taicpu(hp1).opcode = A_PUSH) and
  1016. (taicpu(hp1).oper[0]^.typ = Top_Const) and
  1017. (taicpu(hp1).opsize = S_W) then
  1018. begin
  1019. taicpu(p).changeopsize(S_L);
  1020. taicpu(p).loadConst(0,taicpu(p).oper[0]^.val shl 16 + word(taicpu(hp1).oper[0]^.val));
  1021. asml.remove(hp1);
  1022. hp1.free;
  1023. end;
  1024. end;
  1025. A_SHL, A_SAL:
  1026. begin
  1027. if (taicpu(p).oper[0]^.typ = Top_Const) and
  1028. (taicpu(p).oper[1]^.typ = Top_Reg) and
  1029. (taicpu(p).opsize = S_L) and
  1030. (taicpu(p).oper[0]^.val <= 3) then
  1031. {Changes "shl const, %reg32; add const/reg, %reg32" to one lea statement}
  1032. begin
  1033. TmpBool1 := True; {should we check the next instruction?}
  1034. TmpBool2 := False; {have we found an add/sub which could be
  1035. integrated in the lea?}
  1036. reference_reset(tmpref,2,[]);
  1037. TmpRef.index := taicpu(p).oper[1]^.reg;
  1038. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1039. while TmpBool1 and
  1040. GetNextInstruction(p, hp1) and
  1041. (tai(hp1).typ = ait_instruction) and
  1042. ((((taicpu(hp1).opcode = A_ADD) or
  1043. (taicpu(hp1).opcode = A_SUB)) and
  1044. (taicpu(hp1).oper[1]^.typ = Top_Reg) and
  1045. (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg)) or
  1046. (((taicpu(hp1).opcode = A_INC) or
  1047. (taicpu(hp1).opcode = A_DEC)) and
  1048. (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1049. (taicpu(hp1).oper[0]^.reg = taicpu(p).oper[1]^.reg))) and
  1050. (not GetNextInstruction(hp1,hp2) or
  1051. not instrReadsFlags(hp2)) Do
  1052. begin
  1053. TmpBool1 := False;
  1054. if (taicpu(hp1).oper[0]^.typ = Top_Const) then
  1055. begin
  1056. TmpBool1 := True;
  1057. TmpBool2 := True;
  1058. case taicpu(hp1).opcode of
  1059. A_ADD:
  1060. inc(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1061. A_SUB:
  1062. dec(TmpRef.offset, longint(taicpu(hp1).oper[0]^.val));
  1063. end;
  1064. asml.remove(hp1);
  1065. hp1.free;
  1066. end
  1067. else
  1068. if (taicpu(hp1).oper[0]^.typ = Top_Reg) and
  1069. (((taicpu(hp1).opcode = A_ADD) and
  1070. (TmpRef.base = NR_NO)) or
  1071. (taicpu(hp1).opcode = A_INC) or
  1072. (taicpu(hp1).opcode = A_DEC)) then
  1073. begin
  1074. TmpBool1 := True;
  1075. TmpBool2 := True;
  1076. case taicpu(hp1).opcode of
  1077. A_ADD:
  1078. TmpRef.base := taicpu(hp1).oper[0]^.reg;
  1079. A_INC:
  1080. inc(TmpRef.offset);
  1081. A_DEC:
  1082. dec(TmpRef.offset);
  1083. end;
  1084. asml.remove(hp1);
  1085. hp1.free;
  1086. end;
  1087. end;
  1088. if TmpBool2 or
  1089. ((current_settings.optimizecputype < cpu_Pentium2) and
  1090. (taicpu(p).oper[0]^.val <= 3) and
  1091. not(cs_opt_size in current_settings.optimizerswitches)) then
  1092. begin
  1093. if not(TmpBool2) and
  1094. (taicpu(p).oper[0]^.val = 1) then
  1095. begin
  1096. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1097. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg)
  1098. end
  1099. else
  1100. hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
  1101. taicpu(p).oper[1]^.reg);
  1102. InsertLLItem(p.previous, p.next, hp1);
  1103. p.free;
  1104. p := hp1;
  1105. end;
  1106. end
  1107. else
  1108. if (current_settings.optimizecputype < cpu_Pentium2) and
  1109. (taicpu(p).oper[0]^.typ = top_const) and
  1110. (taicpu(p).oper[1]^.typ = top_reg) then
  1111. if (taicpu(p).oper[0]^.val = 1) then
  1112. {changes "shl $1, %reg" to "add %reg, %reg", which is the same on a 386,
  1113. but faster on a 486, and Tairable in both U and V pipes on the Pentium
  1114. (unlike shl, which is only Tairable in the U pipe)}
  1115. begin
  1116. hp1 := taicpu.Op_reg_reg(A_ADD,taicpu(p).opsize,
  1117. taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
  1118. InsertLLItem(p.previous, p.next, hp1);
  1119. p.free;
  1120. p := hp1;
  1121. end
  1122. else if (taicpu(p).opsize = S_L) and
  1123. (taicpu(p).oper[0]^.val<= 3) then
  1124. {changes "shl $2, %reg" to "lea (,%reg,4), %reg"
  1125. "shl $3, %reg" to "lea (,%reg,8), %reg}
  1126. begin
  1127. reference_reset(tmpref,2,[]);
  1128. TmpRef.index := taicpu(p).oper[1]^.reg;
  1129. TmpRef.scalefactor := 1 shl taicpu(p).oper[0]^.val;
  1130. hp1 := taicpu.Op_ref_reg(A_LEA,S_L,TmpRef, taicpu(p).oper[1]^.reg);
  1131. InsertLLItem(p.previous, p.next, hp1);
  1132. p.free;
  1133. p := hp1;
  1134. end
  1135. end;
  1136. A_SETcc :
  1137. { changes
  1138. setcc (funcres) setcc reg
  1139. movb (funcres), reg to leave/ret
  1140. leave/ret }
  1141. begin
  1142. if (taicpu(p).oper[0]^.typ = top_ref) and
  1143. GetNextInstruction(p, hp1) and
  1144. GetNextInstruction(hp1, hp2) and
  1145. IsExitCode(hp2) and
  1146. (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
  1147. (taicpu(p).oper[0]^.ref^.index = NR_NO) and
  1148. not(assigned(current_procinfo.procdef.funcretsym) and
  1149. (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
  1150. (hp1.typ = ait_instruction) and
  1151. (taicpu(hp1).opcode = A_MOV) and
  1152. (taicpu(hp1).opsize = S_B) and
  1153. (taicpu(hp1).oper[0]^.typ = top_ref) and
  1154. RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
  1155. begin
  1156. taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
  1157. DebugMsg('Peephole optimizer SetccMovbLeaveRet2SetccLeaveRet',p);
  1158. asml.remove(hp1);
  1159. hp1.free;
  1160. end
  1161. end;
  1162. A_SUB:
  1163. { * change "subl $2, %esp; pushw x" to "pushl x"}
  1164. { * change "sub/add const1, reg" or "dec reg" followed by
  1165. "sub const2, reg" to one "sub ..., reg" }
  1166. begin
  1167. if (taicpu(p).oper[0]^.typ = top_const) and
  1168. (taicpu(p).oper[1]^.typ = top_reg) then
  1169. if (taicpu(p).oper[0]^.val = 2) and
  1170. (taicpu(p).oper[1]^.reg = NR_ESP) and
  1171. { Don't do the sub/push optimization if the sub }
  1172. { comes from setting up the stack frame (JM) }
  1173. (not getLastInstruction(p,hp1) or
  1174. (hp1.typ <> ait_instruction) or
  1175. (taicpu(hp1).opcode <> A_MOV) or
  1176. (taicpu(hp1).oper[0]^.typ <> top_reg) or
  1177. (taicpu(hp1).oper[0]^.reg <> NR_ESP) or
  1178. (taicpu(hp1).oper[1]^.typ <> top_reg) or
  1179. (taicpu(hp1).oper[1]^.reg <> NR_EBP)) then
  1180. begin
  1181. hp1 := tai(p.next);
  1182. while Assigned(hp1) and
  1183. (tai(hp1).typ in [ait_instruction]+SkipInstr) and
  1184. not RegReadByInstruction(NR_ESP,hp1) and
  1185. not RegModifiedByInstruction(NR_ESP,hp1) do
  1186. hp1 := tai(hp1.next);
  1187. if Assigned(hp1) and
  1188. (tai(hp1).typ = ait_instruction) and
  1189. (taicpu(hp1).opcode = A_PUSH) and
  1190. (taicpu(hp1).opsize = S_W) then
  1191. begin
  1192. taicpu(hp1).changeopsize(S_L);
  1193. if taicpu(hp1).oper[0]^.typ=top_reg then
  1194. setsubreg(taicpu(hp1).oper[0]^.reg,R_SUBWHOLE);
  1195. hp1 := tai(p.next);
  1196. asml.remove(p);
  1197. p.free;
  1198. p := hp1;
  1199. continue
  1200. end;
  1201. if DoSubAddOpt(p) then
  1202. continue;
  1203. end
  1204. else if DoSubAddOpt(p) then
  1205. continue
  1206. end;
  1207. A_VMOVAPS,
  1208. A_VMOVAPD:
  1209. if OptPass1VMOVAP(p) then
  1210. continue;
  1211. A_VDIVSD,
  1212. A_VDIVSS,
  1213. A_VSUBSD,
  1214. A_VSUBSS,
  1215. A_VMULSD,
  1216. A_VMULSS,
  1217. A_VADDSD,
  1218. A_VADDSS:
  1219. if OptPass1VOP(p) then
  1220. continue;
  1221. end;
  1222. end; { if is_jmp }
  1223. end;
  1224. end;
  1225. updateUsedRegs(UsedRegs,p);
  1226. p:=tai(p.next);
  1227. end;
  1228. end;
  { Second peephole pass over the current block.

    Walks every item from BlockStart up to (not including) BlockEnd and
    hands instructions with selected opcodes to their pass-2 handler.  A
    handler that returns True has already repositioned the cursor, so the
    loop re-examines the cursor without stepping to the next item. }
  procedure TCPUAsmOptimizer.PeepHoleOptPass2;
    var
      p : tai;
      handled : boolean;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          if p.typ = ait_instruction then
            begin
              { instructions containing a segment reference are left alone }
              if InsContainsSegRef(taicpu(p)) then
                begin
                  p := tai(p.next);
                  continue;
                end;
              { opcodes without a pass-2 handler leave the flag cleared }
              handled := false;
              case taicpu(p).opcode of
                A_Jcc:
                  handled := OptPass2Jcc(p);
                A_FSTP,A_FISTP:
                  handled := DoFpuLoadStoreOpt(p);
                A_IMUL:
                  handled := OptPass2Imul(p);
                A_JMP:
                  handled := OptPass2Jmp(p);
                A_MOV:
                  handled := OptPass2MOV(p);
              end;
              if handled then
                continue;
            end;
          p := tai(p.next)
        end;
    end;
  { Final peephole pass over the current block (BlockStart up to, but not
    including, BlockEnd).

    Rewrites performed per opcode:
      A_CALL   - "call x; jmp y" becomes "push y; jmp x" (CPU types below
                 Pentium2, no PIC); "call x; ret" becomes "jmp x"
                 (optimization level 4, no PIC).
      A_CMP    - "cmp $0,%reg" becomes "test %reg,%reg".
      A_MOV    - delegated to PostPeepholeOptMov.
      A_MOVZX  - byte-to-long zero extension becomes "xor; movb" when
                 targeting cpu_Pentium, not optimizing for size and register
                 variables are off.
      A_TEST,
      A_OR     - drops a "test/or %y,%y" or "test $-1,%y" whose flags were
                 already produced by the preceding arithmetic instruction;
                 otherwise turns "test $-1,%reg" into "test %reg,%reg".

    Instructions containing a segment reference are skipped untouched. }
  procedure TCPUAsmOptimizer.PostPeepHoleOpts;
    var
      p,hp1,hp2: tai;
      IsTestConstX: boolean;
    begin
      p := BlockStart;
      ClearUsedRegs;
      while (p <> BlockEnd) do
        begin
          UpdateUsedRegs(UsedRegs, tai(p.next));
          case p.Typ of
            Ait_Instruction:
              begin
                if InsContainsSegRef(taicpu(p)) then
                  begin
                    p := tai(p.next);
                    continue;
                  end;
                case taicpu(p).opcode of
                  A_CALL:
                    begin
                      { don't do this on modern CPUs, this really hurts them due to
                        broken call/ret pairing }
                      if (current_settings.optimizecputype < cpu_Pentium2) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_JMP) and
                         ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then
                        begin
                          { push the jump target as return address, then jump
                            to the callee instead of calling it }
                          hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol);
                          InsertLLItem(p.previous, p, hp2);
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end
                      { replace
                          call procname
                          ret
                        by
                          jmp procname
                        this should never hurt except when pic is used, not sure
                        how to handle it then
                        but do it only on level 4 because it destroys stack back traces
                      }
                      else if (cs_opt_level4 in current_settings.optimizerswitches) and
                         not(cs_create_pic in current_settings.moduleswitches) and
                         GetNextInstruction(p, hp1) and
                         (hp1.typ = ait_instruction) and
                         (taicpu(hp1).opcode = A_RET) and
                         (taicpu(hp1).ops=0) then
                        begin
                          taicpu(p).opcode := A_JMP;
                          taicpu(p).is_jmp := true;
                          asml.remove(hp1);
                          hp1.free;
                        end;
                    end;
                  A_CMP:
                    begin
                      if (taicpu(p).oper[0]^.typ = top_const) and
                         (taicpu(p).oper[0]^.val = 0) and
                         (taicpu(p).oper[1]^.typ = top_reg) then
                        {change "cmp $0, %reg" to "test %reg, %reg"}
                        begin
                          taicpu(p).opcode := A_TEST;
                          taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
                          { p is deliberately not advanced: the next loop
                            iteration sees the same instruction again, now
                            with opcode A_TEST, and runs the A_TEST branch }
                          continue;
                        end;
                    end;
                  A_MOV:
                    PostPeepholeOptMov(p);
                  A_MOVZX:
                    { if register vars are on, it's possible there is code like }
                    {   "cmpl $3,%eax; movzbl 8(%ebp),%ebx; je .Lxxx"           }
                    { so we can't safely replace the movzx then with xor/mov,   }
                    { since that would change the flags (JM)                    }
                    if not(cs_opt_regvar in current_settings.optimizerswitches) then
                      begin
                        if (taicpu(p).oper[1]^.typ = top_reg) then
                          if (taicpu(p).oper[0]^.typ = top_reg) then
                            case taicpu(p).opsize of
                              S_BL:
                                begin
                                  if IsGP32Reg(taicpu(p).oper[1]^.reg) and
                                     not(cs_opt_size in current_settings.optimizerswitches) and
                                     (current_settings.optimizecputype = cpu_Pentium) and
                                     { the inserted xor clears the whole destination
                                       before the movb reads its source, so the source
                                       must not live in the destination register
                                       (e.g. "movzbl %al,%eax" or "movzbl %ah,%eax") }
                                     (getsupreg(taicpu(p).oper[0]^.reg) <> getsupreg(taicpu(p).oper[1]^.reg)) then
                                    {Change "movzbl %reg1, %reg2" to
                                     "xorl %reg2, %reg2; movb %reg1, %reg2" for Pentium and
                                     PentiumMMX}
                                    begin
                                      hp1 := taicpu.op_reg_reg(A_XOR, S_L,
                                                  taicpu(p).oper[1]^.reg, taicpu(p).oper[1]^.reg);
                                      InsertLLItem(p.previous, p, hp1);
                                      taicpu(p).opcode := A_MOV;
                                      taicpu(p).changeopsize(S_B);
                                      setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                    end;
                                end;
                            end
                          else if (taicpu(p).oper[0]^.typ = top_ref) and
                             { the address must not depend on the register that
                               the xor is about to clear }
                             (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
                             (taicpu(p).oper[0]^.ref^.index <> taicpu(p).oper[1]^.reg) and
                             not(cs_opt_size in current_settings.optimizerswitches) and
                             IsGP32Reg(taicpu(p).oper[1]^.reg) and
                             (current_settings.optimizecputype = cpu_Pentium) and
                             (taicpu(p).opsize = S_BL) then
                            {changes "movzbl mem, %reg" to "xorl %reg, %reg; movb mem, %reg8" for
                             Pentium and PentiumMMX}
                            begin
                              hp1 := taicpu.Op_reg_reg(A_XOR, S_L, taicpu(p).oper[1]^.reg,
                                          taicpu(p).oper[1]^.reg);
                              taicpu(p).opcode := A_MOV;
                              taicpu(p).changeopsize(S_B);
                              setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                              InsertLLItem(p.previous, p, hp1);
                            end;
                      end;
                  A_TEST, A_OR:
                    {removes the line marked with (x) from the sequence
                       and/or/xor/add/sub/... $x, %y
                       test/or %y, %y | test $-1, %y    (x)
                       j(n)z _Label
                     as the first instruction already adjusts the ZF
                     %y operand may also be a reference }
                    begin
                      IsTestConstX:=(taicpu(p).opcode=A_TEST) and
                        MatchOperand(taicpu(p).oper[0]^,-1);
                      if (OpsEqual(taicpu(p).oper[0]^,taicpu(p).oper[1]^) or IsTestConstX) and
                         GetLastInstruction(p, hp1) and
                         (tai(hp1).typ = ait_instruction) and
                         GetNextInstruction(p,hp2) and
                         MatchInstruction(hp2,A_SETcc,A_Jcc,A_CMOVcc,[]) then
                        case taicpu(hp1).opcode of
                          A_ADD, A_SUB, A_OR, A_XOR, A_AND:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 ((taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) or
                                  ((taicpu(hp1).opcode <> A_ADD) and
                                   (taicpu(hp1).opcode <> A_SUB))) then
                                begin
                                  { drop the redundant test/or and resume at its successor }
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_SHL, A_SAL, A_SHR, A_SAR:
                            begin
                              if OpsEqual(taicpu(hp1).oper[1]^,taicpu(p).oper[1]^) and
                                 { SHL/SAL/SHR/SAR with a value of 0 do not change the flags }
                                 { therefore, it's only safe to do this optimization for     }
                                 { shifts by a (nonzero) constant                            }
                                 (taicpu(hp1).oper[0]^.typ = top_const) and
                                 (taicpu(hp1).oper[0]^.val <> 0) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end;
                          A_DEC, A_INC, A_NEG:
                            begin
                              if OpsEqual(taicpu(hp1).oper[0]^,taicpu(p).oper[1]^) and
                                 { does not work in case of overflow for G(E)/L(E)/C_O/C_NO }
                                 { and in case of carry for A(E)/B(E)/C/NC                  }
                                 (taicpu(hp2).condition in [C_Z,C_NZ,C_E,C_NE]) then
                                begin
                                  case taicpu(hp1).opcode of
                                    A_DEC, A_INC:
                                      {replace inc/dec with add/sub 1, because inc/dec doesn't set the carry flag}
                                      begin
                                        case taicpu(hp1).opcode of
                                          A_DEC: taicpu(hp1).opcode := A_SUB;
                                          A_INC: taicpu(hp1).opcode := A_ADD;
                                        end;
                                        { move the register/reference to operand 1 and
                                          make operand 0 the constant 1 }
                                        taicpu(hp1).loadoper(1,taicpu(hp1).oper[0]^);
                                        taicpu(hp1).loadConst(0,1);
                                        taicpu(hp1).ops:=2;
                                      end
                                  end;
                                  hp1 := tai(p.next);
                                  asml.remove(p);
                                  p.free;
                                  p := tai(hp1);
                                  continue
                                end;
                            end
                          else
                            { change "test $-1,%reg" into "test %reg,%reg" }
                            if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                              taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                        end { case }
                      else
                        { change "test $-1,%reg" into "test %reg,%reg" }
                        if IsTestConstX and (taicpu(p).oper[1]^.typ=top_reg) then
                          taicpu(p).loadoper(0,taicpu(p).oper[1]^);
                    end;
                end;
              end;
          end;
          p := tai(p.next)
        end;
    end;
  { Top-level driver of the assembler optimizer.

    Repeats the peephole passes over the whole assembler list.  Without
    cs_opt_level3 the very first iteration is also the last one; with it the
    loop ends once the iteration counter exceeds 2 (the "changed" flag is
    never raised in this routine) and at the latest when the counter is 4.
    Stretches between mark_AsmBlockStart / mark_AsmBlockEnd markers are
    skipped, not optimized. }
  Procedure TCpuAsmOptimizer.Optimize;
    Var
      following: Tai;
      passNo: longint;
      thorough, changed, finalPass: boolean;
    Begin
      thorough := (cs_opt_level3 in current_settings.optimizerswitches);
      passNo := 0;
      changed := false;
      repeat
        { (passNo = 4) is the hard stop that prevents endless loops }
        finalPass :=
          (passNo = 4) or
          not(thorough) or
          ((passNo > 2) and not changed);
        changed := false;
        { Setup labeltable, always necessary }
        blockstart := tai(asml.first);
        pass_1;
        { Blockend now either contains an ait_marker with Kind = mark_AsmBlockStart, }
        { or nil                                                                     }
        while Assigned(BlockStart) do
          begin
            if (cs_opt_peephole in current_settings.optimizerswitches) then
              begin
                if passNo = 0 then
                  begin
                    { the first iteration additionally runs the pre-pass and }
                    { performs peephole pass 1 twice                         }
                    PrePeepHoleOpts;
                    PeepHoleOptPass1;
                    PeepHoleOptPass1;
                  end
                else
                  PeepHoleOptPass1;
              end;
            { More peephole optimizations }
            if (cs_opt_peephole in current_settings.optimizerswitches) then
              begin
                PeepHoleOptPass2;
                if finalPass then
                  PostPeepHoleOpts;
              end;
            { Continue where we left off, BlockEnd is either the start of an }
            { assembler block or nil                                         }
            BlockStart := BlockEnd;
            while Assigned(BlockStart) and
                  (BlockStart.typ = ait_Marker) and
                  (Tai_Marker(BlockStart).Kind = mark_AsmBlockStart) do
              begin
                { We stopped at an assembler block, so skip it }
                repeat
                  BlockStart := Tai(BlockStart.Next);
                until (BlockStart.Typ = Ait_Marker) and
                      (Tai_Marker(BlockStart).Kind = mark_AsmBlockEnd);
                { Blockstart now contains a Tai_marker(mark_AsmBlockEnd) }
                if not GetNextInstruction(BlockStart, following) or
                   ((following.typ = ait_Marker) and
                    (Tai_Marker(following).Kind = mark_AsmBlockStart)) then
                  { Nothing follows, or another assembler block follows: move on to it }
                  BlockStart := following
                else
                  { There is no assembler block anymore after the current one, so }
                  { optimize the next block of "normal" instructions              }
                  pass_1;
              end;
          end;
        inc(passNo);
      until finalPass;
      dfa.free;
    End;
  { Unit initialization: stores the TCpuAsmOptimizer class in casmoptimizer. NOTE(review): presumably this is how the target-independent optimizer driver picks the CPU-specific class - confirm against the unit that declares casmoptimizer. }
  1550. begin
  1551. casmoptimizer:=TCpuAsmOptimizer;
  1552. end.