rgobj.pas 81 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480
  1. {
  2. $Id$
  3. Copyright (c) 1998-2002 by Florian Klaempfl
  4. This unit implements the base class for the register allocator
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. {$i fpcdefs.inc}
  19. { Allow duplicate allocations, can be used to get the .s file written }
  20. { $define ALLOWDUPREG}
  21. {#******************************************************************************
  22. @abstract(Abstract register allocator unit)
  23. Register allocator introduction.
  24. Free Pascal uses a Chaitin style register allocator. We use a variant similar
  25. to the one described in the book "Modern compiler implementation in C" by
  26. Andrew W. Appel., published by Cambridge University Press.
  27. The register allocator that is described by Appel uses a much improved way
  28. of register coalescing, called "iterated register coalescing". Instead
  29. of doing coalescing as a prepass to the register allocation, the coalescing
  30. is done inside the register allocator. This has the advantage that the
  31. register allocator can coalesce very aggressively without introducing spills.
  32. Reading this book is recommended for a complete understanding. Here is a small
  33. introduction.
  34. The code generator thinks it has an infinite amount of registers. Our processor
  35. has a limited amount of registers. Therefore we must reduce the amount of
  36. registers until there are few enough to fit into the processor's registers.
  37. Registers can interfere or not interfere. If two imaginary registers interfere
  38. they cannot be placed into the same physical register. Reduction of registers
  39. is done by:
  40. - "coalescing" Two registers that do not interfere are combined
  41. into one register.
  42. - "spilling" A register is changed into a memory location and the generated
  43. code is modified to use the memory location instead of the register.
  44. Register allocation is a graph colouring problem. Each register is a colour, and
  45. if two registers interfere there is a connection between them in the graph.
  46. In addition to the imaginary registers in the code generator, the physical
  47. CPU registers are also present in this graph. This allows us to make
  48. interferences between imaginary registers and cpu registers. This is very
  49. useful for describing architectural constraints, like for example that
  50. the div instruction modifies edx, so variables that are in use at that time
  51. cannot be stored into edx. This can be modelled by making edx interfere
  52. with those variables.
  53. Graph colouring is an NP complete problem. Therefore we use an approximation
  54. that pushes registers to colour on to a stack. This is done in the "simplify"
  55. procedure.
  56. The register allocator first checks which registers are a candidate for
  57. coalescing.
  58. *******************************************************************************}
  59. unit rgobj;
  60. interface
  61. uses
  62. cutils, cpubase,
  63. aasmbase,aasmtai,aasmcpu,
  64. cclasses,globtype,cgbase,node,
  65. {$ifdef delphi}
  66. dmisc,
  67. {$endif}
  68. cpuinfo
  69. ;
  70. type
  71. {
  72. regvarother_longintarray = array[tregisterindex] of longint;
  73. regvarother_booleanarray = array[tregisterindex] of boolean;
  74. regvarint_longintarray = array[first_int_supreg..last_int_supreg] of longint;
  75. regvarint_ptreearray = array[first_int_supreg..last_int_supreg] of tnode;
  76. }
  { Storage types of the interference bitmap, which consists of two layers:
      layer 1 - rows of 256 pointers, each pointing to a layer 2 block
                (or nil while that block has not been allocated);
      layer 2 - blocks of 256 sets, each set holding 256 bits (32 bytes). }

  { One layer 2 block: 256 sets of 256 bits each. }
  Tinterferencebitmap2 = array[byte] of set of byte;
  Pinterferencebitmap2 = ^Tinterferencebitmap2;

  { One layer 1 row: 256 pointers to layer 2 blocks. }
  Tinterferencebitmap1 = array[byte] of Pinterferencebitmap2;
  pinterferencebitmap1 = ^tinterferencebitmap1;
  { Boolean matrix indexed by two superregisters. The high byte of each
    index selects a lazily allocated block, the low bytes select a bit
    inside that block. }
  Tinterferencebitmap = class
  private
    { Highest layer 1 row index for which memory is allocated in fbitmap. }
    maxx1 : byte;
    { Highest layer 1 column index at which a block has been allocated. }
    maxy1 : byte;
    { Array of layer 1 rows, grown on demand by setbitmap. }
    fbitmap : pinterferencebitmap1;
    function getbitmap(x,y:tsuperregister):boolean;
    procedure setbitmap(x,y:tsuperregister;b:boolean);
  public
    constructor create;
    destructor destroy;override;
    { Default array property: bitmap[x,y] reads/writes a single bit. }
    property bitmap[x,y:tsuperregister]:boolean read getbitmap write setbitmap;default;
  end;
  { Per-register list of the move instructions a register takes part in.
    The declared array bound is nominal: add_to_movelist allocates a small
    block and grows it on demand, so only data[0..count-1] is valid. }
  Tmovelist = record
    { Number of entries currently stored in data. }
    count : cardinal;
    data  : array[0..$ffff] of Tlinkedlistitem;
  end;
  Pmovelist = ^Tmovelist;
  { In the register allocator we keep track of move instructions.
    These instructions are moved between five linked lists. There
    is also a list per register to keep track of the moves it is
    associated with. Because we need to determine quickly in which
    of the five lists a move currently is, we add an enumeration
    to each move instruction. }
  Tmoveset = (
    ms_coalesced_moves,
    ms_constrained_moves,
    ms_frozen_moves,
    ms_worklist_moves,
    ms_active_moves
  );

  { One tracked move instruction. }
  Tmoveins = class(Tlinkedlistitem)
    { Which of the five move lists this move currently belongs to. }
    moveset : Tmoveset;
    { Superregisters involved; add_move_instruction stores the source
      register in x and the destination register in y. }
    x : Tsuperregister;
    y : Tsuperregister;
  end;
  Treginfoflag    = (ri_coalesced,ri_selected);
  Treginfoflagset = set of Treginfoflag;

  { Per-superregister bookkeeping of the register allocator. }
  Treginfo = record
    { First and last instruction registered for this register through
      add_reg_instruction. }
    live_start : Tai;
    live_end   : Tai;
    { Subregister passed to getnewreg when the register was created. }
    subreg     : tsubregister;
    alias      : Tsuperregister;
    { The register allocator assigns each register a colour }
    colour     : Tsuperregister;
    { Moves this register takes part in; nil until the first move is added. }
    movelist   : Pmovelist;
    { Interfering registers; nil until the first edge is added. }
    adjlist    : Psuperregisterworklist;
    degree     : TSuperregister;
    flags      : Treginfoflagset;
  end;
  Preginfo = ^TReginfo;
  { Spill state of one register of an instruction, as handed to the
    do_spill_* methods.
    NOTE(review): the field meanings are inferred from their names, the
    code using them is not part of this section - confirm. }
  tspillreginfo = record
    orgreg        : tsuperregister;
    tempreg       : tregister;
    regread       : boolean;
    regwritten    : boolean;
    mustbespilled : boolean;
  end;

  { An instruction carries at most three such entries. }
  tspillregsinfo = array[0..2] of tspillreginfo;
  {#------------------------------------------------------------------
    Default register allocator. The code generator uses it to allocate
    and free registers which might be valid across nodes. It also holds
    utility routines related to registers.

    Some of the methods in this class are meant to be overridden by
    cpu-specific implementations.
  --------------------------------------------------------------------}
  trgobj = class
    preserved_by_proc : tcpuregisterset;
    { CPU registers explicitly requested through getexplicitregister. }
    used_in_proc      : tcpuregisterset;
    // is_reg_var     : Tsuperregisterset; {old regvars}
    // reg_var_loaded : Tsuperregisterset; {old regvars}

    constructor create(Aregtype:Tregistertype;
                       Adefaultsub:Tsubregister;
                       const Ausable:array of tsuperregister;
                       Afirst_imaginary:Tsuperregister;
                       Apreserved_by_proc:Tcpuregisterset);
    destructor destroy;override;

    {# Allocate a new imaginary register. An internalerror is generated
       when no more register numbers are available. }
    function getregister(list:Taasmoutput;subreg:Tsubregister):Tregister;virtual;
    {# Get the specified CPU register. }
    procedure getexplicitregister(list:Taasmoutput;r:Tregister);virtual;
    {# Get all CPU registers contained in the set. }
    procedure allocexplicitregisters(list:Taasmoutput;r:Tcpuregisterset);virtual;
    {# Free all CPU registers contained in the set. }
    procedure deallocexplicitregisters(list:Taasmoutput;r:Tcpuregisterset);virtual;
    {# True once at least one imaginary register has been handed out. }
    function uses_registers:boolean;virtual;
    {# Deallocate any kind of register. }
    procedure ungetregister(list:Taasmoutput;r:Tregister);virtual;
    {# Record that instr uses register r (maintains the live range). }
    procedure add_reg_instruction(instr:Tai;r:tregister);
    {# Register instr as a move that the allocator may try to eliminate. }
    procedure add_move_instruction(instr:Taicpu);
    {# Do the register allocation. }
    procedure do_register_allocation(list:Taasmoutput;headertai:tai);virtual;
    { Adds an interference edge.
      Don't move this to the protected section, the arm cg needs access
      to it (FK) }
    procedure add_edge(u,v:Tsuperregister);

  protected
    regtype        : Tregistertype;
    { default subregister used }
    defaultsub     : tsubregister;
    live_registers : Tsuperregisterworklist;

    { can be overridden to add cpu specific interferences }
    procedure add_cpu_interferences(p : tai);virtual;
    function get_insert_pos(p:Tai;huntfor1,huntfor2,huntfor3:Tsuperregister):Tai;
    procedure forward_allocation(pfrom,pto:Tai);
    procedure getregisterinline(list:Taasmoutput;position:Tai;subreg:Tsubregister;var result:Tregister);
    procedure ungetregisterinline(list:Taasmoutput;position:Tai;r:Tregister);
    { Hook for cpu specific constraints; the default does nothing. }
    procedure add_constraints(reg:Tregister);virtual;
    procedure do_spill_read(list:Taasmoutput;instr:Taicpu_abstract;
                            pos:Tai;regidx:word;
                            const spilltemplist:Tspill_temp_list;
                            const regs:Tspillregsinfo);virtual;
    procedure do_spill_written(list:Taasmoutput;instr:Taicpu_abstract;
                               pos:Tai;regidx:word;
                               const spilltemplist:Tspill_temp_list;
                               const regs:Tspillregsinfo);virtual;
    procedure do_spill_readwritten(list:Taasmoutput;instr:Taicpu_abstract;
                                   pos:Tai;regidx:word;
                                   const spilltemplist:Tspill_temp_list;
                                   const regs:Tspillregsinfo);virtual;
    function instr_spill_register(list:Taasmoutput;
                                  instr:taicpu_abstract;
                                  const r:Tsuperregisterset;
                                  const spilltemplist:Tspill_temp_list): boolean;virtual;

  private
    {# First imaginary register. }
    first_imaginary      : Tsuperregister;
    {# Dynamically sized array with one Treginfo per superregister. }
    reginfo              : PReginfo;
    {# Number of entries allocated in reginfo. }
    maxreginfo           : Tsuperregister;
    {# Number of entries reginfo grows by on the next reallocation. }
    maxreginfoinc        : Tsuperregister;
    {# Next register number to hand out, i.e. one past the highest
       register allocated until now. }
    maxreg               : Tsuperregister;
    usable_registers_cnt : word;
    usable_registers     : array[0..maxcpuregister-1] of tsuperregister;
    ibitmap              : Tinterferencebitmap;
    spillednodes         : tsuperregisterworklist;
    simplifyworklist     : tsuperregisterworklist;
    freezeworklist       : tsuperregisterworklist;
    spillworklist        : tsuperregisterworklist;
    coalescednodes       : tsuperregisterworklist;
    selectstack          : tsuperregisterworklist;
    worklist_moves       : Tlinkedlist;
    active_moves         : Tlinkedlist;
    frozen_moves         : Tlinkedlist;
    coalesced_moves      : Tlinkedlist;
    constrained_moves    : Tlinkedlist;

{$ifdef EXTDEBUG}
    procedure writegraph(loopidx:longint);
{$endif EXTDEBUG}
    {# Disposes of the reginfo array. }
    procedure dispose_reginfo;
    {# Prepare the register colouring. }
    procedure prepare_colouring;
    {# Clean up after register colouring. }
    procedure epilogue_colouring;
    {# Colour the registers; that is do the register allocation. }
    procedure colour_registers;
    procedure insert_regalloc_info(list:Taasmoutput;headertai:tai);
    procedure generate_interference_graph(list:Taasmoutput;headertai:tai);
    procedure translate_registers(list:Taasmoutput);
    {# Spills certain registers in the specified assembler list. }
    function spill_registers(list:Taasmoutput;headertai:tai):boolean;virtual;
    function getnewreg(subreg:tsubregister):tsuperregister;
    procedure add_edges_used(u:Tsuperregister);
    procedure add_to_movelist(u:Tsuperregister;data:Tlinkedlistitem);
    function move_related(n:Tsuperregister):boolean;
    procedure make_work_list;
    procedure sort_simplify_worklist;
    procedure enable_moves(n:Tsuperregister);
    procedure decrement_degree(m:Tsuperregister);
    procedure simplify;
    function get_alias(n:Tsuperregister):Tsuperregister;
    procedure add_worklist(u:Tsuperregister);
    function adjacent_ok(u,v:Tsuperregister):boolean;
    function conservative(u,v:Tsuperregister):boolean;
    procedure combine(u,v:Tsuperregister);
    procedure coalesce;
    procedure freeze_moves(u:Tsuperregister);
    procedure freeze;
    procedure select_spill;
    procedure assign_colours;
    procedure clear_interferences(u:Tsuperregister);
  end;
  const
    first_reg = 0;
    { getnewreg raises an internalerror once the register counter
      reaches this value. }
    last_reg = high(tsuperregister)-1;
    { do_register_allocation raises an internalerror when more than this
      many colouring rounds end with spilled registers. }
    maxspillingcounter = 20;
  264. implementation
  265. uses
  266. systems,
  267. globals,verbose,tgobj,procinfo;
  268. {******************************************************************************
  269. tinterferencebitmap
  270. ******************************************************************************}
  { Creates a bitmap with two zero-filled layer 1 rows; further rows and
    all layer 2 blocks are allocated on demand by setbitmap. }
  constructor tinterferencebitmap.create;
    const
      initial_rows = 2;
    begin
      inherited create;
      { maxx1 is the highest valid row index, not the row count }
      maxx1:=initial_rows-1;
      { allocmem returns zeroed memory, so every block pointer starts nil }
      fbitmap:=allocmem(initial_rows*sizeof(tinterferencebitmap1));
    end;
  { Releases every allocated layer 2 block, then the layer 1 rows, and
    finally chains to the inherited destructor. }
  destructor tinterferencebitmap.destroy;
    var
      i,j : byte;
    begin
      { maxy1 is the highest column at which any block was allocated, so
        scanning 0..maxx1 x 0..maxy1 visits every possible block }
      for i:=0 to maxx1 do
        for j:=0 to maxy1 do
          if assigned(fbitmap[i,j]) then
            dispose(fbitmap[i,j]);
      freemem(fbitmap);
      inherited destroy;
    end;
  { Returns the bit stored for (x,y). Rows beyond the allocated range and
    blocks that were never allocated read as false. }
  function tinterferencebitmap.getbitmap(x,y:tsuperregister):boolean;
    var
      block : pinterferencebitmap2;
    begin
      result:=false;
      if (x shr 8)<=maxx1 then
        begin
          { high bytes select the block, low bytes the bit inside it }
          block:=fbitmap[x shr 8,y shr 8];
          if assigned(block) then
            result:=(x and $ff) in block^[y and $ff];
        end;
    end;
  { Sets (b=true) or clears (b=false) the bit for (x,y), growing the
    layer 1 row array and allocating the layer 2 block when needed. }
  procedure tinterferencebitmap.setbitmap(x,y:tsuperregister;b:boolean);
    var
      row,col : byte;
      block : pinterferencebitmap2;
    begin
      row:=x shr 8;
      col:=y shr 8;
      { grow the row array; the new rows start with nil block pointers }
      if row>maxx1 then
        begin
          reallocmem(fbitmap,(row+1)*sizeof(tinterferencebitmap1));
          fillchar(fbitmap[maxx1+1],(row-maxx1)*sizeof(tinterferencebitmap1),0);
          maxx1:=row;
        end;
      block:=fbitmap[row,col];
      if block=nil then
        begin
          { remember the highest allocated column for the destructor }
          if col>maxy1 then
            maxy1:=col;
          new(block);
          fillchar(block^,sizeof(tinterferencebitmap2),0);
          fbitmap[row,col]:=block;
        end;
      if b then
        include(block^[y and $ff],x and $ff)
      else
        exclude(block^[y and $ff],x and $ff);
    end;
  322. {******************************************************************************
  323. trgobj
  324. ******************************************************************************}
  { Sets up a register allocator for one register type.

    Aregtype           - register type handled by this allocator
    Adefaultsub        - default subregister
    Ausable            - CPU superregisters the allocator may assign;
                         must not be empty
    Afirst_imaginary   - first superregister number that denotes an
                         imaginary register; lower numbers are CPU registers
    Apreserved_by_proc - stored in preserved_by_proc

    Raises internalerror(200210181) when Ausable is empty. }
  constructor trgobj.create(Aregtype:Tregistertype;
                            Adefaultsub:Tsubregister;
                            const Ausable:array of tsuperregister;
                            Afirst_imaginary:Tsuperregister;
                            Apreserved_by_proc:Tcpuregisterset);
    var
      i : Tsuperregister;
    begin
      { empty super register sets can cause very strange problems;
        high() of an empty open array is -1 (0 means one element) }
      if high(Ausable)=-1 then
        internalerror(200210181);
      first_imaginary:=Afirst_imaginary;
      maxreg:=Afirst_imaginary;
      regtype:=Aregtype;
      defaultsub:=Adefaultsub;
      preserved_by_proc:=Apreserved_by_proc;
      used_in_proc:=[];
      live_registers.init;
      { Get reginfo for CPU registers }
      maxreginfo:=first_imaginary;
      maxreginfoinc:=16;
      worklist_moves:=Tlinkedlist.create;
      reginfo:=allocmem(first_imaginary*sizeof(treginfo));
      for i:=0 to first_imaginary-1 do
        begin
          reginfo[i].degree:=high(tsuperregister);
          reginfo[i].alias:=RS_INVALID;
        end;
      { Usable registers }
      fillchar(usable_registers,sizeof(usable_registers),0);
      for i:=low(Ausable) to high(Ausable) do
        usable_registers[i]:=Ausable[i];
      usable_registers_cnt:=high(Ausable)+1;
      { Initialize Worklists }
      spillednodes.init;
      simplifyworklist.init;
      freezeworklist.init;
      spillworklist.init;
      coalescednodes.init;
      selectstack.init;
    end;
  { Releases the worklists, the worklist_moves list and the reginfo
    array, then chains to the inherited destructor. }
  destructor trgobj.destroy;
    begin
      spillednodes.done;
      simplifyworklist.done;
      freezeworklist.done;
      spillworklist.done;
      coalescednodes.done;
      selectstack.done;
      live_registers.done;
      worklist_moves.free;
      { safe even if do_register_allocation already disposed of reginfo:
        dispose_reginfo checks for nil }
      dispose_reginfo;
      inherited destroy;
    end;
  { Frees the adjacency and move lists of all registers below maxreg and
    then the reginfo array itself. Does nothing when reginfo is already
    nil, so it may be called more than once. }
  procedure Trgobj.dispose_reginfo;
    var
      r : Tsuperregister;
    begin
      if reginfo=nil then
        exit;
      for r:=0 to maxreg-1 do
        begin
          if reginfo[r].adjlist<>nil then
            dispose(reginfo[r].adjlist,done);
          if reginfo[r].movelist<>nil then
            dispose(reginfo[r].movelist);
        end;
      freemem(reginfo);
      reginfo:=nil;
    end;
  { Hands out the next free superregister number and records subreg for
    it, enlarging the reginfo array when it is full.
    Raises internalerror(200310146) when the register numbers run out. }
  function trgobj.getnewreg(subreg:tsubregister):tsuperregister;
    var
      prevcapacity : tsuperregister;
    begin
      result:=maxreg;
      inc(maxreg);
      if maxreg>=last_reg then
        internalerror(200310146);
      if maxreg>=maxreginfo then
        begin
          prevcapacity:=maxreginfo;
          inc(maxreginfo,maxreginfoinc);
          { the growth step doubles until it has reached 256 entries }
          if maxreginfoinc<256 then
            maxreginfoinc:=maxreginfoinc*2;
          reallocmem(reginfo,maxreginfo*sizeof(treginfo));
          { Do we really need it to clear it ? At least for 1.0.x (PFV) }
          fillchar(reginfo[prevcapacity],(maxreginfo-prevcapacity)*sizeof(treginfo),0);
        end;
      reginfo[result].subreg:=subreg;
    end;
  { Allocates a new imaginary register of this allocator's register type.
    When the allocator's default subregister is R_SUBNONE the requested
    subreg is ignored and R_SUBNONE is used instead. The list parameter
    is not used by this implementation. }
  function trgobj.getregister(list:Taasmoutput;subreg:Tsubregister):Tregister;
    var
      effectivesub : Tsubregister;
    begin
      if defaultsub=R_SUBNONE then
        effectivesub:=R_SUBNONE
      else
        effectivesub:=subreg;
      result:=newreg(regtype,getnewreg(effectivesub),effectivesub);
    end;
  { True when at least one imaginary register has been handed out, i.e.
    the register counter has moved past first_imaginary. }
  function trgobj.uses_registers:boolean;
    begin
      result:=first_imaginary<maxreg;
    end;
  { Releases register r. Only CPU registers (numbers below
    first_imaginary) get a dealloc marker appended to list; for imaginary
    registers nothing is emitted here. }
  procedure trgobj.ungetregister(list:Taasmoutput;r:Tregister);
    begin
      { Only explicit allocs insert regalloc info }
      if getsupreg(r)>=first_imaginary then
        exit;
      list.concat(Tai_regalloc.dealloc(r));
    end;
  { Allocates the CPU register r explicitly: marks it in used_in_proc and
    appends an alloc marker to list.
    Raises internalerror(2003121503) when r is an imaginary register. }
  procedure trgobj.getexplicitregister(list:Taasmoutput;r:Tregister);
    var
      cpureg : Tsuperregister;
    begin
      cpureg:=getsupreg(r);
      if not(cpureg<first_imaginary) then
        internalerror(2003121503);
      include(used_in_proc,cpureg);
      list.concat(Tai_regalloc.alloc(r));
    end;
  { Explicitly allocates every CPU register contained in the set r, in
    ascending register order, using the default subregister. }
  procedure trgobj.allocexplicitregisters(list:Taasmoutput;r:Tcpuregisterset);
    var
      cpureg : Tsuperregister;
    begin
      for cpureg:=0 to first_imaginary-1 do
        begin
          if cpureg in r then
            getexplicitregister(list,newreg(regtype,cpureg,defaultsub));
        end;
    end;
  { Releases every CPU register contained in the set r, in ascending
    register order, using the default subregister. }
  procedure trgobj.deallocexplicitregisters(list:Taasmoutput;r:Tcpuregisterset);
    var
      cpureg : Tsuperregister;
    begin
      for cpureg:=0 to first_imaginary-1 do
        begin
          if cpureg in r then
            ungetregister(list,newreg(regtype,cpureg,defaultsub));
        end;
    end;
  { Runs the complete register allocation for list.

    Builds the interference graph, then repeats prepare / colour /
    epilogue until a round ends without spilled registers or
    spill_registers reports that nothing was changed. Afterwards the
    registers in list are translated and the per-register info is freed.

    When cs_no_regalloc is set (-sr) only the regalloc info and the
    interference graph are produced.

    Raises internalerror(200309041) when more than maxspillingcounter
    rounds end with spilled registers. }
  procedure trgobj.do_register_allocation(list:Taasmoutput;headertai:tai);
    var
      spillingcounter : byte;
      endspill : boolean;
    begin
      { Insert regalloc info for imaginary registers }
      insert_regalloc_info(list,headertai);
      ibitmap:=tinterferencebitmap.create;
      generate_interference_graph(list,headertai);
      { Don't do the real allocation when -sr is passed }
      if (cs_no_regalloc in aktglobalswitches) then
        begin
          { the bitmap is owned by this call; release it on the early
            exit as well, nothing else frees it }
          ibitmap.free;
          ibitmap:=nil;
          exit;
        end;
      {Do register allocation.}
      spillingcounter:=0;
      repeat
        prepare_colouring;
        colour_registers;
        epilogue_colouring;
        endspill:=true;
        if spillednodes.length<>0 then
          begin
            inc(spillingcounter);
            if spillingcounter>maxspillingcounter then
              internalerror(200309041);
            endspill:=not spill_registers(list,headertai);
          end;
      until endspill;
      ibitmap.free;
      ibitmap:=nil;
      translate_registers(list);
      dispose_reginfo;
    end;
  { Hook for cpu specific register constraints. The default
    implementation intentionally does nothing. }
  procedure trgobj.add_constraints(reg:Tregister);
    begin
      { nothing to do in the generic allocator }
    end;
  { Adds an interference edge between u and v to the interference graph.
    Self edges and edges that are already present are ignored. The edge
    is recorded in the bitmap in both directions; adjacency lists are
    only kept for imaginary registers. }
  procedure trgobj.add_edge(u,v:Tsuperregister);

    { Appends neighbour to the adjacency list of node, creating the list
      on first use. }
    procedure append_neighbour(node,neighbour:Tsuperregister);
      begin
        if reginfo[node].adjlist=nil then
          new(reginfo[node].adjlist,init);
        reginfo[node].adjlist^.add(neighbour);
      end;

    begin
      if (u=v) or ibitmap[v,u] then
        exit;
      ibitmap[v,u]:=true;
      ibitmap[u,v]:=true;
      {Precoloured nodes are not stored in the interference graph.}
      if u>=first_imaginary then
        append_neighbour(u,v);
      if v>=first_imaginary then
        append_neighbour(v,u);
    end;
  { Makes u interfere with every register currently held in
    live_registers. }
  procedure trgobj.add_edges_used(u:Tsuperregister);
    var
      idx : word;
    begin
      { guard against an empty list: length-1 must not be evaluated then }
      if live_registers.length>0 then
        for idx:=0 to live_registers.length-1 do
          add_edge(u,live_registers.buf^[idx]);
    end;
  522. {$ifdef EXTDEBUG}
  { Debug aid: writes the current interference graph as a text matrix to
    the file 'igraph<loopidx>'. After two header lines with hex digits
    there is one row per register below maxreg, with '*' where two
    registers interfere and '-' where they do not. }
  procedure trgobj.writegraph(loopidx:longint);
    var
      f : text;
      row,col : Tsuperregister;
      cell : char;
    begin
      assign(f,'igraph'+tostr(loopidx));
      rewrite(f);
      writeln(f,'Interference graph');
      writeln(f);
      { header line 1: high hex digit of the column number }
      write(f,' ');
      for row:=0 to 15 do
        for col:=0 to 15 do
          write(f,hexstr(row,1));
      writeln(f);
      { header line 2: low hex digit of the column number }
      write(f,' ');
      for row:=0 to 15 do
        write(f,'0123456789ABCDEF');
      writeln(f);
      for row:=0 to maxreg-1 do
        begin
          write(f,hexstr(row,2):4);
          for col:=0 to maxreg-1 do
            begin
              if ibitmap[row,col] then
                cell:='*'
              else
                cell:='-';
              write(f,cell);
            end;
          writeln(f);
        end;
      close(f);
    end;
  554. {$endif EXTDEBUG}
  { Appends the move instruction data to the move list of register u.

    The list is created with a small initial block on first use and is
    doubled whenever the next entry would no longer fit. The capacity
    check uses the real offset of the data array and the real entry
    size, so it does not depend on 4-byte pointers or on memsize
    returning exactly the requested size. }
  procedure trgobj.add_to_movelist(u:Tsuperregister;data:Tlinkedlistitem);
    const
      initial_size = 64;
    var
      cursize,needed : cardinal;
    begin
      with reginfo[u] do
        begin
          if movelist=nil then
            begin
              getmem(movelist,initial_size);
              movelist^.count:=0;
            end
          else
            begin
              cursize:=memsize(movelist);
              { bytes required to hold the header plus count+1 entries }
              needed:=cardinal(pchar(@movelist^.data)-pchar(movelist))+
                      (movelist^.count+1)*sizeof(Tlinkedlistitem);
              if needed>cursize then
                reallocmem(movelist,cursize*2);
            end;
          movelist^.data[movelist^.count]:=data;
          inc(movelist^.count);
        end;
    end;
  { Records that instruction instr uses register r. For imaginary
    registers the first such instruction becomes live_start and the most
    recent one live_end; CPU registers are ignored. }
  procedure trgobj.add_reg_instruction(instr:Tai;r:tregister);
    var
      reg : tsuperregister;
    begin
      reg:=getsupreg(r);
      if reg<first_imaginary then
        exit;
      if not assigned(reginfo[reg].live_start) then
        reginfo[reg].live_start:=instr;
      reginfo[reg].live_end:=instr;
    end;
  { Registers instr as a move instruction so that the register allocator
    can try to eliminate it. A Tmoveins is created, put on the
    worklist_moves list and added to the move lists of the source and the
    destination register.
    With extdebug defined, raises internalerror(200311291) when either
    operand is not a register. }
  procedure trgobj.add_move_instruction(instr:Taicpu);
    var
      move : Tmoveins;
      srcreg,dstreg : Tsuperregister;
    begin
{$ifdef extdebug}
      if (instr.oper[O_MOV_SOURCE]^.typ<>top_reg) or
         (instr.oper[O_MOV_DEST]^.typ<>top_reg) then
        internalerror(200311291);
{$endif}
      move:=Tmoveins.create;
      move.moveset:=ms_worklist_moves;
      worklist_moves.insert(move);
      srcreg:=getsupreg(instr.oper[O_MOV_SOURCE]^.reg);
      dstreg:=getsupreg(instr.oper[O_MOV_DEST]^.reg);
      add_to_movelist(srcreg,move);
      {Avoid adding the same move instruction twice to a single register.}
      if srcreg<>dstreg then
        add_to_movelist(dstreg,move);
      move.x:=srcreg;
      move.y:=dstreg;
    end;
  function trgobj.move_related(n:Tsuperregister):boolean;

  { Returns true when the movelist of n contains at least one move that is
    still in the worklist_moves or active_moves set. }

  var
    idx : cardinal;
    ml : Pmovelist;

  begin
    move_related:=false;
    ml:=reginfo[n].movelist;
    if ml=nil then
      exit;
    for idx:=0 to ml^.count-1 do
      if Tmoveins(ml^.data[idx]).moveset in [ms_worklist_moves,ms_active_moves] then
        begin
          move_related:=true;
          exit;
        end;
  end;
  procedure Trgobj.sort_simplify_worklist;

  { Shell sort of simplifyworklist into ascending order of the length of
    each register's adjacency list (registers without a list count as 0).
    With the list ordered, simplify does not have to search for the
    register with the fewest interferences. }

    function interference_count(r:Tsuperregister):word;

    { Length of the adjacency list of r, or 0 when it has none. }

    var adj:Psuperregisterworklist;

    begin
      adj:=reginfo[r].adjlist;
      if adj=nil then
        interference_count:=0
      else
        interference_count:=adj^.length;
    end;

  var gap,start,lo,hi:word;
      tmp:Tsuperregister;

  begin
    with simplifyworklist do
      begin
        if length<2 then
          exit;
        { Start with the largest power of two below the list length. }
        gap:=1;
        while 2*gap<length do
          gap:=2*gap;
        while gap<>0 do
          begin
            for start:=0 to length-gap-1 do
              begin
                lo:=start;
                hi:=lo+gap;
                { Sink the element at hi towards the front for as long as
                  it has fewer interferences than the one gap slots before. }
                while interference_count(buf^[hi])<interference_count(buf^[lo]) do
                  begin
                    tmp:=buf^[lo];
                    buf^[lo]:=buf^[hi];
                    buf^[hi]:=tmp;
                    if lo<gap then
                      break;
                    dec(lo,gap);
                    hi:=lo+gap;
                  end;
              end;
            gap:=gap shr 1;
          end;
      end;
  end;
  procedure trgobj.make_work_list;

  { Sets the initial degree of every imaginary register from its adjacency
    list and distributes the registers over the three worklists:
      - degree >= usable_registers_cnt : spillworklist (with as many
        neighbours as there are registers a colour cannot be guaranteed);
      - otherwise, move related        : freezeworklist;
      - otherwise                      : simplifyworklist.
    The simplify worklist is sorted afterwards. }

  var r:Tsuperregister;

  begin
    for r:=first_imaginary to maxreg-1 do
      begin
        if reginfo[r].adjlist=nil then
          reginfo[r].degree:=0
        else
          reginfo[r].degree:=reginfo[r].adjlist^.length;

        if reginfo[r].degree>=usable_registers_cnt then
          spillworklist.add(r)
        else if move_related(r) then
          freezeworklist.add(r)
        else
          simplifyworklist.add(r);
      end;
    sort_simplify_worklist;
  end;
  procedure trgobj.prepare_colouring;

  { Sets up the state for one colouring pass: builds the worklists,
    creates the four move lists that coalescing distributes moves over and
    empties the select stack. The lists created here are destroyed again
    by epilogue_colouring. }

  begin
    make_work_list;
    active_moves:=Tlinkedlist.create;
    frozen_moves:=Tlinkedlist.create;
    coalesced_moves:=Tlinkedlist.create;
    constrained_moves:=Tlinkedlist.create;
    selectstack.clear;
  end;
  procedure trgobj.enable_moves(n:Tsuperregister);

  { Re-activates the moves of n: every move in n's movelist that currently
    sits in active_moves is transferred to worklist_moves so that
    coalescing is attempted for it again. Moves in any other set are left
    untouched. }

  var item:Tlinkedlistitem;
      idx:cardinal;
      ml:Pmovelist;

  begin
    ml:=reginfo[n].movelist;
    if ml=nil then
      exit;
    for idx:=0 to ml^.count-1 do
      begin
        item:=ml^.data[idx];
        if Tmoveins(item).moveset=ms_active_moves then
          begin
            active_moves.remove(item);
            Tmoveins(item).moveset:=ms_worklist_moves;
            worklist_moves.concat(item);
          end;
      end;
  end;
  procedure Trgobj.decrement_degree(m:Tsuperregister);

  { Lowers the degree of m by one. When the degree was exactly
    usable_registers_cnt before, m has just become low-degree: its moves
    (and those of some neighbours) are enabled again and m is transferred
    from the spillworklist to the freeze- or simplifyworklist.
    Raises an internal error if the degree is already 0 or if m is not on
    the spillworklist at the moment of the transition. }

  var neighbours : Psuperregisterworklist;
      nb : tsuperregister;
      old_degree,idx : word;

  begin
    old_degree:=reginfo[m].degree;
    if old_degree=0 then
      internalerror(200312151);
    dec(reginfo[m].degree);
    if old_degree<>usable_registers_cnt then
      exit;

    enable_moves(m);
    neighbours:=reginfo[m].adjlist;
    if neighbours<>nil then
      for idx:=1 to neighbours^.length do
        begin
          nb:=neighbours^.buf^[idx-1];
          { NOTE(review): this enables moves for neighbours that ARE
            selected or coalesced. The published iterated-coalescing
            algorithm does it for the neighbours that are not; confirm
            whether the test is intentionally the other way round. }
          if reginfo[nb].flags*[ri_selected,ri_coalesced]<>[] then
            enable_moves(nb);
        end;

    if not spillworklist.delete(m) then
      internalerror(200310145);
    if move_related(m) then
      freezeworklist.add(m)
    else
      simplifyworklist.add(m);
  end;
  procedure trgobj.simplify;

  { Takes one register from the simplifyworklist, pushes it on the select
    stack, marks it ri_selected and lowers the degree of each imaginary
    neighbour that is neither selected nor coalesced. }

  var neighbours : Psuperregisterworklist;
      picked,nb : Tsuperregister;
      idx : word;

  begin
    picked:=simplifyworklist.get;
    selectstack.add(picked);
    include(reginfo[picked].flags,ri_selected);

    neighbours:=reginfo[picked].adjlist;
    if neighbours=nil then
      exit;
    for idx:=1 to neighbours^.length do
      begin
        nb:=neighbours^.buf^[idx-1];
        if nb<first_imaginary then
          continue;
        if reginfo[nb].flags*[ri_selected,ri_coalesced]<>[] then
          continue;
        decrement_degree(nb);
      end;
  end;
  function trgobj.get_alias(n:Tsuperregister):Tsuperregister;

  { Follows the alias chain starting at n until a register is reached that
    is not flagged ri_coalesced, and returns that register. }

  var cur:Tsuperregister;

  begin
    cur:=n;
    while ri_coalesced in reginfo[cur].flags do
      cur:=reginfo[cur].alias;
    get_alias:=cur;
  end;
  procedure trgobj.add_worklist(u:Tsuperregister);

  { Moves u from the freezeworklist to the simplifyworklist when u is an
    imaginary register that is no longer move related and has a degree
    below usable_registers_cnt. In that situation u must be on the
    freezeworklist; otherwise an internal error is raised. }

  begin
    if u<first_imaginary then
      exit;
    if move_related(u) then
      exit;
    if reginfo[u].degree>=usable_registers_cnt then
      exit;
    if not freezeworklist.delete(u) then
      internalerror(200308161);  {must be found}
    simplifyworklist.add(u);
  end;
  function trgobj.adjacent_ok(u,v:Tsuperregister):boolean;

  { Coalescing test for a precoloured u: returns true when every neighbour
    n of v that is still part of the graph (neither coalesced nor
    selected) satisfies ok(n,u). }

    function ok(t,r:Tsuperregister):boolean;

    { t causes no problem for r when t is a machine register, has a low
      degree, or already interferes with r. }

    begin
      ok:=(t<first_imaginary) or
          (reginfo[t].degree<usable_registers_cnt) or
          ibitmap[r,t];
    end;

  var adj : Psuperregisterworklist;
      i : word;
      n : tsuperregister;

  begin
    adjacent_ok:=true;
    adj:=reginfo[v].adjlist;
    if adj<>nil then
      for i:=1 to adj^.length do
        begin
          n:=adj^.buf^[i-1];
          { The filter has to look at the flags of neighbour n. It used to
            sit inside "with reginfo[v]", where the bare "flags" was v's
            own set and the filter was therefore the same for every n. }
          if (reginfo[n].flags*[ri_coalesced,ri_selected]=[]) and not ok(n,u) then
            begin
              adjacent_ok:=false;
              break;
            end;
        end;
  end;
  function trgobj.conservative(u,v:Tsuperregister):boolean;

  { Conservative coalescing test: counts the neighbours of u and v that
    are still part of the graph (neither coalesced nor selected) and have
    a degree of at least usable_registers_cnt. Each neighbour is counted
    once. Returns true when fewer than usable_registers_cnt such
    neighbours exist. }

  var adj : Psuperregisterworklist;
      done : Tsuperregisterset;  {To prevent that we count nodes twice.}
      i,k : word;
      n : tsuperregister;

  begin
    k:=0;
    supregset_reset(done,false);
    adj:=reginfo[u].adjlist;
    if adj<>nil then
      for i:=1 to adj^.length do
        begin
          n:=adj^.buf^[i-1];
          { Filter on the neighbour's own flags; the test used to read
            reginfo[u].flags, which is the same for every neighbour. }
          if reginfo[n].flags*[ri_coalesced,ri_selected]=[] then
            begin
              supregset_include(done,n);
              if reginfo[n].degree>=usable_registers_cnt then
                inc(k);
            end;
        end;
    adj:=reginfo[v].adjlist;
    if adj<>nil then
      for i:=1 to adj^.length do
        begin
          n:=adj^.buf^[i-1];
          if not supregset_in(done,n) and
             (reginfo[n].degree>=usable_registers_cnt) and
             (reginfo[n].flags*[ri_coalesced,ri_selected]=[]) then
            inc(k);
        end;
    conservative:=(k<usable_registers_cnt);
  end;
  procedure trgobj.combine(u,v:Tsuperregister);

  { Coalesces v into u: v is taken off its worklist, flagged ri_coalesced
    and aliased to u; the moves of v are merged into the movelist of u and
    the interference edges of v are transferred to u.

    Both movelists are expected to be non-nil: nodes are only coalesced
    when there is a move between them. }

  var neighbours : Psuperregisterworklist;
      idx : word;
      t : tsuperregister;
      vi,ui : cardinal;
      src,dst : Pmovelist;
      item : Tlinkedlistitem;
      present : boolean;

  begin
    if not freezeworklist.delete(v) then
      spillworklist.delete(v);
    coalescednodes.add(v);
    include(reginfo[v].flags,ri_coalesced);
    reginfo[v].alias:=u;

    { Merge the movelists. They are sets, so only moves that u does not
      have yet are appended. The search is quadratic and known to be a
      bottleneck for procedures with several thousand moves, because
      machine registers collect very long movelists (test webtbs/tw2242).
      An earlier attempt to speed this up with a register set did not
      work. }
    src:=reginfo[v].movelist;
    for vi:=0 to src^.count-1 do
      begin
        item:=src^.data[vi];
        { Fetch u's list again on every round: add_to_movelist may have
          reallocated it. }
        dst:=reginfo[u].movelist;
        present:=false;
        for ui:=0 to dst^.count-1 do
          if dst^.data[ui]=item then
            begin
              present:=true;
              break;
            end;
        if not present then
          add_to_movelist(u,item);
      end;

    enable_moves(v);

    neighbours:=reginfo[v].adjlist;
    if neighbours<>nil then
      for idx:=1 to neighbours^.length do
        begin
          t:=neighbours^.buf^[idx-1];
          if ri_coalesced in reginfo[t].flags then
            continue;
          { t is connected to v and, v being merged into u, has to be
            connected to u. }
          if ibitmap[t,u] and not (ri_selected in reginfo[t].flags) then
            { t was connected to u already, so the merge removes an edge
              and the degree of t goes down. }
            decrement_degree(t)
          else
            begin
              add_edge(t,u);
              { The neighbours of v now point to u instead of v, so only
                the degree of u goes up. }
              if (u>=first_imaginary) and not (ri_selected in reginfo[t].flags) then
                inc(reginfo[u].degree);
            end;
        end;

    if (reginfo[u].degree>=usable_registers_cnt) and freezeworklist.delete(u) then
      spillworklist.add(u);
  end;
  procedure trgobj.coalesce;

  { Takes the first move from worklist_moves and decides its fate:
      - both ends already share an alias  -> coalesced_moves;
      - the ends interfere                -> constrained_moves;
      - the coalescing test succeeds      -> coalesced_moves, and the two
                                             nodes are combined;
      - otherwise                         -> active_moves, to be retried. }

  var move : Tmoveins;
      x,y,u,v : Tsuperregister;
      safe : boolean;

  begin
    move:=Tmoveins(worklist_moves.getfirst);
    x:=get_alias(move.x);
    y:=get_alias(move.y);
    { If one of the two is a machine register make sure it ends up in u. }
    u:=x;
    v:=y;
    if y<first_imaginary then
      begin
        u:=y;
        v:=x;
      end;

    if u=v then
      begin
        {Already coalesced.}
        move.moveset:=ms_coalesced_moves;
        coalesced_moves.insert(move);
        add_worklist(u);
      end
    else if (v<first_imaginary) or ibitmap[u,v] then
      begin
        { u and v interfere. Two precoloured nodes always interfere, and a
          precoloured v implies a precoloured u because of the swap above. }
        move.moveset:=ms_constrained_moves;
        constrained_moves.insert(move);
        add_worklist(u);
        add_worklist(v);
      end
    else
      begin
        { Is it possible and a good idea to coalesce? }
        if u<first_imaginary then
          safe:=adjacent_ok(u,v)
        else
          safe:=conservative(u,v);
        if safe then
          begin
            move.moveset:=ms_coalesced_moves;
            coalesced_moves.insert(move);
            combine(u,v);
            add_worklist(u);
          end
        else
          begin
            move.moveset:=ms_active_moves;
            active_moves.insert(move);
          end;
      end;
  end;
  procedure trgobj.freeze_moves(u:Tsuperregister);

  { Gives up on coalescing the moves of u: every move of u that is still
    in worklist_moves or active_moves goes to frozen_moves. If the
    register at the other end of such a move is imaginary, is not move
    related any more and has a low degree, it is transferred from the
    freezeworklist to the simplifyworklist. }

  var idx : cardinal;
      item : Tlinkedlistitem;
      other : Tsuperregister;
      ml : Pmovelist;

  begin
    ml:=reginfo[u].movelist;
    if ml=nil then
      exit;
    for idx:=0 to ml^.count-1 do
      begin
        item:=ml^.data[idx];
        if not (Tmoveins(item).moveset in [ms_worklist_moves,ms_active_moves]) then
          continue;

        { Find the register at the other end of the move. }
        if get_alias(Tmoveins(item).y)=get_alias(u) then
          other:=get_alias(Tmoveins(item).x)
        else
          other:=get_alias(Tmoveins(item).y);

        if Tmoveins(item).moveset=ms_active_moves then
          active_moves.remove(item)
        else
          worklist_moves.remove(item);
        Tmoveins(item).moveset:=ms_frozen_moves;
        frozen_moves.insert(item);

        if (other>=first_imaginary) and not(move_related(other)) and
           (reginfo[other].degree<usable_registers_cnt) then
          begin
            freezeworklist.delete(other);
            simplifyworklist.add(other);
          end;
      end;
  end;
  procedure trgobj.freeze;

  { Takes one register from the freezeworklist (any element would do; the
    one returned by the list's get method is used), puts it on the
    simplifyworklist and freezes its moves. }

  var picked:Tsuperregister;

  begin
    picked:=freezeworklist.get;
    simplifyworklist.add(picked);
    freeze_moves(picked);
  end;
  procedure trgobj.select_spill;

  { Chooses a spill candidate: the register on the spillworklist with the
    longest adjacency list (the first entry when none has a list). The
    register creating the most conflicts is chosen on purpose; keeping it
    in a register would not reduce the complexity and could give the help
    registers of the spilling code so many conflicts that spilling never
    converges (PFV).
    The candidate moves to the simplifyworklist and its moves are frozen.

    Must only be called with a non-empty spillworklist. }

  var
    victim : tsuperregister;
    adj : psuperregisterworklist;
    best_len,best_idx,idx : word;

  begin
    best_len:=0;
    best_idx:=0;
    for idx:=0 to spillworklist.length-1 do
      begin
        adj:=reginfo[spillworklist.buf^[idx]].adjlist;
        if (adj<>nil) and (adj^.length>best_len) then
          begin
            best_idx:=idx;
            best_len:=adj^.length;
          end;
      end;
    victim:=spillworklist.buf^[best_idx];
    spillworklist.deleteidx(best_idx);

    simplifyworklist.add(victim);
    freeze_moves(victim);
  end;
  procedure trgobj.assign_colours;

  { Assigns the actual colours (machine registers) to the registers.
    The select stack is unwound from the top; each register receives the
    first usable register that is not taken by an already coloured
    neighbour, or is added to spillednodes when there is none. Coalesced
    registers finally inherit the colour of their alias. }

  var adj : Psuperregisterworklist;
      i,j,k : word;
      n,a,c,rep : Tsuperregister;
      adj_colours,
      colourednodes : Tsuperregisterset;
      found : boolean;

  begin
    spillednodes.clear;
    { Every register starts out coloured with itself... }
    for n:=0 to maxreg-1 do
      reginfo[n].colour:=n;
    { ...which is final for the machine registers. }
    supregset_reset(colourednodes,false);
    for n:=0 to first_imaginary-1 do
      supregset_include(colourednodes,n);

    for i:=selectstack.length downto 1 do
      begin
        n:=selectstack.buf^[i-1];
        { Collect the colours that are forbidden for n. }
        supregset_reset(adj_colours,false);
        adj:=reginfo[n].adjlist;
        if adj<>nil then
          for j:=0 to adj^.length-1 do
            begin
              a:=get_alias(adj^.buf^[j]);
              if supregset_in(colourednodes,a) then
                supregset_include(adj_colours,reginfo[a].colour);
            end;
        supregset_include(adj_colours,RS_STACK_POINTER_REG);

        { Look for the first usable register that is still allowed. }
        found:=false;
        k:=0;
        while (k<usable_registers_cnt) and not found do
          begin
            c:=usable_registers[k];
            if supregset_in(adj_colours,c) then
              inc(k)
            else
              found:=true;
          end;

        if found then
          begin
            reginfo[n].colour:=c;
            supregset_include(colourednodes,n);
            include(used_in_proc,c);
          end
        else
          spillednodes.add(n);
      end;

    { Coalesced registers take over the colour of their alias. }
    for i:=1 to coalescednodes.length do
      begin
        n:=coalescednodes.buf^[i-1];
        rep:=get_alias(n);
        reginfo[n].colour:=reginfo[rep].colour;
        if reginfo[rep].colour<maxcpuregister then
          include(used_in_proc,reginfo[rep].colour);
      end;
  {$ifdef ra_debug}
    if aktfilepos.line=179 then
      begin
        writeln('colourlist');
        for i:=0 to maxreg-1 do
          writeln(i:4,'   ',reginfo[i].colour:4)
      end;
  {$endif ra_debug}
  end;
  procedure trgobj.colour_registers;

  { Main loop of the colouring: as long as one of the worklists has work
    left, performs a single step of the first applicable phase, in the
    order simplify, coalesce, freeze, select_spill. When all lists are
    empty the colours are assigned. }

  begin
    while true do
      begin
        if simplifyworklist.length<>0 then
          simplify
        else if not(worklist_moves.empty) then
          coalesce
        else if freezeworklist.length<>0 then
          freeze
        else if spillworklist.length<>0 then
          select_spill
        else
          break;
      end;
    assign_colours;
  end;
  procedure trgobj.epilogue_colouring;

  { Cleans up after a colouring pass: empties worklist_moves, destroys the
    four move lists created by prepare_colouring and releases the movelist
    of every register. }

  var
    i : Tsuperregister;

  begin
    worklist_moves.clear;
    active_moves.destroy;
    active_moves:=nil;
    frozen_moves.destroy;
    frozen_moves:=nil;
    coalesced_moves.destroy;
    coalesced_moves:=nil;
    constrained_moves.destroy;
    constrained_moves:=nil;
    for i:=0 to maxreg-1 do
      with reginfo[i] do
        if movelist<>nil then
          begin
            { The movelists are allocated with getmem/reallocmem in
              add_to_movelist, so freemem is the matching release call
              (dispose belongs to new). }
            freemem(movelist);
            movelist:=nil;
          end;
  end;
  procedure trgobj.clear_interferences(u:Tsuperregister);

  { Removes node u from the interference graph: every edge between u and a
    neighbour is cleared in the bitmap and in the neighbour's adjacency
    list, and the adjacency list of u is disposed. Adjacency lists that
    become empty are disposed and set to nil as well. }

  var idx : word;
      other : Tsuperregister;
      own,theirs : Psuperregisterworklist;

  begin
    own:=reginfo[u].adjlist;
    if own=nil then
      exit;
    for idx:=1 to own^.length do
      begin
        other:=own^.buf^[idx-1];
        { Clear both directions in the bitmap. }
        ibitmap[u,other]:=false;
        ibitmap[other,u]:=false;
        { Take u out of the neighbour's adjacency list. }
        theirs:=reginfo[other].adjlist;
        if theirs<>nil then
          begin
            theirs^.delete(u);
            if theirs^.length=0 then
              begin
                dispose(theirs,done);
                reginfo[other].adjlist:=nil;
              end;
          end;
      end;
    { Finally drop the adjacency list of u itself. }
    dispose(own,done);
    reginfo[u].adjlist:=nil;
  end;
  procedure trgobj.getregisterinline(list:Taasmoutput;
                    position:Tai;subreg:Tsubregister;var result:Tregister);

  { Allocates a fresh register in the middle of an existing instruction
    list. The new superregister is made live, an allocation marker is
    inserted after `position` (at the start of the list when position is
    nil) and the interference edges and constraints are added. The
    register is returned in `result`. }

  var sr:Tsuperregister;
      r:Tregister;
      marker:Tai;

  begin
    sr:=getnewreg(subreg);
    live_registers.add(sr);
    r:=newreg(regtype,sr,subreg);
    marker:=Tai_regalloc.alloc(r);
    if position=nil then
      list.insert(marker)
    else
      list.insertafter(marker,position);
    add_edges_used(sr);
    add_constraints(r);
    result:=r;
  end;
  procedure trgobj.ungetregisterinline(list:Taasmoutput;
                    position:Tai;r:Tregister);

  { Releases register r in the middle of an existing instruction list: its
    superregister is removed from the live registers and a deallocation
    marker is inserted after `position` (at the start of the list when
    position is nil). }

  var marker:Tai;

  begin
    live_registers.delete(getsupreg(r));
    marker:=Tai_regalloc.dealloc(r);
    if position=nil then
      list.insert(marker)
    else
      list.insertafter(marker,position);
  end;
  procedure trgobj.insert_regalloc_info(list:Taasmoutput;headertai:tai);

  { Inserts allocation and deallocation markers for all imaginary
    registers that have a recorded live range: the allocation goes in
    front of live_start, the deallocation in front of live_end.
    headertai is not used here. }

  var
    supreg : tsuperregister;
    anchor,prev : tai;
    r : tregister;

  begin
    for supreg:=first_imaginary to maxreg-1 do
      with reginfo[supreg] do
        begin
          r:=newreg(regtype,supreg,subreg);
          if assigned(live_start) then
            begin
  {$ifdef EXTDEBUG}
              if live_start=live_end then
                Comment(V_Warning,'Register '+std_regname(r)+' is only used once');
  {$endif EXTDEBUG}
              list.insertbefore(Tai_regalloc.alloc(r),live_start);
              { To reduce conflicts the deallocation is moved in front of
                the allocation markers of other registers that directly
                precede live_end. }
              anchor:=live_end;
              while assigned(anchor) do
                begin
                  prev:=tai(anchor.previous);
                  if (not assigned(prev)) or
                     (prev.typ<>ait_regalloc) or
                     (not tai_regalloc(prev).allocation) or
                     (tai_regalloc(prev).reg=r) then
                    break;
                  anchor:=prev;
                end;
              list.insertbefore(Tai_regalloc.dealloc(r),anchor);
            end
  {$ifdef EXTDEBUG}
          else
            Comment(V_Warning,'Register '+std_regname(r)+' not used');
  {$endif EXTDEBUG}
        end;
  end;
  procedure trgobj.add_cpu_interferences(p : tai);

  { Called by generate_interference_graph for every item p of the list.
    This implementation does nothing.
    NOTE(review): presumably a hook for cpu specific allocators to
    override - confirm against the class declaration. }

  begin
  end;
  procedure trgobj.generate_interference_graph(list:Taasmoutput;headertai:tai);

  { Builds the interference graph from the allocation markers in the list.
    The walk starts at headertai, because only symbols come before it.
    For each marker of this allocator's register type the set of live
    registers is updated and the edges and constraints for that register
    are added; add_cpu_interferences is called for every item.
    With EXTDEBUG a warning is issued for each imaginary register that is
    still live at the end of the list. }

  var
    p : tai;
    i : integer;
    supreg : tsuperregister;

  begin
    live_registers.clear;
    p:=headertai;
    while assigned(p) do
      begin
        if p.typ=ait_regalloc then
          with Tai_regalloc(p) do
            begin
              if (getregtype(reg)=regtype) then
                begin
                  supreg:=getsupreg(reg);
                  if allocation then
                    live_registers.add(supreg)
                  else
                    live_registers.delete(supreg);
                  add_edges_used(supreg);
                  add_constraints(reg);
                end;
            end;
        add_cpu_interferences(p);
        p:=Tai(p.next);
      end;

  {$ifdef EXTDEBUG}
    if live_registers.length>0 then
      begin
        for i:=0 to live_registers.length-1 do
          begin
            { Only report for imaginary registers }
            if live_registers.buf^[i]>=first_imaginary then
              { Use this allocator's own register type for the name; a
                fixed R_INTREGISTER reports the wrong register for the
                allocators of other register types. }
              Comment(V_Warning,'Register '+std_regname(newreg(regtype,live_registers.buf^[i],defaultsub))+' not released');
          end;
      end;
  {$endif}
  end;
  procedure Trgobj.translate_registers(list:taasmoutput);

  { Replaces the imaginary registers in the list by the colours assigned
    to them. Allocation markers and instruction operands are rewritten;
    a marker that cancels the marker right before it is removed together
    with that marker, and so are moves whose operands have become the
    same register. Nothing is done when no imaginary registers exist. }

    procedure recolour(var r:tregister);

    { Replaces the superregister of r by its colour. }

    begin
      setsupreg(r,reginfo[getsupreg(r)].colour);
    end;

  var
    predecessor,p,successor:Tai;
    i:shortint;

  begin
    if maxreg<=first_imaginary then
      exit;
    p:=Tai(list.first);
    while assigned(p) do
      begin
        case p.typ of
          ait_regalloc:
            with Tai_regalloc(p) do
              begin
                if (getregtype(reg)=regtype) then
                  recolour(reg);
                { A release directly followed by an allocation of the same
                  register (or the other way round) has no effect: drop
                  both markers. }
                if assigned(previous) and
                   (Tai(previous).typ=ait_regalloc) and
                   (Tai_regalloc(previous).reg=reg) and
                   (Tai_regalloc(previous).allocation xor allocation) then
                  begin
                    successor:=Tai(next);
                    predecessor:=tai(previous);
                    list.remove(predecessor);
                    predecessor.free;
                    list.remove(p);
                    p.free;
                    p:=successor;
                    continue;
                  end;
              end;
          ait_instruction:
            with Taicpu_abstract(p) do
              begin
                for i:=0 to ops-1 do
                  with oper[i]^ do
                    case typ of
                      Top_reg:
                        if (getregtype(reg)=regtype) then
                          recolour(reg);
                      Top_ref:
                        begin
                          { Registers in references are only translated by
                            the integer register allocator. }
                          if regtype=R_INTREGISTER then
                            with ref^ do
                              begin
                                if base<>NR_NO then
                                  recolour(base);
                                if index<>NR_NO then
                                  recolour(index);
                              end;
                        end;
  {$ifdef arm}
                      Top_shifterop:
                        begin
                          if shifterop^.rs<>NR_NO then
                            recolour(shifterop^.rs);
                        end;
  {$endif arm}
                    end;

                { A move whose two operands are now the same register can
                  be removed. }
                if is_same_reg_move then
                  begin
                    successor:=Tai(p.next);
                    list.remove(p);
                    p.free;
                    p:=successor;
                    continue;
                  end;
              end;
        end;
        p:=Tai(p.next);
      end;
  end;
  function trgobj.get_insert_pos(p:Tai;huntfor1,huntfor2,huntfor3:Tsuperregister):Tai;

  { Walks backwards from p over the run of allocation markers and rewinds
    live_registers accordingly. The result is p, unless a deallocation of
    one of the three hunted registers is found: the result then becomes
    the item before the earliest such marker. On exit live_registers
    reflects the state belonging to the returned position. }

  var
    saved : Tsuperregisterworklist;
    sr : tsuperregister;

  begin
    saved.copyfrom(live_registers);
    get_insert_pos:=p;
    while (p<>nil) and (p.typ=ait_regalloc) do
      begin
        sr:=getsupreg(Tai_regalloc(p).reg);
        {Rewind the register allocation.}
        if Tai_regalloc(p).allocation then
          live_registers.delete(sr)
        else
          begin
            live_registers.add(sr);
            if (sr=huntfor1) or (sr=huntfor2) or (sr=huntfor3) then
              begin
                { Insert before this marker and remember the live set that
                  goes with that position. }
                get_insert_pos:=Tai(p.previous);
                saved.done;
                saved.copyfrom(live_registers);
              end;
          end;
        p:=Tai(p.previous);
      end;
    live_registers.done;
    live_registers:=saved;
  end;
  procedure trgobj.forward_allocation(pfrom,pto:Tai);

  { Replays the allocation markers from pfrom up to, but not including,
    pto on live_registers. Every item in that range has to be an
    allocation marker; anything else raises an internal error. }

  var
    cur : tai;
    marker : Tai_regalloc;

  begin
    cur:=pfrom;
    while cur<>pto do
      begin
        if cur.typ<>ait_regalloc then
          internalerror(200305311);
        marker:=Tai_regalloc(cur);
        if marker.allocation then
          live_registers.add(getsupreg(marker.reg))
        else
          live_registers.delete(getsupreg(marker.reg));
        cur:=Tai(cur.next);
      end;
  end;
  function trgobj.spill_registers(list:Taasmoutput;headertai:tai):boolean;

  { Rewrites the list so that the registers in spillednodes live in
    temps. Each spilled register loses its interferences and gets a temp
    (allocated right after headertai); its allocation markers are removed
    and every instruction is handed to instr_spill_register.

    Returns true if any help registers have been used.
    Must only be called when spillednodes is not empty. }

  var
    idx : word;
    victim : tsuperregister;
    cur,following : Tai;
    marker : Tai_regalloc;
    spilled_set : Tsuperregisterset;
    temps : ^Tspill_temp_list;
    sr : tsuperregister;
    temp_allocs : taasmoutput;

  begin
    spill_registers:=false;
    live_registers.clear;
    for idx:=first_imaginary to maxreg-1 do
      exclude(reginfo[idx].flags,ri_selected);
    temps:=allocmem(sizeof(treference)*maxreg);
    supregset_reset(spilled_set,false);

    { Allocate the temps and insert them in front of the list. }
    temp_allocs:=taasmoutput.create;
    for idx:=0 to spillednodes.length-1 do
      begin
        victim:=spillednodes.buf^[idx];
        { The same information as a set, for quick lookup. }
        supregset_include(spilled_set,victim);
        clear_interferences(victim);
        tg.gettemp(temp_allocs,4,tt_noreuse,temps^[victim]);
      end;
    list.insertlistafter(headertai,temp_allocs);
    temp_allocs.free;

    { Walk through all instructions; headertai is the starting point,
      because only symbols come before it. }
    cur:=headertai;
    while assigned(cur) do
      begin
        case cur.typ of
          ait_regalloc:
            begin
              marker:=Tai_regalloc(cur);
              if getregtype(marker.reg)=regtype then
                begin
                  sr:=getsupreg(marker.reg);
                  if supregset_in(spilled_set,sr) then
                    begin
                      { A marker for a spilled register can be removed. }
                      following:=Tai(cur.next);
                      list.remove(cur);
                      cur.free;
                      cur:=following;
                      continue;
                    end
                  else if marker.allocation then
                    live_registers.add(sr)
                  else
                    live_registers.delete(sr);
                end;
            end;
          ait_instruction:
            begin
              aktfilepos:=Taicpu_abstract(cur).fileinfo;
              if instr_spill_register(list,Taicpu_abstract(cur),spilled_set,temps^) then
                spill_registers:=true;
            end;
        end;
        cur:=Tai(cur.next);
      end;

    aktfilepos:=current_procinfo.exitpos;
    for idx:=0 to spillednodes.length-1 do
      tg.ungettemp(list,temps^[spillednodes.buf^[idx]]);
    freemem(temps);
  end;
  procedure Trgobj.do_spill_read(list:Taasmoutput;instr:Taicpu_abstract;
                                 pos:Tai;regidx:word;
                                 const spilltemplist:Tspill_temp_list;
                                 const regs:Tspillregsinfo);

  { Emits the code for a spilled register that instr only reads: a load
    from the spill temp of regs[regidx] into its help register is inserted
    after pos.next (after the first item of the list when pos is nil), the
    help register is released after instr, and the allocation markers
    between the load and instr are replayed. }

  var load_ins:Tai;

  begin
    load_ins:=instr.spilling_create_load(spilltemplist[regs[regidx].orgreg],
                                         regs[regidx].tempreg);
    if pos=nil then
      list.insertafter(load_ins,list.first)
    else
      list.insertafter(load_ins,pos.next);
    ungetregisterinline(list,instr,regs[regidx].tempreg);
    forward_allocation(tai(load_ins.next),instr);
  end;
  procedure Trgobj.do_spill_written(list:Taasmoutput;instr:Taicpu_abstract;
                                    pos:Tai;regidx:word;
                                    const spilltemplist:Tspill_temp_list;
                                    const regs:Tspillregsinfo);

  { Emits the code for a spilled register that instr only writes: a store
    of the help register of regs[regidx] to its spill temp is inserted
    right after instr, and the help register is released after the store.
    pos is not used. }

  var store_ins:Tai;

  begin
    store_ins:=instr.spilling_create_store(regs[regidx].tempreg,
                                           spilltemplist[regs[regidx].orgreg]);
    list.insertafter(store_ins,instr);
    ungetregisterinline(list,store_ins,regs[regidx].tempreg);
  end;
  procedure Trgobj.do_spill_readwritten(list:Taasmoutput;instr:Taicpu_abstract;
                                        pos:Tai;regidx:word;
                                        const spilltemplist:Tspill_temp_list;
                                        const regs:Tspillregsinfo);

  { Emits the code for a spilled register that instr both reads and
    writes: a load from the spill temp is inserted after pos.next (after
    the first item of the list when pos is nil), a store back to the temp
    right after instr. The help register is released after the store and
    the allocation markers between the load and instr are replayed. }

  var load_ins,store_ins:Tai;

  begin
    load_ins:=instr.spilling_create_load(spilltemplist[regs[regidx].orgreg],
                                         regs[regidx].tempreg);
    if pos=nil then
      list.insertafter(load_ins,list.first)
    else
      list.insertafter(load_ins,pos.next);
    store_ins:=instr.spilling_create_store(regs[regidx].tempreg,
                                           spilltemplist[regs[regidx].orgreg]);
    list.insertafter(store_ins,instr);
    ungetregisterinline(list,store_ins,regs[regidx].tempreg);
    forward_allocation(tai(load_ins.next),instr);
  end;
  1590. function trgobj.instr_spill_register(list:Taasmoutput;
  1591. instr:taicpu_abstract;
  1592. const r:Tsuperregisterset;
  1593. const spilltemplist:Tspill_temp_list): boolean;
  1594. var
  1595. counter, regindex: longint;
  1596. pos: tai;
  1597. regs: tspillregsinfo;
  1598. spilled: boolean;
  1599. procedure addreginfo(reg: tsuperregister; operation: topertype);
  1600. var
  1601. i, tmpindex: longint;
  1602. begin
  1603. tmpindex := regindex;
  1604. // did we already encounter this register?
  1605. for i := 0 to pred(regindex) do
  1606. if (regs[i].orgreg = reg) then
  1607. begin
  1608. tmpindex := i;
  1609. break;
  1610. end;
  1611. if tmpindex > high(regs) then
  1612. internalerror(2003120301);
  1613. regs[tmpindex].orgreg := reg;
  1614. if supregset_in(r,reg) then
  1615. begin
  1616. // add/update info on this register
  1617. regs[tmpindex].mustbespilled := true;
  1618. case operation of
  1619. operand_read:
  1620. regs[tmpindex].regread := true;
  1621. operand_write:
  1622. regs[tmpindex].regwritten := true;
  1623. operand_readwrite:
  1624. begin
  1625. regs[tmpindex].regread := true;
  1626. regs[tmpindex].regwritten := true;
  1627. end;
  1628. end;
  1629. spilled := true;
  1630. end;
  1631. inc(regindex,ord(regindex=tmpindex));
  1632. end;
  1633. procedure tryreplacereg(var reg: tregister);
  1634. var
  1635. i: longint;
  1636. supreg: tsuperregister;
  1637. begin
  1638. if (getregtype(reg) = R_INTREGISTER) then
  1639. begin
  1640. supreg := getsupreg(reg);
  1641. for i := 0 to pred(regindex) do
  1642. if (regs[i].mustbespilled) and
  1643. (regs[i].orgreg = supreg) then
  1644. begin
  1645. reg := regs[i].tempreg;
  1646. break;
  1647. end;
  1648. end;
  1649. end;
  1650. begin
  1651. result := false;
  1652. fillchar(regs,sizeof(regs),0);
  1653. for counter := low(regs) to high(regs) do
  1654. regs[counter].orgreg := RS_INVALID;
  1655. spilled := false;
  1656. regindex := 0;
  1657. { check whether and if so which and how (read/written) this instructions contains
  1658. registers that must be spilled }
  1659. for counter := 0 to instr.ops-1 do
  1660. with instr.oper[counter]^ do
  1661. begin
  1662. case typ of
  1663. top_reg:
  1664. begin
  1665. if (getregtype(reg) = regtype) then
  1666. addreginfo(getsupreg(reg),instr.spilling_get_operation_type(counter));
  1667. end;
  1668. top_ref:
  1669. begin
  1670. if regtype in [R_INTREGISTER,R_ADDRESSREGISTER] then
  1671. with ref^ do
  1672. begin
  1673. if (base <> NR_NO) then
  1674. addreginfo(getsupreg(base),operand_read);
  1675. if (index <> NR_NO) then
  1676. addreginfo(getsupreg(index),operand_read);
  1677. end;
  1678. end;
  1679. {$ifdef ARM}
  1680. top_shifterop:
  1681. begin
  1682. if shifterop^.rs<>NR_NO then
  1683. addreginfo(getsupreg(shifterop^.rs),operand_read);
  1684. end;
  1685. {$endif ARM}
  1686. end;
  1687. end;
  1688. { if no spilling for this instruction we can leave }
  1689. if not spilled then
  1690. exit;
  1691. { generate the spilling code }
  1692. result := true;
  1693. for counter := 0 to pred(regindex) do
  1694. with regs[counter] do
  1695. begin
  1696. if mustbespilled then
  1697. begin
  1698. pos:=get_insert_pos(Tai(instr.previous),regs[0].orgreg,regs[1].orgreg,regs[2].orgreg);
  1699. getregisterinline(list,pos,defaultsub,tempreg);
  1700. if regread then
  1701. if regwritten then
  1702. do_spill_readwritten(list,instr,pos,counter,spilltemplist,regs)
  1703. else
  1704. do_spill_read(list,instr,pos,counter,spilltemplist,regs)
  1705. else
  1706. do_spill_written(list,instr,pos,counter,spilltemplist,regs)
  1707. end;
  1708. end;
  1709. { substitute registers }
  1710. for counter := 0 to instr.ops-1 do
  1711. with instr.oper[counter]^ do
  1712. begin
  1713. case typ of
  1714. top_reg:
  1715. begin
  1716. tryreplacereg(reg);
  1717. end;
  1718. top_ref:
  1719. begin
  1720. tryreplacereg(ref^.base);
  1721. tryreplacereg(ref^.index);
  1722. end;
  1723. {$ifdef ARM}
  1724. top_shifterop:
  1725. begin
  1726. tryreplacereg(shifterop^.rs);
  1727. end;
  1728. {$endif ARM}
  1729. end;
  1730. end;
  1731. end;
  1732. end.
  1733. {
  1734. $Log$
  1735. Revision 1.118 2004-02-07 23:28:34 daniel
  1736. * Take advantage of our new with statement optimization
  1737. Revision 1.117 2004/02/06 13:34:46 daniel
1738. * Some changes to better accommodate very large movelists
  1739. * movelist resizing now exponential (avoids heap fragmentation, saves
  1740. 300 kb memory in make cycle)
  1741. * Trgobj.combine hand-optimized (still too slow)
  1742. Revision 1.116 2004/01/28 22:16:31 peter
  1743. * more record alignment fixes
  1744. Revision 1.115 2004/01/26 17:40:11 florian
  1745. * made DoSpill* overrideable
  1746. + add_cpu_interferences added
  1747. Revision 1.114 2004/01/26 16:12:28 daniel
  1748. * reginfo now also only allocated during register allocation
  1749. * third round of gdb cleanups: kick out most of concatstabto
  1750. Revision 1.112 2004/01/12 16:37:59 peter
  1751. * moved spilling code from taicpu to rg
  1752. Revision 1.109 2003/12/26 14:02:30 peter
  1753. * sparc updates
  1754. * use registertype in spill_register
  1755. Revision 1.108 2003/12/22 23:09:34 peter
  1756. * only report unreleased imaginary registers
  1757. Revision 1.107 2003/12/22 22:13:46 peter
  1758. * made decrease_degree working, but not really fixed
  1759. Revision 1.106 2003/12/18 17:06:21 florian
  1760. * arm compiler compilation fixed
  1761. Revision 1.105 2003/12/17 21:59:05 peter
  1762. * don't insert dealloc before alloc of the same register
  1763. Revision 1.104 2003/12/16 09:41:44 daniel
  1764. * Automatic conversion from integer constants to pointer constants is no
  1765. longer done except in Delphi mode
  1766. Revision 1.103 2003/12/15 21:25:49 peter
  1767. * reg allocations for imaginary register are now inserted just
  1768. before reg allocation
  1769. * tregister changed to enum to allow compile time check
  1770. * fixed several tregister-tsuperregister errors
  1771. Revision 1.102 2003/12/15 16:37:47 daniel
  1772. * More microoptimizations
  1773. Revision 1.101 2003/12/15 15:58:58 peter
  1774. * fix statedebug compile
  1775. Revision 1.100 2003/12/14 20:24:28 daniel
  1776. * Register allocator speed optimizations
  1777. - Worklist no longer a ringbuffer
  1778. - No find operations are left
  1779. - Simplify now done in constant time
  1780. - unusedregs is now a Tsuperregisterworklist
  1781. - Microoptimizations
  1782. Revision 1.99 2003/12/12 17:16:17 peter
  1783. * rg[tregistertype] added in tcg
  1784. Revision 1.98 2003/12/04 23:27:32 peter
  1785. * remove redundant calls to add_edge_used
  1786. Revision 1.97 2003/11/29 17:36:41 peter
  1787. * check for add_move_instruction
  1788. Revision 1.96 2003/11/24 15:17:37 florian
1789. * changed some types to prevent range check errors
  1790. Revision 1.95 2003/11/10 19:05:50 peter
  1791. * fixed alias/colouring > 255
  1792. Revision 1.94 2003/11/07 15:58:32 florian
1793. * Florian's cumulative nr. 1; contains:
  1794. - invalid calling conventions for a certain cpu are rejected
  1795. - arm softfloat calling conventions
1796. - -Sp for cpu dependent code generation
  1797. - several arm fixes
  1798. - remaining code for value open array paras on heap
  1799. Revision 1.93 2003/10/30 16:22:40 peter
  1800. * call firstpass before allocation and codegeneration is started
  1801. * move leftover code from pass_2.generatecode() to psub
  1802. Revision 1.92 2003/10/29 21:29:14 jonas
  1803. * some ALLOWDUPREG improvements
  1804. Revision 1.91 2003/10/21 15:15:36 peter
  1805. * taicpu_abstract.oper[] changed to pointers
  1806. Revision 1.90 2003/10/19 12:36:36 florian
  1807. * improved speed; reduced memory usage of the interference bitmap
  1808. Revision 1.89 2003/10/19 01:34:30 florian
  1809. * some ppc stuff fixed
  1810. * memory leak fixed
  1811. Revision 1.88 2003/10/18 15:41:26 peter
  1812. * made worklists dynamic in size
  1813. Revision 1.87 2003/10/17 16:16:08 peter
  1814. * fixed last commit
  1815. Revision 1.86 2003/10/17 15:25:18 florian
  1816. * fixed more ppc stuff
  1817. Revision 1.85 2003/10/17 14:38:32 peter
  1818. * 64k registers supported
  1819. * fixed some memory leaks
  1820. Revision 1.84 2003/10/11 16:06:42 florian
  1821. * fixed some MMX<->SSE
  1822. * started to fix ppc, needs an overhaul
1823. + stabs info improved for spilling, not sure if it works correctly/completely
  1824. - MMX_SUPPORT removed from Makefile.fpc
  1825. Revision 1.83 2003/10/10 17:48:14 peter
  1826. * old trgobj moved to x86/rgcpu and renamed to trgx86fpu
  1827. * tregisteralloctor renamed to trgobj
  1828. * removed rgobj from a lot of units
  1829. * moved location_* and reference_* to cgobj
  1830. * first things for mmx register allocation
  1831. Revision 1.82 2003/10/09 21:31:37 daniel
1832. * Register allocator split, and abstract now
  1833. Revision 1.81 2003/10/01 20:34:49 peter
  1834. * procinfo unit contains tprocinfo
  1835. * cginfo renamed to cgbase
  1836. * moved cgmessage to verbose
  1837. * fixed ppc and sparc compiles
  1838. Revision 1.80 2003/09/30 19:54:42 peter
  1839. * reuse registers with the least conflicts
  1840. Revision 1.79 2003/09/29 20:58:56 peter
  1841. * optimized releasing of registers
  1842. Revision 1.78 2003/09/28 13:41:12 peter
  1843. * return reg 255 when allowdupreg is defined
  1844. Revision 1.77 2003/09/25 16:19:32 peter
  1845. * fix filepositions
  1846. * insert spill temp allocations at the start of the proc
  1847. Revision 1.76 2003/09/16 16:17:01 peter
  1848. * varspez in calls to push_addr_param
  1849. Revision 1.75 2003/09/12 19:07:42 daniel
  1850. * Fixed fast spilling functionality by re-adding the code that initializes
  1851. precoloured nodes to degree 255. I would like to play hangman on the one
  1852. who removed that code.
  1853. Revision 1.74 2003/09/11 11:54:59 florian
  1854. * improved arm code generation
  1855. * move some protected and private field around
  1856. * the temp. register for register parameters/arguments are now released
  1857. before the move to the parameter register is done. This improves
  1858. the code in a lot of cases.
  1859. Revision 1.73 2003/09/09 20:59:27 daniel
  1860. * Adding register allocation order
  1861. Revision 1.72 2003/09/09 15:55:44 peter
  1862. * use register with least interferences in spillregister
  1863. Revision 1.71 2003/09/07 22:09:35 peter
  1864. * preparations for different default calling conventions
  1865. * various RA fixes
  1866. Revision 1.70 2003/09/03 21:06:45 peter
  1867. * fixes for FPU register allocation
  1868. Revision 1.69 2003/09/03 15:55:01 peter
  1869. * NEWRA branch merged
  1870. Revision 1.68 2003/09/03 11:18:37 florian
  1871. * fixed arm concatcopy
  1872. + arm support in the common compiler sources added
  1873. * moved some generic cg code around
  1874. + tfputype added
  1875. * ...
  1876. Revision 1.67.2.5 2003/08/31 20:44:07 peter
  1877. * fixed getexplicitregisterint tregister value
  1878. Revision 1.67.2.4 2003/08/31 20:40:50 daniel
  1879. * Fixed add_edges_used
  1880. Revision 1.67.2.3 2003/08/29 17:28:59 peter
  1881. * next batch of updates
  1882. Revision 1.67.2.2 2003/08/28 18:35:08 peter
  1883. * tregister changed to cardinal
  1884. Revision 1.67.2.1 2003/08/27 19:55:54 peter
  1885. * first tregister patch
  1886. Revision 1.67 2003/08/23 10:46:21 daniel
  1887. * Register allocator bugfix for h2pas
  1888. Revision 1.66 2003/08/17 16:59:20 jonas
  1889. * fixed regvars so they work with newra (at least for ppc)
  1890. * fixed some volatile register bugs
  1891. + -dnotranslation option for -dnewra, which causes the registers not to
  1892. be translated from virtual to normal registers. Requires support in
  1893. the assembler writer as well, which is only implemented in aggas/
  1894. agppcgas currently
  1895. Revision 1.65 2003/08/17 14:32:48 daniel
  1896. * Precoloured nodes now have an infinite degree approached with 255,
  1897. like they should.
  1898. Revision 1.64 2003/08/17 08:48:02 daniel
  1899. * Another register allocator bug fixed.
  1900. * usable_registers_cnt set to 6 for i386
  1901. Revision 1.63 2003/08/09 18:56:54 daniel
  1902. * cs_regalloc renamed to cs_regvars to avoid confusion with register
  1903. allocator
1904. * Some preventive changes to i386 spilling code
  1905. Revision 1.62 2003/08/03 14:09:50 daniel
  1906. * Fixed a register allocator bug
  1907. * Figured out why -dnewra generates superfluous "mov reg1,reg2"
  1908. statements: changes in location_force. These moves are now no longer
  1909. constrained so they are optimized away.
  1910. Revision 1.61 2003/07/21 13:32:39 jonas
  1911. * add_edges_used() is now also called for registers allocated with
  1912. getexplicitregisterint()
1913. * writing the interference graph is now only done with -dradebug2 and
  1914. the created files are now called "igraph.<module_name>"
  1915. Revision 1.60 2003/07/06 15:31:21 daniel
  1916. * Fixed register allocator. *Lots* of fixes.
  1917. Revision 1.59 2003/07/06 15:00:47 jonas
  1918. * fixed my previous completely broken commit. It's not perfect though,
  1919. registers > last_int_supreg and < max_intreg may still be "translated"
  1920. Revision 1.58 2003/07/06 14:45:05 jonas
  1921. * support integer registers that are not managed by newra (ie. don't
  1922. translate register numbers that fall outside the range
  1923. first_int_supreg..last_int_supreg)
  1924. Revision 1.57 2003/07/02 22:18:04 peter
  1925. * paraloc splitted in callerparaloc,calleeparaloc
  1926. * sparc calling convention updates
  1927. Revision 1.56 2003/06/17 16:34:44 jonas
  1928. * lots of newra fixes (need getfuncretparaloc implementation for i386)!
  1929. * renamed all_intregisters to volatile_intregisters and made it
  1930. processor dependent
  1931. Revision 1.55 2003/06/14 14:53:50 jonas
  1932. * fixed newra cycle for x86
  1933. * added constants for indicating source and destination operands of the
  1934. "move reg,reg" instruction to aasmcpu (and use those in rgobj)
  1935. Revision 1.54 2003/06/13 21:19:31 peter
  1936. * current_procdef removed, use current_procinfo.procdef instead
  1937. Revision 1.53 2003/06/12 21:11:10 peter
  1938. * ungetregisterfpu gets size parameter
  1939. Revision 1.52 2003/06/12 16:43:07 peter
  1940. * newra compiles for sparc
  1941. Revision 1.51 2003/06/09 14:54:26 jonas
  1942. * (de)allocation of registers for parameters is now performed properly
  1943. (and checked on the ppc)
  1944. - removed obsolete allocation of all parameter registers at the start
  1945. of a procedure (and deallocation at the end)
  1946. Revision 1.50 2003/06/03 21:11:09 peter
  1947. * cg.a_load_* get a from and to size specifier
  1948. * makeregsize only accepts newregister
  1949. * i386 uses generic tcgnotnode,tcgunaryminus
  1950. Revision 1.49 2003/06/03 13:01:59 daniel
  1951. * Register allocator finished
  1952. Revision 1.48 2003/06/01 21:38:06 peter
  1953. * getregisterfpu size parameter added
  1954. * op_const_reg size parameter added
  1955. * sparc updates
  1956. Revision 1.47 2003/05/31 20:31:11 jonas
1957. * set initial costs of assigning a variable to a register to 120 for
1958. non-i386, because the used register must be stored to memory at the
  1959. start and loaded again at the end
  1960. Revision 1.46 2003/05/30 18:55:21 jonas
  1961. * fixed several regvar related bugs for non-i386. make cycle with -Or now
  1962. works for ppc
  1963. Revision 1.45 2003/05/30 12:36:13 jonas
  1964. * use as little different registers on the ppc until newra is released,
  1965. since every used register must be saved
  1966. Revision 1.44 2003/05/17 13:30:08 jonas
  1967. * changed tt_persistant to tt_persistent :)
  1968. * tempcreatenode now doesn't accept a boolean anymore for persistent
  1969. temps, but a ttemptype, so you can also create ansistring temps etc
  1970. Revision 1.43 2003/05/16 14:33:31 peter
  1971. * regvar fixes
  1972. Revision 1.42 2003/04/26 20:03:49 daniel
  1973. * Bug fix in simplify
  1974. Revision 1.41 2003/04/25 20:59:35 peter
  1975. * removed funcretn,funcretsym, function result is now in varsym
  1976. and aliases for result and function name are added using absolutesym
  1977. * vs_hidden parameter for funcret passed in parameter
  1978. * vs_hidden fixes
  1979. * writenode changed to printnode and released from extdebug
  1980. * -vp option added to generate a tree.log with the nodetree
  1981. * nicer printnode for statements, callnode
  1982. Revision 1.40 2003/04/25 08:25:26 daniel
  1983. * Ifdefs around a lot of calls to cleartempgen
  1984. * Fixed registers that are allocated but not freed in several nodes
  1985. * Tweak to register allocator to cause less spills
  1986. * 8-bit registers now interfere with esi,edi and ebp
  1987. Compiler can now compile rtl successfully when using new register
  1988. allocator
  1989. Revision 1.39 2003/04/23 20:23:06 peter
  1990. * compile fix for no-newra
  1991. Revision 1.38 2003/04/23 14:42:07 daniel
  1992. * Further register allocator work. Compiler now smaller with new
  1993. allocator than without.
  1994. * Somebody forgot to adjust ppu version number
  1995. Revision 1.37 2003/04/22 23:50:23 peter
  1996. * firstpass uses expectloc
  1997. * checks if there are differences between the expectloc and
  1998. location.loc from secondpass in EXTDEBUG
  1999. Revision 1.36 2003/04/22 10:09:35 daniel
  2000. + Implemented the actual register allocator
  2001. + Scratch registers unavailable when new register allocator used
  2002. + maybe_save/maybe_restore unavailable when new register allocator used
  2003. Revision 1.35 2003/04/21 19:16:49 peter
  2004. * count address regs separate
  2005. Revision 1.34 2003/04/17 16:48:21 daniel
  2006. * Added some code to keep track of move instructions in register
  2007. allocator
  2008. Revision 1.33 2003/04/17 07:50:24 daniel
  2009. * Some work on interference graph construction
  2010. Revision 1.32 2003/03/28 19:16:57 peter
  2011. * generic constructor working for i386
  2012. * remove fixed self register
  2013. * esi added as address register for i386
  2014. Revision 1.31 2003/03/11 21:46:24 jonas
  2015. * lots of new regallocator fixes, both in generic and ppc-specific code
  2016. (ppc compiler still can't compile the linux system unit though)
  2017. Revision 1.30 2003/03/09 21:18:59 olle
  2018. + added cutils to the uses clause
  2019. Revision 1.29 2003/03/08 20:36:41 daniel
  2020. + Added newra version of Ti386shlshrnode
  2021. + Added interference graph construction code
  2022. Revision 1.28 2003/03/08 13:59:16 daniel
  2023. * Work to handle new register notation in ag386nsm
  2024. + Added newra version of Ti386moddivnode
  2025. Revision 1.27 2003/03/08 10:53:48 daniel
  2026. * Created newra version of secondmul in n386add.pas
  2027. Revision 1.26 2003/03/08 08:59:07 daniel
  2028. + $define newra will enable new register allocator
  2029. + getregisterint will return imaginary registers with $newra
  2030. + -sr switch added, will skip register allocation so you can see
  2031. the direct output of the code generator before register allocation
  2032. Revision 1.25 2003/02/26 20:50:45 daniel
  2033. * Fixed ungetreference
  2034. Revision 1.24 2003/02/19 22:39:56 daniel
  2035. * Fixed a few issues
  2036. Revision 1.23 2003/02/19 22:00:14 daniel
  2037. * Code generator converted to new register notation
2038. - Horribly outdated todo.txt removed
  2039. Revision 1.22 2003/02/02 19:25:54 carl
  2040. * Several bugfixes for m68k target (register alloc., opcode emission)
  2041. + VIS target
  2042. + Generic add more complete (still not verified)
  2043. Revision 1.21 2003/01/08 18:43:57 daniel
  2044. * Tregister changed into a record
  2045. Revision 1.20 2002/10/05 12:43:28 carl
  2046. * fixes for Delphi 6 compilation
  2047. (warning : Some features do not work under Delphi)
  2048. Revision 1.19 2002/08/23 16:14:49 peter
  2049. * tempgen cleanup
  2050. * tt_noreuse temp type added that will be used in genentrycode
  2051. Revision 1.18 2002/08/17 22:09:47 florian
  2052. * result type handling in tcgcal.pass_2 overhauled
  2053. * better tnode.dowrite
  2054. * some ppc stuff fixed
  2055. Revision 1.17 2002/08/17 09:23:42 florian
  2056. * first part of procinfo rewrite
  2057. Revision 1.16 2002/08/06 20:55:23 florian
  2058. * first part of ppc calling conventions fix
  2059. Revision 1.15 2002/08/05 18:27:48 carl
  2060. + more more more documentation
  2061. + first version include/exclude (can't test though, not enough scratch for i386 :()...
  2062. Revision 1.14 2002/08/04 19:06:41 carl
  2063. + added generic exception support (still does not work!)
  2064. + more documentation
  2065. Revision 1.13 2002/07/07 09:52:32 florian
  2066. * powerpc target fixed, very simple units can be compiled
  2067. * some basic stuff for better callparanode handling, far from being finished
  2068. Revision 1.12 2002/07/01 18:46:26 peter
  2069. * internal linker
  2070. * reorganized aasm layer
  2071. Revision 1.11 2002/05/18 13:34:17 peter
  2072. * readded missing revisions
  2073. Revision 1.10 2002/05/16 19:46:44 carl
  2074. + defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand
  2075. + try to fix temp allocation (still in ifdef)
  2076. + generic constructor calls
  2077. + start of tassembler / tmodulebase class cleanup
  2078. Revision 1.8 2002/04/21 15:23:03 carl
  2079. + makeregsize
  2080. + changeregsize is now a local routine
  2081. Revision 1.7 2002/04/20 21:32:25 carl
  2082. + generic FPC_CHECKPOINTER
  2083. + first parameter offset in stack now portable
  2084. * rename some constants
  2085. + move some cpu stuff to other units
2086. - remove unused constants
  2087. * fix stacksize for some targets
  2088. * fix generic size problems which depend now on EXTEND_SIZE constant
  2089. Revision 1.6 2002/04/15 19:03:31 carl
  2090. + reg2str -> std_reg2str()
  2091. Revision 1.5 2002/04/06 18:13:01 jonas
  2092. * several powerpc-related additions and fixes
  2093. Revision 1.4 2002/04/04 19:06:04 peter
  2094. * removed unused units
  2095. * use tlocation.size in cg.a_*loc*() routines
  2096. Revision 1.3 2002/04/02 17:11:29 peter
  2097. * tlocation,treference update
  2098. * LOC_CONSTANT added for better constant handling
  2099. * secondadd splitted in multiple routines
  2100. * location_force_reg added for loading a location to a register
  2101. of a specified size
  2102. * secondassignment parses now first the right and then the left node
  2103. (this is compatible with Kylix). This saves a lot of push/pop especially
  2104. with string operations
  2105. * adapted some routines to use the new cg methods
  2106. Revision 1.2 2002/04/01 19:24:25 jonas
  2107. * fixed different parameter name in interface and implementation
  2108. declaration of a method (only 1.0.x detected this)
  2109. Revision 1.1 2002/03/31 20:26:36 jonas
  2110. + a_loadfpu_* and a_loadmm_* methods in tcg
  2111. * register allocation is now handled by a class and is mostly processor
  2112. independent (+rgobj.pas and i386/rgcpu.pas)
  2113. * temp allocation is now handled by a class (+tgobj.pas, -i386\tgcpu.pas)
  2114. * some small improvements and fixes to the optimizer
  2115. * some register allocation fixes
  2116. * some fpuvaroffset fixes in the unary minus node
  2117. * push/popusedregisters is now called rg.save/restoreusedregisters and
  2118. (for i386) uses temps instead of push/pop's when using -Op3 (that code is
  2119. also better optimizable)
  2120. * fixed and optimized register saving/restoring for new/dispose nodes
  2121. * LOC_FPU locations now also require their "register" field to be set to
  2122. R_ST, not R_ST0 (the latter is used for LOC_CFPUREGISTER locations only)
  2123. - list field removed of the tnode class because it's not used currently
  2124. and can cause hard-to-find bugs
  2125. }