|  | @@ -123,7 +123,7 @@ unit rgobj;
 | 
	
		
			
				|  |  |        end;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |        Tmovelist=record
 | 
	
		
			
				|  |  | -        count:cardinal;
 | 
	
		
			
				|  |  | +        count,sorted_until:cardinal;
 | 
	
		
			
				|  |  |          data:array[0..$ffff] of Tlinkedlistitem;
 | 
	
		
			
				|  |  |        end;
 | 
	
		
			
				|  |  |        Pmovelist=^Tmovelist;
 | 
	
	
		
			
				|  | @@ -311,6 +311,42 @@ implementation
 | 
	
		
			
				|  |  |         globals,verbose,tgobj,procinfo;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    procedure sort_movelist(ml:Pmovelist);
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    {Ok, sorting pointers is silly, but it does the job to make Trgobj.combine
 | 
	
		
			
				|  |  | +     faster.}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    var h,i,p:word;
 | 
	
		
			
				|  |  | +        t:Tlinkedlistitem;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    begin
 | 
	
		
			
				|  |  | +      with ml^ do
 | 
	
		
			
				|  |  | +        begin
 | 
	
		
			
				|  |  | +          if count<2 then
 | 
	
		
			
				|  |  | +            exit;
 | 
	
		
			
				|  |  | +          p:=1;
 | 
	
		
			
				|  |  | +          while 2*p<count do
 | 
	
		
			
				|  |  | +            p:=2*p;
 | 
	
		
			
				|  |  | +          while p<>0 do
 | 
	
		
			
				|  |  | +            begin
 | 
	
		
			
				|  |  | +              for h:=p to count-1 do
 | 
	
		
			
				|  |  | +                begin
 | 
	
		
			
				|  |  | +                  i:=h;
 | 
	
		
			
				|  |  | +                  t:=data[i];
 | 
	
		
			
				|  |  | +                  repeat
 | 
	
		
			
				|  |  | +                    if ptrint(data[i-p])<=ptrint(t) then
 | 
	
		
			
				|  |  | +                      break;
 | 
	
		
			
				|  |  | +                    data[i]:=data[i-p];
 | 
	
		
			
				|  |  | +                    dec(i,p);
 | 
	
		
			
				|  |  | +                  until i<p;
 | 
	
		
			
				|  |  | +                  data[i]:=t;
 | 
	
		
			
				|  |  | +                end;
 | 
	
		
			
				|  |  | +              p:=p shr 1;
 | 
	
		
			
				|  |  | +            end;
 | 
	
		
			
				|  |  | +          sorted_until:=count-1;
 | 
	
		
			
				|  |  | +        end;
 | 
	
		
			
				|  |  | +    end;
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  {******************************************************************************
 | 
	
		
			
				|  |  |                                tinterferencebitmap
 | 
	
		
			
				|  |  |  ******************************************************************************}
 | 
	
	
		
			
				|  | @@ -664,11 +700,12 @@ implementation
 | 
	
		
			
				|  |  |              begin
 | 
	
		
			
				|  |  |                getmem(movelist,64);
 | 
	
		
			
				|  |  |                movelist^.count:=0;
 | 
	
		
			
				|  |  | +              movelist^.sorted_until:=0;
 | 
	
		
			
				|  |  |              end
 | 
	
		
			
				|  |  |            else
 | 
	
		
			
				|  |  |              begin
 | 
	
		
			
				|  |  |                cursize:=memsize(movelist);
 | 
	
		
			
				|  |  | -              if (4*(movelist^.count+1)=cursize) then
 | 
	
		
			
				|  |  | +              if (4*(movelist^.count+2)=cursize) then
 | 
	
		
			
				|  |  |                  reallocmem(movelist,cursize*2);
 | 
	
		
			
				|  |  |              end;
 | 
	
		
			
				|  |  |            movelist^.data[movelist^.count]:=data;
 | 
	
	
		
			
				|  | @@ -742,9 +779,9 @@ implementation
 | 
	
		
			
				|  |  |       registers in it cause. This allows simplify to execute in
 | 
	
		
			
				|  |  |       constant time.}
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    var p,h,i,j,leni,lenj:word;
 | 
	
		
			
				|  |  | +    var p,h,i,leni,lent:word;
 | 
	
		
			
				|  |  |          t:Tsuperregister;
 | 
	
		
			
				|  |  | -        adji,adjj:Psuperregisterworklist;
 | 
	
		
			
				|  |  | +        adji,adjt:Psuperregisterworklist;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      begin
 | 
	
		
			
				|  |  |        with simplifyworklist do
 | 
	
	
		
			
				|  | @@ -756,30 +793,25 @@ implementation
 | 
	
		
			
				|  |  |              p:=2*p;
 | 
	
		
			
				|  |  |            while p<>0 do
 | 
	
		
			
				|  |  |              begin
 | 
	
		
			
				|  |  | -              for h:=0 to length-p-1 do
 | 
	
		
			
				|  |  | +              for h:=p to length-1 do
 | 
	
		
			
				|  |  |                  begin
 | 
	
		
			
				|  |  |                    i:=h;
 | 
	
		
			
				|  |  | +                  t:=buf^[i];
 | 
	
		
			
				|  |  | +                  adjt:=reginfo[buf^[i]].adjlist;
 | 
	
		
			
				|  |  | +                  lent:=0;
 | 
	
		
			
				|  |  | +                  if adjt<>nil then
 | 
	
		
			
				|  |  | +                    lent:=adjt^.length;
 | 
	
		
			
				|  |  |                    repeat
 | 
	
		
			
				|  |  | -                    j:=i+p;
 | 
	
		
			
				|  |  | -                    adji:=reginfo[buf^[i]].adjlist;
 | 
	
		
			
				|  |  | -                    adjj:=reginfo[buf^[j]].adjlist;
 | 
	
		
			
				|  |  | -                    if adji=nil then
 | 
	
		
			
				|  |  | -                      leni:=0
 | 
	
		
			
				|  |  | -                    else
 | 
	
		
			
				|  |  | +                    adji:=reginfo[buf^[i-p]].adjlist;
 | 
	
		
			
				|  |  | +                    leni:=0;
 | 
	
		
			
				|  |  | +                    if adji<>nil then
 | 
	
		
			
				|  |  |                        leni:=adji^.length;
 | 
	
		
			
				|  |  | -                    if adjj=nil then
 | 
	
		
			
				|  |  | -                      lenj:=0
 | 
	
		
			
				|  |  | -                    else
 | 
	
		
			
				|  |  | -                      lenj:=adjj^.length;
 | 
	
		
			
				|  |  | -                    if lenj>=leni then
 | 
	
		
			
				|  |  | -                      break;
 | 
	
		
			
				|  |  | -                    t:=buf^[i];
 | 
	
		
			
				|  |  | -                    buf^[i]:=buf^[j];
 | 
	
		
			
				|  |  | -                    buf^[j]:=t;
 | 
	
		
			
				|  |  | -                    if i<p then
 | 
	
		
			
				|  |  | +                    if leni<=lent then
 | 
	
		
			
				|  |  |                        break;
 | 
	
		
			
				|  |  | +                    buf^[i]:=buf^[i-p];
 | 
	
		
			
				|  |  |                      dec(i,p)
 | 
	
		
			
				|  |  | -                  until false;
 | 
	
		
			
				|  |  | +                  until i<p;
 | 
	
		
			
				|  |  | +                  buf^[i]:=t;
 | 
	
		
			
				|  |  |                  end;
 | 
	
		
			
				|  |  |                p:=p shr 1;
 | 
	
		
			
				|  |  |              end;
 | 
	
	
		
			
				|  | @@ -1009,12 +1041,9 @@ implementation
 | 
	
		
			
				|  |  |      procedure trgobj.combine(u,v:Tsuperregister);
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      var adj : Psuperregisterworklist;
 | 
	
		
			
				|  |  | -        i : word;
 | 
	
		
			
				|  |  | +        i,n,p,q:cardinal;
 | 
	
		
			
				|  |  |          t : tsuperregister;
 | 
	
		
			
				|  |  | -        n,o : cardinal;
 | 
	
		
			
				|  |  | -        decrement : boolean;
 | 
	
		
			
				|  |  | -{	moves:Tsuperregisterset;}
 | 
	
		
			
				|  |  | -        vm:Pmovelist;
 | 
	
		
			
				|  |  | +        searched:Tlinkedlistitem;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      label l1;
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -1028,50 +1057,53 @@ implementation
 | 
	
		
			
				|  |  |        {Combine both movelists. Since the movelists are sets, only add
 | 
	
		
			
				|  |  |         elements that are not already present. The movelists cannot be
 | 
	
		
			
				|  |  |         empty by definition; nodes are only coalesced if there is a move
 | 
	
		
			
				|  |  | -       between them.}
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -{     Nice attempt; it didn't work.
 | 
	
		
			
				|  |  | -      supregset_reset(moves,false);
 | 
	
		
			
				|  |  | -      supregset_include(moves,u);
 | 
	
		
			
				|  |  | +       between them. To prevent quadratic time blowup (movelists of
 | 
	
		
			
				|  |  | +       especially machine registers can get very large because of moves
 | 
	
		
			
				|  |  | +       generated during calls) we need to go into disgusting complexity.
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +       (See webtbs/tw2242 for an example that stresses this.)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +       We want to sort the movelist to be able to search logarithmically.
 | 
	
		
			
				|  |  | +       Unfortunately, sorting the movelist every time before searching
 | 
	
		
			
				|  |  | +       is counter-productive, since the movelist usually grows with a few
 | 
	
		
			
				|  |  | +       items at a time. Therefore, we split the movelist into a sorted
 | 
	
		
			
				|  |  | +       and an unsorted part and search through both. If the unsorted part
 | 
	
		
			
				|  |  | +       becomes too large, we sort.}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +      {We have to weigh the cost of sorting the list against searching
 | 
	
		
			
				|  |  | +       the cost of the unsorted part. I use factor of 8 here; if the
 | 
	
		
			
				|  |  | +       number of items is less than 8 times the numer of unsorted items,
 | 
	
		
			
				|  |  | +       we'll sort the list.}
 | 
	
		
			
				|  |  |        with reginfo[u].movelist^ do
 | 
	
		
			
				|  |  | -        for n:=0 to count-1 do
 | 
	
		
			
				|  |  | -	  begin
 | 
	
		
			
				|  |  | -	    if Tmoveins(data[n]).x=u then
 | 
	
		
			
				|  |  | -              supregset_include(moves,Tmoveins(data[n]).y)
 | 
	
		
			
				|  |  | -	    else
 | 
	
		
			
				|  |  | -              supregset_include(moves,Tmoveins(data[n]).x)
 | 
	
		
			
				|  |  | -          end;
 | 
	
		
			
				|  |  | -      with reginfo[v].movelist^ do
 | 
	
		
			
				|  |  | -        for n:=0 to count-1 do
 | 
	
		
			
				|  |  | -	  begin
 | 
	
		
			
				|  |  | -	    if Tmoveins(data[n]).x=v then
 | 
	
		
			
				|  |  | -	      begin
 | 
	
		
			
				|  |  | -	        if supregset_in(moves,Tmoveins(data[n]).y) then
 | 
	
		
			
				|  |  | -        	  add_to_movelist(u,data[n]);
 | 
	
		
			
				|  |  | -              end
 | 
	
		
			
				|  |  | -	    else
 | 
	
		
			
				|  |  | -	      begin
 | 
	
		
			
				|  |  | -	        if supregset_in(moves,Tmoveins(data[n]).x) then
 | 
	
		
			
				|  |  | -        	  add_to_movelist(u,data[n]);
 | 
	
		
			
				|  |  | +        if count<8*(count-sorted_until) then
 | 
	
		
			
				|  |  | +          sort_movelist(reginfo[u].movelist);
 | 
	
		
			
				|  |  | +      for n:=0 to reginfo[v].movelist^.count-1 do
 | 
	
		
			
				|  |  | +        begin
 | 
	
		
			
				|  |  | +          {Binary search the sorted part of the list.}
 | 
	
		
			
				|  |  | +          searched:=reginfo[v].movelist^.data[n];
 | 
	
		
			
				|  |  | +          p:=0;
 | 
	
		
			
				|  |  | +          q:=reginfo[u].movelist^.sorted_until;
 | 
	
		
			
				|  |  | +          i:=0;
 | 
	
		
			
				|  |  | +          if q<>0 then 
 | 
	
		
			
				|  |  | +            repeat
 | 
	
		
			
				|  |  | +              i:=(p+q) shr 1;
 | 
	
		
			
				|  |  | +              if ptrint(searched)>ptrint(reginfo[u].movelist^.data[i]) then
 | 
	
		
			
				|  |  | +                p:=i+1
 | 
	
		
			
				|  |  | +              else
 | 
	
		
			
				|  |  | +                q:=i;
 | 
	
		
			
				|  |  | +            until p=q;
 | 
	
		
			
				|  |  | +          with reginfo[u].movelist^ do
 | 
	
		
			
				|  |  | +            if searched<>data[i] then
 | 
	
		
			
				|  |  | +              begin
 | 
	
		
			
				|  |  | +                {Linear search the unsorted part of the list.}
 | 
	
		
			
				|  |  | +                for i:=sorted_until+1 to count-1 do
 | 
	
		
			
				|  |  | +                  if searched=data[i] then
 | 
	
		
			
				|  |  | +                    goto l1;
 | 
	
		
			
				|  |  | +                {Not found -> add}
 | 
	
		
			
				|  |  | +                add_to_movelist(u,searched);
 | 
	
		
			
				|  |  | +              l1:
 | 
	
		
			
				|  |  |                end;
 | 
	
		
			
				|  |  | -	  end;}
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -      {This loop is a performance bottleneck for large procedures and therefore
 | 
	
		
			
				|  |  | -       optimized by hand as much as possible. This is because machine registers
 | 
	
		
			
				|  |  | -       generally collect large movelists (for example around procedure calls data
 | 
	
		
			
				|  |  | -       is moved into machine registers). The loop below is unfortunately quadratic,
 | 
	
		
			
				|  |  | -       and guess what this means when a procedure has collected several thousand
 | 
	
		
			
				|  |  | -       moves.... Test webtbs/tw2242 is a good example to illustrate this.}
 | 
	
		
			
				|  |  | -      vm:=reginfo[v].movelist;
 | 
	
		
			
				|  |  | -      for n:=0 to vm^.count-1 do
 | 
	
		
			
				|  |  | -        with reginfo[u].movelist^ do
 | 
	
		
			
				|  |  | -          begin
 | 
	
		
			
				|  |  | -            for o:=0 to count-1 do
 | 
	
		
			
				|  |  | -              if data[o]=vm^.data[n] then
 | 
	
		
			
				|  |  | -                goto l1; {Continue outer loop.}
 | 
	
		
			
				|  |  | -            add_to_movelist(u,vm^.data[n]);
 | 
	
		
			
				|  |  | -          l1:
 | 
	
		
			
				|  |  | -          end;
 | 
	
		
			
				|  |  | +        end;
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |        enable_moves(v);
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -1080,26 +1112,27 @@ implementation
 | 
	
		
			
				|  |  |          for i:=1 to adj^.length do
 | 
	
		
			
				|  |  |            begin
 | 
	
		
			
				|  |  |              t:=adj^.buf^[i-1];
 | 
	
		
			
				|  |  | -            if not(ri_coalesced in reginfo[t].flags) then
 | 
	
		
			
				|  |  | -              begin
 | 
	
		
			
				|  |  | -                {t has a connection to v. Since we are adding v to u, we
 | 
	
		
			
				|  |  | -                 need to connect t to u. However, beware if t was already
 | 
	
		
			
				|  |  | -                 connected to u...}
 | 
	
		
			
				|  |  | -                if (ibitmap[t,u]) and not (ri_selected in reginfo[t].flags) then
 | 
	
		
			
				|  |  | -                  {... because in that case, we are actually removing an edge
 | 
	
		
			
				|  |  | -                   and the degree of t decreases.}
 | 
	
		
			
				|  |  | -                  decrement_degree(t)
 | 
	
		
			
				|  |  | -                else
 | 
	
		
			
				|  |  | -                  begin
 | 
	
		
			
				|  |  | -                    add_edge(t,u);
 | 
	
		
			
				|  |  | -                    {We have added an edge to t and u. So their degree increases.
 | 
	
		
			
				|  |  | -                     However, v is added to u. That means its neighbours will
 | 
	
		
			
				|  |  | -                     no longer point to v, but to u instead. Therefore, only the
 | 
	
		
			
				|  |  | -                     degree of u increases.}
 | 
	
		
			
				|  |  | -                    if (u>=first_imaginary) and not (ri_selected in reginfo[t].flags) then
 | 
	
		
			
				|  |  | -                      inc(reginfo[u].degree);
 | 
	
		
			
				|  |  | -                  end;
 | 
	
		
			
				|  |  | -              end;
 | 
	
		
			
				|  |  | +            with reginfo[t] do
 | 
	
		
			
				|  |  | +              if not(ri_coalesced in flags) then
 | 
	
		
			
				|  |  | +                begin
 | 
	
		
			
				|  |  | +                  {t has a connection to v. Since we are adding v to u, we
 | 
	
		
			
				|  |  | +                   need to connect t to u. However, beware if t was already
 | 
	
		
			
				|  |  | +                   connected to u...}
 | 
	
		
			
				|  |  | +                  if (ibitmap[t,u]) and not (ri_selected in flags) then
 | 
	
		
			
				|  |  | +                    {... because in that case, we are actually removing an edge
 | 
	
		
			
				|  |  | +                     and the degree of t decreases.}
 | 
	
		
			
				|  |  | +                    decrement_degree(t)
 | 
	
		
			
				|  |  | +                  else
 | 
	
		
			
				|  |  | +                    begin
 | 
	
		
			
				|  |  | +                      add_edge(t,u);
 | 
	
		
			
				|  |  | +                      {We have added an edge to t and u. So their degree increases.
 | 
	
		
			
				|  |  | +                       However, v is added to u. That means its neighbours will
 | 
	
		
			
				|  |  | +                       no longer point to v, but to u instead. Therefore, only the
 | 
	
		
			
				|  |  | +                       degree of u increases.}
 | 
	
		
			
				|  |  | +                      if (u>=first_imaginary) and not (ri_selected in flags) then
 | 
	
		
			
				|  |  | +                        inc(reginfo[u].degree);
 | 
	
		
			
				|  |  | +                    end;
 | 
	
		
			
				|  |  | +                end;
 | 
	
		
			
				|  |  |            end;
 | 
	
		
			
				|  |  |        if (reginfo[u].degree>=usable_registers_cnt) and freezeworklist.delete(u) then
 | 
	
		
			
				|  |  |          spillworklist.add(u);
 | 
	
	
		
			
				|  | @@ -1968,7 +2001,10 @@ implementation
 | 
	
		
			
				|  |  |  end.
 | 
	
		
			
				|  |  |  {
 | 
	
		
			
				|  |  |    $Log$
 | 
	
		
			
				|  |  | -  Revision 1.118  2004-02-07 23:28:34  daniel
 | 
	
		
			
				|  |  | +  Revision 1.119  2004-02-08 14:26:28  daniel
 | 
	
		
			
				|  |  | +    * Register allocator speed boost
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +  Revision 1.118  2004/02/07 23:28:34  daniel
 | 
	
		
			
				|  |  |      * Take advantage of our new with statement optimization
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |    Revision 1.117  2004/02/06 13:34:46  daniel
 |