Browse Source

Allocate fixed arenas within variable OS chunks.

Rika Ichinose 2 months ago
parent
commit
7e0e284fce
2 changed files with 272 additions and 302 deletions
  1. 270 300
      rtl/inc/heap.inc
  2. 2 2
      rtl/inc/heaph.inc

+ 270 - 300
rtl/inc/heap.inc

@@ -265,9 +265,11 @@ type
     class function IndexToSize(sizeIndex: SizeUint): SizeUint; static; inline;
     class function IndexToSize(sizeIndex: SizeUint): SizeUint; static; inline;
 
 
   const
   const
-    OSChunkFixedSizeQuant = 32 * 1024;
     OSChunkVarSizeQuant = 64 * 1024;
     OSChunkVarSizeQuant = 64 * 1024;
-    MaxFixedChunkSize = 256 * 1024;
+    FixedArenaSizeQuant = 4 * 1024;
+    MinFixedArenaSize = 8 * 1024;
+    MaxFixedArenaSize = 64 * 1024;
+    MaxKeptFixedArenas = 4;
 
 
     { Limit on shrinking variable chunks and keeping the tail when splitting the chunk in AllocVar / TryResizeVar. }
     { Limit on shrinking variable chunks and keeping the tail when splitting the chunk in AllocVar / TryResizeVar. }
     MinVarHeaderAndPayload = MaxFixedHeaderAndPayload * 3 div 4;
     MinVarHeaderAndPayload = MaxFixedHeaderAndPayload * 3 div 4;
@@ -277,20 +279,15 @@ type
   const
   const
     SizeIndexBits = 1 + trunc(ln(FixedSizesCount - 1) /  ln(2));
     SizeIndexBits = 1 + trunc(ln(FixedSizesCount - 1) /  ln(2));
     SizeIndexMask = 1 shl SizeIndexBits - 1;
     SizeIndexMask = 1 shl SizeIndexBits - 1;
-    FixedBitPos = {$if SizeIndexBits >= 3} SizeIndexBits {$else} 3 {$endif}; { Variable chunks use 3 low bits for used / first / last. }
+    FixedBitPos = {$if SizeIndexBits >= 4} SizeIndexBits {$else} 4 {$endif}; { Variable chunks use 4 low bits for used / first / last / fixed arena. }
     FixedFlag = 1 shl FixedBitPos;
     FixedFlag = 1 shl FixedBitPos;
-    ChunkOffsetShift = {$if FixedBitPos + 1 >= 5} FixedBitPos + 1 {$else} 5 {$endif}; { VarSizeQuant must be at least 2^5 to fit 3 64-bit pointers (RbTree.Node). }
-
-    { Not really used; MaxFixedChunkSize limit on fixed OS chunks assumed to be strictly enforced and (much!) more restricting than MaxChunkOffset.
-      MaxFixedChunkSize = 256 Kb.
-      MaxChunkOffset ~ 2^(32 - 6) ~ 67 Mb.
-      Indices could be stored instead, but offsets avoid multiplications. }
-    MaxChunkOffset = High(uint32) shr ChunkOffsetShift;
+    FixedArenaOffsetShift = {$if FixedBitPos + 1 >= 5} FixedBitPos + 1 {$else} 5 {$endif}; { VarSizeQuant must be at least 2^5 to fit 3 64-bit pointers (RbTree.Node). }
 
 
     UsedFlag = 1 shl 0;
     UsedFlag = 1 shl 0;
     FirstFlag = 1 shl 1;
     FirstFlag = 1 shl 1;
     LastFlag = 1 shl 2;
     LastFlag = 1 shl 2;
-    VarSizeQuant = 1 shl ChunkOffsetShift; {$if VarSizeQuant < Alignment} {$error Assumed to be >= Alignment.} {$endif} {$if VarSizeQuant < 3 * sizeof(pointer)} {$error Must fit RbTree.Node.} {$endif}
+    FixedArenaFlag = 1 shl 3;
+    VarSizeQuant = 1 shl FixedArenaOffsetShift; {$if VarSizeQuant < Alignment} {$error Assumed to be >= Alignment.} {$endif} {$if VarSizeQuant < 3 * sizeof(pointer)} {$error Must fit RbTree.Node.} {$endif}
     VarSizeMask = SizeUint(-VarSizeQuant);
     VarSizeMask = SizeUint(-VarSizeQuant);
     HugeHeader = 0; { Special header value for huge chunks. FixedFlag must be 0, and the value must be impossible for a variable chunk. 0 turns out to be suitable. :) }
     HugeHeader = 0; { Special header value for huge chunks. FixedFlag must be 0, and the value must be impossible for a variable chunk. 0 turns out to be suitable. :) }
 
 
@@ -300,13 +297,13 @@ type
       Fixed chunk header, assuming SizeIndexBits = 4:
       Fixed chunk header, assuming SizeIndexBits = 4:
       h[0:3] = size index (= h and SizeIndexMask)
       h[0:3] = size index (= h and SizeIndexMask)
       h[4] = 1 (h and FixedFlag <> 0)
       h[4] = 1 (h and FixedFlag <> 0)
-      h[5:31] — offset in the OS chunk (= h shr ChunkOffsetShift)
+      h[5:31] — offset in the FixedArena (= h shr FixedArenaOffsetShift)
 
 
       Variable chunk header, assuming SizeIndexBits = 4:
       Variable chunk header, assuming SizeIndexBits = 4:
       h[0] = used flag (h and UsedFlag <> 0)
       h[0] = used flag (h and UsedFlag <> 0)
       h[1] = first flag (h and FirstFlag <> 0)
       h[1] = first flag (h and FirstFlag <> 0)
       h[2] = last flag (h and LastFlag <> 0)
       h[2] = last flag (h and LastFlag <> 0)
-      h[3] = unused
+      h[3] = fixed arena flag (h and FixedArenaFlag <> 0)
       h[4] = 0 (h and FixedFlag = 0)
       h[4] = 0 (h and FixedFlag = 0)
       h[5:31] = size, rounded up to 32 (VarSizeQuant), shr 5; in other words, size = h and VarSizeMask.
       h[5:31] = size, rounded up to 32 (VarSizeQuant), shr 5; in other words, size = h and VarSizeMask.
 
 
@@ -334,16 +331,7 @@ type
 
 
     pOSChunk = ^OSChunk;
     pOSChunk = ^OSChunk;
     OSChunk = object(OSChunkBase) { Common header for all OS chunks. }
     OSChunk = object(OSChunkBase) { Common header for all OS chunks. }
-    {$ifdef FPC_HAS_FEATURE_THREADING}
-      threadState: pThreadState; { Protected with gs.lock. Nil if orphaned. }
-    {$endif}
       prev, next: pointer; { pOSChunk, but used for different subtypes. }
       prev, next: pointer; { pOSChunk, but used for different subtypes. }
-
-      { For what purpose this chunk was used the last time. -1 — var, N ≥ 0 — fixed size N. Allows for a small optimization when reusing fixed chunks. }
-      sizeIndex: SizeInt;
-
-      { Remove from list src and push to list dest. }
-      procedure MoveTo(var src, dest: pOSChunk);
     end;
     end;
 
 
     pFreeOSChunk = ^FreeOSChunk;
     pFreeOSChunk = ^FreeOSChunk;
@@ -363,13 +351,15 @@ type
     {$endif}
     {$endif}
     end;
     end;
 
 
-    pFixedOSChunk = ^FixedOSChunk;
-    FixedOSChunk = object(OSChunk)
-      { Data starts at FixedOSChunkDataOffset and spans for “maxSize” (virtual value, does not exist directly) bytes, of which:
+    pFixedArena = ^FixedArena;
+    FixedArena = record
+      { Allocated with AllocVar(isArena := true), so has VarHeader to the left.
+
+        Data starts at FixedArenaDataOffset and spans for “maxSize” (virtual value, does not exist directly) bytes, of which:
         — first formattedSize are either allocated (“used”; counted in usedSize) or in the freelist (firstFreeChunk; size = formattedSize - usedSize),
         — first formattedSize are either allocated (“used”; counted in usedSize) or in the freelist (firstFreeChunk; size = formattedSize - usedSize),
         — the rest “maxSize” - formattedSize are yet unallocated space.
         — the rest “maxSize” - formattedSize are yet unallocated space.
 
 
-        This design, together with tracking free chunks per FixedOSChunk rather than per fixed size, trivializes reusing the chunk.
+        This design, together with tracking free chunks per FixedArena rather than per fixed size, trivializes reusing the fixed arenas.
         Chopping all available space at once would get rid of the “unallocated space” entity, but is a lot of potentially wasted work:
         Chopping all available space at once would get rid of the “unallocated space” entity, but is a lot of potentially wasted work:
         https://gitlab.com/freepascal.org/fpc/source/-/issues/40447.
         https://gitlab.com/freepascal.org/fpc/source/-/issues/40447.
 
 
@@ -380,6 +370,8 @@ type
 
 
       firstFreeChunk: pFreeChunk;
       firstFreeChunk: pFreeChunk;
       usedSize, formattedSize, fullThreshold: uint32;
       usedSize, formattedSize, fullThreshold: uint32;
+      sizeIndex: uint32; { For what size it was used the last time. Allows for a small optimization when reusing fixed arenas. }
+      prev, next: pFixedArena;
     end;
     end;
 
 
     pVarOSChunk = ^VarOSChunk;
     pVarOSChunk = ^VarOSChunk;
@@ -389,7 +381,7 @@ type
     pVarHeader = ^VarHeader;
     pVarHeader = ^VarHeader;
     VarHeader = record
     VarHeader = record
     {$ifdef FPC_HAS_FEATURE_THREADING}
     {$ifdef FPC_HAS_FEATURE_THREADING}
-      threadState: pThreadState;
+      threadState: pThreadState; { Protected with gs.lock. Nil if orphaned. }
     {$endif}
     {$endif}
       prevSize: uint32; { Always 0 for the first chunk. }
       prevSize: uint32; { Always 0 for the first chunk. }
       { Assumed to indeed match chunk’s CommonHeader, i.e. that there is no padding after this field.
       { Assumed to indeed match chunk’s CommonHeader, i.e. that there is no padding after this field.
@@ -409,7 +401,8 @@ type
     end;
     end;
 
 
     ThreadState = object
     ThreadState = object
-      fullOS: pFixedOSChunk; { Completely filled fixed OS chunks. }
+      emptyArenas: pFixedArena; { Empty fixed arenas to be reused instead of slower AllocVar. Singly linked list, “prev”s are garbage. }
+      nEmptyArenas: SizeUint; { # of items in emptyArenas. }
       freeOS: FreeOSChunkList; { Completely empty OS chunks. }
       freeOS: FreeOSChunkList; { Completely empty OS chunks. }
     {$ifdef FPC_HAS_FEATURE_THREADING}
     {$ifdef FPC_HAS_FEATURE_THREADING}
       toFree: pFreeChunk; { Free requests from other threads, atomic. }
       toFree: pFreeChunk; { Free requests from other threads, atomic. }
@@ -420,25 +413,26 @@ type
       varOS: pVarOSChunk;
       varOS: pVarOSChunk;
       varFree: RbTree;
       varFree: RbTree;
 
 
-      { OS chunks with at least 1 free chunk (including unformatted space), but not completely empty.
-        OS chunks that become completely empty are moved to freeOS, completely full are moved to fullOS. }
-      fixedPartialOS: array[0 .. FixedSizesCount - 1] of pFixedOSChunk;
+      { Fixed arenas with at least 1 free chunk (including unformatted space), but not completely empty.
+        Fixed arenas that become completely empty are moved to emptyArenas, completely full are... not present in any list. }
+      partialArenas: array[0 .. FixedSizesCount - 1] of pFixedArena;
 
 
-      { Only to calculate preferable new fixed OS chunk sizes...
-        (Updated infrequently, as opposed to possible “fixedUsed”. When a new fixed OS chunk is required, all existing chunks of its size are full.) }
-      allocatedByFullFixed: array[0 .. FixedSizesCount - 1] of SizeUint;
+      { Only to calculate preferable new fixed arena sizes...
+        (Updated infrequently, as opposed to possible “usedPerArena”. When a new arena is required, all existing arenas of its size are full.) }
+      allocatedByFullArenas: array[0 .. FixedSizesCount - 1] of SizeUint;
 
 
     {$ifdef DEBUG_HEAP_INC}
     {$ifdef DEBUG_HEAP_INC}
       procedure Dump(var f: text);
       procedure Dump(var f: text);
     {$endif}
     {$endif}
 
 
+      function ChooseFixedArenaSize(sizeIndex: SizeUint): SizeUint;
       function AllocFixed(size: SizeUint): pointer;
       function AllocFixed(size: SizeUint): pointer;
       function FreeFixed(p: pointer): SizeUint;
       function FreeFixed(p: pointer): SizeUint;
 
 
-      function GetOSChunk(minSize, maxSize: SizeUint; sizeIndex: SizeInt): pOSChunk;
-      function AllocateOSChunk(minSize: SizeUint; sizeIndex: SizeInt): pOSChunk;
+      function GetOSChunk(minSize, maxSize: SizeUint): pOSChunk;
+      function AllocateOSChunk(minSize: SizeUint): pOSChunk;
 
 
-      function AllocVar(size: SizeUint): pointer;
+      function AllocVar(size: SizeUint; isArena: boolean): pointer;
       function FreeVar(p: pointer): SizeUint;
       function FreeVar(p: pointer): SizeUint;
       function TryResizeVar(p: pointer; size: SizeUint): pointer;
       function TryResizeVar(p: pointer; size: SizeUint): pointer;
 
 
@@ -452,12 +446,10 @@ type
       procedure FlushToFree;
       procedure FlushToFree;
 
 
       procedure Orphan; { Must be performed under gs.lock. }
       procedure Orphan; { Must be performed under gs.lock. }
-      class procedure Orphan(list: pFixedOSChunk); static;
-      procedure Adopt(osChunk: pFixedOSChunk); { Must be performed under gs.lock. }
+      procedure AdoptArena(arena: pFixedArena);
       procedure AdoptVarOwner(p: pointer); { Adopts the OS chunk that contains p. Must be performed under gs.lock. }
       procedure AdoptVarOwner(p: pointer); { Adopts the OS chunk that contains p. Must be performed under gs.lock. }
-
-      class function ChangeThreadStates(list: pOSChunk; ts: pThreadState): pOSChunk; static; { Returns the last item of the list. }
       class procedure ChangeThreadState(vOs: pVarOSChunk; ts: pThreadState); static;
       class procedure ChangeThreadState(vOs: pVarOSChunk; ts: pThreadState); static;
+      procedure FreeEmptyArenas(untilCount: SizeUint);
 
 
     {$ifndef FPC_SECTION_THREADVARS}
     {$ifndef FPC_SECTION_THREADVARS}
       procedure FixupSelfPtr;
       procedure FixupSelfPtr;
@@ -476,7 +468,6 @@ type
       lockUse: int32;
       lockUse: int32;
 
 
       { Data from dead threads (“orphaned”), protected by gs.lock. }
       { Data from dead threads (“orphaned”), protected by gs.lock. }
-      fixedOS: pFixedOSChunk;
       freeOS: FreeOSChunkList;
       freeOS: FreeOSChunkList;
       varOS: pVarOSChunk;
       varOS: pVarOSChunk;
     {$endif FPC_HAS_FEATURE_THREADING}
     {$endif FPC_HAS_FEATURE_THREADING}
@@ -494,7 +485,7 @@ type
   const
   const
     CommonHeaderSize = sizeof(CommonHeader);
     CommonHeaderSize = sizeof(CommonHeader);
   {$if MinFixedHeaderAndPayload < CommonHeaderSize + sizeof(FreeChunk)} {$error MinFixedHeaderAndPayload does not fit CommonHeader + FreeChunk.} {$endif}
   {$if MinFixedHeaderAndPayload < CommonHeaderSize + sizeof(FreeChunk)} {$error MinFixedHeaderAndPayload does not fit CommonHeader + FreeChunk.} {$endif}
-    FixedOSChunkDataOffset = (sizeof(FixedOSChunk) + CommonHeaderSize + Alignment - 1) and -Alignment - CommonHeaderSize;
+    FixedArenaDataOffset = (sizeof(FixedArena) + CommonHeaderSize + Alignment - 1) and -Alignment - CommonHeaderSize;
     VarHeaderSize = sizeof(VarHeader);
     VarHeaderSize = sizeof(VarHeader);
     VarOSChunkDataOffset = (sizeof(VarOSChunk) + VarHeaderSize + Alignment - 1) and -Alignment - VarHeaderSize;
     VarOSChunkDataOffset = (sizeof(VarOSChunk) + VarHeaderSize + Alignment - 1) and -Alignment - VarHeaderSize;
     HugeChunkDataOffset = (sizeof(HugeChunk) + CommonHeaderSize + Alignment - 1) and -Alignment - CommonHeaderSize;
     HugeChunkDataOffset = (sizeof(HugeChunk) + CommonHeaderSize + Alignment - 1) and -Alignment - CommonHeaderSize;
@@ -508,7 +499,7 @@ type
 
 
   class function RbTree.Control.KeyCompare(key: pointer; b: pNode): PtrInt;
   class function RbTree.Control.KeyCompare(key: pointer; b: pNode): PtrInt;
   begin
   begin
-    result := PtrInt(key) - PtrInt(HeapInc.pFreeVarChunk(pointer(b) - HeapInc.FreeVarChunkRbnOffset)^.size);
+    result := PtrInt(PtrUint(key)) - PtrInt(HeapInc.pFreeVarChunk(pointer(b) - HeapInc.FreeVarChunkRbnOffset)^.size);
   end;
   end;
 
 
   procedure RbTree.Add(n: pNode);
   procedure RbTree.Add(n: pNode);
@@ -833,26 +824,6 @@ type
     result := FixedSizes[sizeIndex];
     result := FixedSizes[sizeIndex];
   end;
   end;
 
 
-  procedure HeapInc.OSChunk.MoveTo(var src, dest: pOSChunk);
-  var
-    osNext, osPrev: pOSChunk;
-  begin
-    osPrev := prev;
-    osNext := next;
-    if Assigned(osPrev) then
-      osPrev^.next := osNext
-    else
-      src := osNext;
-    if Assigned(osNext) then
-      osNext^.prev := osPrev;
-    prev := nil;
-    osNext := dest;
-    next := osNext;
-    if Assigned(osNext) then
-      osNext^.prev := @self;
-    dest := @self;
-  end;
-
   function HeapInc.FreeOSChunkList.Get(minSize, maxSize: SizeUint): pOSChunk;
   function HeapInc.FreeOSChunkList.Get(minSize, maxSize: SizeUint): pOSChunk;
   var
   var
     prev, next: pFreeOSChunk;
     prev, next: pFreeOSChunk;
@@ -915,7 +886,7 @@ type
   procedure HeapInc.ThreadState.Dump(var f: text);
   procedure HeapInc.ThreadState.Dump(var f: text);
   var
   var
     i: SizeInt;
     i: SizeInt;
-    fix: pFixedOSChunk;
+    fix: pFixedArena;
     fr: pFreeOSChunk;
     fr: pFreeOSChunk;
   {$ifdef FPC_HAS_FEATURE_THREADING}
   {$ifdef FPC_HAS_FEATURE_THREADING}
     tf: pFreeChunk;
     tf: pFreeChunk;
@@ -923,35 +894,52 @@ type
     vfrbn: RbTree.pNode;
     vfrbn: RbTree.pNode;
     vOs: pVarOSChunk;
     vOs: pVarOSChunk;
     p: pointer;
     p: pointer;
-  begin
-    writeln(f, 'used = ', used, ', allocated = ', allocated, ', hugeUsed = ', gs.hugeUsed, ', maxUsed = ', maxUsed, ', maxAllocated = ', maxAllocated);
-    fix := fullOS;
-    if Assigned(fix) then
+    needLE, anything: boolean;
+
+    procedure MaybeLE;
     begin
     begin
-      writeln(f);
-      repeat
-        writeln(f, 'Full fixed: size = ', fix^.size, ', usedSize = ', fix^.usedSize, ', formattedSize = ', fix^.formattedSize, ', fullThreshold = ', fix^.fullThreshold);
-        fix := fix^.next;
-      until not Assigned(fix);
+      if needLE then
+        writeln(f);
+      needLE := false;
     end;
     end;
+
+  begin
+    writeln(f, 'used = ', used, ', allocated = ', allocated, ', hugeUsed = ', gs.hugeUsed, ', maxUsed = ', maxUsed, ', maxAllocated = ', maxAllocated);
+    needLE := true;
+    anything := false;
     for i := 0 to FixedSizesCount - 1 do
     for i := 0 to FixedSizesCount - 1 do
-      if Assigned(fixedPartialOS[i]) then
+    begin
+      if not Assigned(partialArenas[i]) and (allocatedByFullArenas[i] = 0) then
+        continue;
+      MaybeLE;
+      anything := true;
+      write(f, 'Size #', i, ' (', IndexToSize(i), '):');
+      if allocatedByFullArenas[i] <> 0 then
+        write(f, ' allocatedByFullArenas = ', allocatedByFullArenas[i]);
+      if Assigned(partialArenas[i]) then
       begin
       begin
-        write(f, 'Size #', i, ' (', IndexToSize(i), ')');
-        if allocatedByFullFixed[i] <> 0 then
-          write(f, ': allocatedByFullFixed = ', allocatedByFullFixed[i]);
         writeln(f);
         writeln(f);
-        fix := fixedPartialOS[i];
-        while Assigned(fix) do
-        begin
-          writeln(f, 'size = ', fix^.size, ', usedSize = ', fix^.usedSize, ', formattedSize = ', fix^.formattedSize, ', fullThreshold = ', fix^.fullThreshold);
+        fix := partialArenas[i];
+        repeat
+          writeln(f, 'arena size = ', pVarHeader(fix)[-1].ch.h and VarSizeMask - VarHeaderSize - FixedArenaDataOffset, ', usedSize = ', fix^.usedSize, ', formattedSize = ', fix^.formattedSize, ', fullThreshold = ', fix^.fullThreshold);
           fix := fix^.next;
           fix := fix^.next;
-        end;
-      end;
+        until not Assigned(fix);
+      end
+      else if allocatedByFullArenas[i] <> 0 then
+        writeln(f);
+    end;
+    needLE := needLE or anything;
+    if nEmptyArenas <> 0 then
+    begin
+      MaybeLE;
+      writeln(f, 'nEmptyArenas = ', nEmptyArenas);
+      needLE := true;
+    end;
     vOs := varOS;
     vOs := varOS;
     while Assigned(vOs) do
     while Assigned(vOs) do
     begin
     begin
-      writeln(f, LineEnding, 'Var OS chunk, size ', vOs^.size);
+      MaybeLE;
+      writeln(f, 'Var OS chunk, size ', vOs^.size);
       p := pointer(vOs) + (VarOSChunkDataOffset + VarHeaderSize);
       p := pointer(vOs) + (VarOSChunkDataOffset + VarHeaderSize);
       repeat
       repeat
         write(f, HexStr(p), ': ',
         write(f, HexStr(p), ': ',
@@ -964,26 +952,31 @@ type
           write(f, ', first');
           write(f, ', first');
         if pVarHeader(p - VarHeaderSize)^.ch.h and LastFlag <> 0 then
         if pVarHeader(p - VarHeaderSize)^.ch.h and LastFlag <> 0 then
           write(f, ', last');
           write(f, ', last');
+        if pVarHeader(p - VarHeaderSize)^.ch.h and FixedArenaFlag <> 0 then
+          write(f, ', fixed arena');
         writeln(f);
         writeln(f);
         if pVarHeader(p - VarHeaderSize)^.ch.h and LastFlag <> 0 then
         if pVarHeader(p - VarHeaderSize)^.ch.h and LastFlag <> 0 then
           break;
           break;
         p := p + pVarHeader(p - VarHeaderSize)^.ch.h and uint32(VarSizeMask);
         p := p + pVarHeader(p - VarHeaderSize)^.ch.h and uint32(VarSizeMask);
       until false;
       until false;
+      needLE := true;
       vOs := vOs^.next;
       vOs := vOs^.next;
     end;
     end;
     fr := freeOS.first;
     fr := freeOS.first;
     if Assigned(fr) then
     if Assigned(fr) then
     begin
     begin
-      writeln(f);
+      MaybeLE;
       repeat
       repeat
         writeln(f, 'Free OS: ', HexStr(fr), ', size = ', fr^.size);
         writeln(f, 'Free OS: ', HexStr(fr), ', size = ', fr^.size);
         fr := fr^.next;
         fr := fr^.next;
       until not Assigned(fr);
       until not Assigned(fr);
+      needLE := true;
     end;
     end;
     vfrbn := varFree.First;
     vfrbn := varFree.First;
     if Assigned(vfrbn) then
     if Assigned(vfrbn) then
     begin
     begin
-      write(f, LineEnding, 'Var free:');
+      MaybeLE;
+      write(f, 'Var free:');
       repeat
       repeat
         write(f, ' ', pFreeVarChunk(pointer(vfrbn) - FreeVarChunkRbnOffset)^.size);
         write(f, ' ', pFreeVarChunk(pointer(vfrbn) - FreeVarChunkRbnOffset)^.size);
         vfrbn := varFree.Next(vfrbn);
         vfrbn := varFree.Next(vfrbn);
@@ -994,7 +987,8 @@ type
     tf := toFree;
     tf := toFree;
     if Assigned(tf) then
     if Assigned(tf) then
     begin
     begin
-      write(f, LineEnding, 'To-free:');
+      MaybeLE;
+      write(f, 'To-free:');
       repeat
       repeat
         if pCommonHeader(pointer(tf) - CommonHeaderSize)^.h and FixedFlag <> 0 then
         if pCommonHeader(pointer(tf) - CommonHeaderSize)^.h and FixedFlag <> 0 then
           write(f, ' f ', CommonHeaderSize + SysMemSize(tf))
           write(f, ' f ', CommonHeaderSize + SysMemSize(tf))
@@ -1008,46 +1002,65 @@ type
   end;
   end;
 {$endif DEBUG_HEAP_INC}
 {$endif DEBUG_HEAP_INC}
 
 
+  function HeapInc.ThreadState.ChooseFixedArenaSize(sizeIndex: SizeUint): SizeUint;
+  begin
+    result := (allocatedByFullArenas[sizeIndex] div 8 + (FixedArenaSizeQuant - 1)) and SizeUint(-FixedArenaSizeQuant); { 12.5% of memory allocated by the size. }
+    if result < MinFixedArenaSize then
+      result := MinFixedArenaSize;
+    if result > MaxFixedArenaSize then
+      result := MaxFixedArenaSize;
+    dec(result, VarHeaderSize + FixedArenaDataOffset); { Prettier fit into OS chunks. }
+  end;
+
   function HeapInc.ThreadState.AllocFixed(size: SizeUint): pointer;
   function HeapInc.ThreadState.AllocFixed(size: SizeUint): pointer;
   var
   var
     sizeIndex, statv: SizeUint;
     sizeIndex, statv: SizeUint;
-    osChunk, osNext: pFixedOSChunk;
+    arena, nextArena: pFixedArena;
   begin
   begin
     sizeIndex := SizeMinus1ToIndex(size + (CommonHeaderSize - 1));
     sizeIndex := SizeMinus1ToIndex(size + (CommonHeaderSize - 1));
 
 
-    osChunk := fixedPartialOS[sizeIndex];
-    if not Assigned(osChunk) then
+    arena := partialArenas[sizeIndex];
+    if not Assigned(arena) then
     begin
     begin
     {$ifdef FPC_HAS_FEATURE_THREADING}
     {$ifdef FPC_HAS_FEATURE_THREADING}
       if Assigned(toFree) then
       if Assigned(toFree) then
       begin
       begin
         FlushToFree;
         FlushToFree;
-        osChunk := fixedPartialOS[sizeIndex];
+        arena := partialArenas[sizeIndex];
       end;
       end;
-      if not Assigned(osChunk) then
+      if not Assigned(arena) then
     {$endif FPC_HAS_FEATURE_THREADING}
     {$endif FPC_HAS_FEATURE_THREADING}
       begin
       begin
-        osChunk := pFixedOSChunk(GetOSChunk(FixedOSChunkDataOffset + MaxFixedHeaderAndPayload, MaxFixedChunkSize, sizeIndex));
-        if not Assigned(osChunk) then
-          exit(nil);
-        if SizeUint(osChunk^.sizeIndex) = sizeIndex then
+        arena := emptyArenas;
+        if Assigned(arena) then
+        begin
+          emptyArenas := arena^.next;
+          dec(nEmptyArenas);
+        end else
+        begin
+          arena := AllocVar(ChooseFixedArenaSize(sizeIndex), true);
+          if not Assigned(arena) then
+            exit(nil);
+          arena^.sizeIndex := uint32(-1);
+        end;
+        if arena^.sizeIndex = sizeIndex then
           { Lucky! Just don’t reset the chunk and use its old freelist. }
           { Lucky! Just don’t reset the chunk and use its old freelist. }
         else
         else
         begin
         begin
-          osChunk^.sizeIndex := sizeIndex;
-          osChunk^.firstFreeChunk := nil;
-          osChunk^.usedSize := 0;
-          osChunk^.formattedSize := 0;
-          osChunk^.fullThreshold := osChunk^.size - IndexToSize(sizeIndex) - (FixedOSChunkDataOffset - 1); { available OS chunk space - chunk size + 1. }
+          arena^.sizeIndex := sizeIndex;
+          arena^.firstFreeChunk := nil;
+          arena^.usedSize := 0;
+          arena^.formattedSize := 0;
+          arena^.fullThreshold := pVarHeader(arena)[-1].ch.h and VarSizeMask - IndexToSize(sizeIndex) - (VarHeaderSize + FixedArenaDataOffset - 1); { available space - chunk size + 1. }
         end;
         end;
 
 
-        { Add osChunk to fixedPartialOS[sizeIndex]. }
-        osNext := fixedPartialOS[sizeIndex];
-        osChunk^.prev := nil;
-        osChunk^.next := osNext;
-        if Assigned(osNext) then
-          osNext^.prev := osChunk;
-        fixedPartialOS[sizeIndex] := osChunk;
+        { Add arena to partialArenas[sizeIndex]. }
+        nextArena := partialArenas[sizeIndex];
+        arena^.prev := nil;
+        arena^.next := nextArena;
+        if Assigned(nextArena) then
+          nextArena^.prev := arena;
+        partialArenas[sizeIndex] := arena;
       end;
       end;
     end;
     end;
 
 
@@ -1058,119 +1071,97 @@ type
     if statv > maxUsed then
     if statv > maxUsed then
       maxUsed := statv;
       maxUsed := statv;
 
 
-    { osChunk from the fixedPartialOS list has either free chunk or free unformatted space for a new chunk. }
-    result := osChunk^.firstFreeChunk;
+    { arena from partialArenas has either free chunk or free unformatted space for a new chunk. }
+    result := arena^.firstFreeChunk;
     if Assigned(result) then
     if Assigned(result) then
-      osChunk^.firstFreeChunk := pFreeChunk(result)^.next
+      arena^.firstFreeChunk := pFreeChunk(result)^.next
     else
     else
     begin
     begin
-      result := pointer(osChunk) + (FixedOSChunkDataOffset + CommonHeaderSize) + osChunk^.formattedSize;
-      pCommonHeader(result - CommonHeadersize)^.h := sizeIndex + osChunk^.formattedSize shl ChunkOffsetShift +
-        (FixedFlag + (FixedOSChunkDataOffset + CommonHeaderSize) shl ChunkOffsetShift); { ← const }
-      inc(osChunk^.formattedSize, size);
+      result := pointer(arena) + (FixedArenaDataOffset + CommonHeaderSize) + arena^.formattedSize;
+      pCommonHeader(result - CommonHeadersize)^.h := sizeIndex + arena^.formattedSize shl FixedArenaOffsetShift +
+        (FixedFlag + (FixedArenaDataOffset + CommonHeaderSize) shl FixedArenaOffsetShift); { ← const }
+      inc(arena^.formattedSize, size);
     end;
     end;
-    inc(osChunk^.usedSize, size);
-    if osChunk^.usedSize >= osChunk^.fullThreshold then
+    inc(arena^.usedSize, size);
+    if arena^.usedSize >= arena^.fullThreshold then
     begin
     begin
-      inc(allocatedByFullFixed[sizeIndex], osChunk^.size);
-      { Remove osChunk from fixedPartialOS[sizeIndex], add to fullOS. }
-      osNext := osChunk^.next;
-      fixedPartialOS[sizeIndex] := osNext;
-      if Assigned(osNext) then
-        osNext^.prev := nil;
-      osNext := fullOS;
-      osChunk^.next := osNext; { osChunk^.prev is already nil because osChunk was the first item of fixedPartialOS[sizeIndex]. }
-      if Assigned(osNext) then
-        osNext^.prev := osChunk;
-      fullOS := osChunk;
+      inc(allocatedByFullArenas[sizeIndex], pVarHeader(arena)[-1].ch.h and VarSizeMask);
+      { Remove arena from partialArenas[sizeIndex]. (It was first.) }
+      nextArena := arena^.next;
+      partialArenas[sizeIndex] := nextArena;
+      if Assigned(nextArena) then
+        nextArena^.prev := nil;
     end;
     end;
   end;
   end;
 
 
   function HeapInc.ThreadState.FreeFixed(p: pointer): SizeUint;
   function HeapInc.ThreadState.FreeFixed(p: pointer): SizeUint;
   var
   var
     sizeIndex, usedSize: SizeUint;
     sizeIndex, usedSize: SizeUint;
-    osChunk, osPrev, osNext: pFixedOSChunk;
-    freeOsNext: pFreeOSChunk;
+    arena, prevArena, nextArena: pFixedArena;
   begin
   begin
-    osChunk := p - pCommonHeader(p - CommonHeaderSize)^.h shr ChunkOffsetShift;
+    arena := p - pCommonHeader(p - CommonHeaderSize)^.h shr FixedArenaOffsetShift;
 
 
   {$ifdef FPC_HAS_FEATURE_THREADING}
   {$ifdef FPC_HAS_FEATURE_THREADING}
-    { This can be checked without blocking; osChunk^.threadState can only change from one value not equal to @self to another value not equal to @self. }
-    if osChunk^.threadState <> @self then
+    { This can be checked without blocking; <arena>.threadState can only change from one value not equal to @self to another value not equal to @self. }
+    if pVarHeader(arena)[-1].threadState <> @self then
     begin
     begin
       EnterCriticalSection(gs.lock);
       EnterCriticalSection(gs.lock);
-      if Assigned(osChunk^.threadState) then
+      if Assigned(pVarHeader(arena)[-1].threadState) then
       begin
       begin
         { Despite atomic Push lock must be held as otherwise target thread might end and destroy its threadState.
         { Despite atomic Push lock must be held as otherwise target thread might end and destroy its threadState.
           However, target thread won’t block to free p, so PushToFree instantly invalidates p. }
           However, target thread won’t block to free p, so PushToFree instantly invalidates p. }
         result := IndexToSize(pCommonHeader(p - CommonHeaderSize)^.h and SizeIndexMask) - CommonHeaderSize;
         result := IndexToSize(pCommonHeader(p - CommonHeaderSize)^.h and SizeIndexMask) - CommonHeaderSize;
-        osChunk^.threadState^.PushToFree(p);
+        pVarHeader(arena)[-1].threadState^.PushToFree(p);
         LeaveCriticalSection(gs.lock);
         LeaveCriticalSection(gs.lock);
         exit;
         exit;
       end;
       end;
-      Adopt(osChunk); { ...And continue! }
+      AdoptVarOwner(arena); { ...And continue! }
       LeaveCriticalSection(gs.lock);
       LeaveCriticalSection(gs.lock);
     end;
     end;
   {$endif FPC_HAS_FEATURE_THREADING}
   {$endif FPC_HAS_FEATURE_THREADING}
 
 
-    pFreeChunk(p)^.next := osChunk^.firstFreeChunk;
-    osChunk^.firstFreeChunk := p;
+    pFreeChunk(p)^.next := arena^.firstFreeChunk;
+    arena^.firstFreeChunk := p;
     sizeIndex := pCommonHeader(p - CommonHeaderSize)^.h and SizeIndexMask;
     sizeIndex := pCommonHeader(p - CommonHeaderSize)^.h and SizeIndexMask;
     result := IndexToSize(sizeIndex);
     result := IndexToSize(sizeIndex);
     dec(used, result);
     dec(used, result);
-    usedSize := osChunk^.usedSize;
-    if usedSize >= osChunk^.fullThreshold then
+    usedSize := arena^.usedSize;
+    if usedSize >= arena^.fullThreshold then
     begin
     begin
-      dec(allocatedByFullFixed[sizeIndex], osChunk^.size);
-
-      { Remove osChunk from fullOS, add to fixedPartialOS[sizeIndex]. }
-      osPrev := osChunk^.prev;
-      osNext := osChunk^.next;
-      if Assigned(osPrev) then
-        osPrev^.next := osNext
-      else
-        fullOS := osNext;
-      if Assigned(osNext) then
-        osNext^.prev := osPrev;
-      osChunk^.prev := nil;
-      osNext := fixedPartialOS[sizeIndex];
-      osChunk^.next := osNext;
-      if Assigned(osNext) then
-        osNext^.prev := osChunk;
-      fixedPartialOS[sizeIndex] := osChunk;
+      dec(allocatedByFullArenas[sizeIndex], pVarHeader(arena)[-1].ch.h and VarSizeMask);
+      { Add arena to partialArenas[sizeIndex]. }
+      nextArena := partialArenas[sizeIndex];
+      arena^.next := nextArena;
+      if Assigned(nextArena) then
+        nextArena^.prev := arena;
+      partialArenas[sizeIndex] := arena;
     end;
     end;
     dec(usedSize, result);
     dec(usedSize, result);
-    osChunk^.usedSize := usedSize;
+    arena^.usedSize := usedSize;
     if usedSize = 0 then
     if usedSize = 0 then
     begin
     begin
-      { Remove osChunk from fixedPartialOS[sizeIndex], add to freeOS. }
-      osPrev := osChunk^.prev;
-      osNext := osChunk^.next;
-      if Assigned(osPrev) then
-        osPrev^.next := osNext
+      { Remove arena from partialArenas[sizeIndex], add to emptyArenas (maybe). }
+      prevArena := arena^.prev;
+      nextArena := arena^.next;
+      if Assigned(prevArena) then
+        prevArena^.next := nextArena
       else
       else
-        fixedPartialOS[sizeIndex] := osNext;
-      if Assigned(osNext) then
-        osNext^.prev := osPrev;
+        partialArenas[sizeIndex] := nextArena;
+      if Assigned(nextArena) then
+        nextArena^.prev := prevArena;
 
 
-      freeOsNext := freeOS.first;
-      pFreeOSChunk(osChunk)^.prev := nil;
-      pFreeOSChunk(osChunk)^.next := freeOsNext;
-      if Assigned(freeOsNext) then
-        freeOsNext^.prev := osChunk
-      else
-        freeOS.last := pFreeOSChunk(osChunk);
-      freeOS.first := pFreeOSChunk(osChunk);
-    {$ifdef HAS_SYSOSFREE}
-      inc(freeOS.n);
-      if freeOS.n > MaxKeptOSChunks then
-        dec(allocated, freeOS.FreeOne);
-    {$endif}
+      if nEmptyArenas < MaxKeptFixedArenas then
+      begin
+        arena^.next := emptyArenas;
+        emptyArenas := arena;
+        inc(nEmptyArenas);
+      end else
+        FreeVar(arena);
     end;
     end;
     dec(result, CommonHeaderSize);
     dec(result, CommonHeaderSize);
   end;
   end;
 
 
-  function HeapInc.ThreadState.GetOSChunk(minSize, maxSize: SizeUint; sizeIndex: SizeInt): pOSChunk;
+  function HeapInc.ThreadState.GetOSChunk(minSize, maxSize: SizeUint): pOSChunk;
   var
   var
     statv: SizeUint;
     statv: SizeUint;
   begin
   begin
@@ -1185,7 +1176,6 @@ type
       LeaveCriticalSection(gs.lock);
       LeaveCriticalSection(gs.lock);
       if Assigned(result) then
       if Assigned(result) then
       begin
       begin
-        result^.threadState := @self;
         statv := allocated + result^.size;
         statv := allocated + result^.size;
         allocated := statv;
         allocated := statv;
         inc(statv, gs.hugeUsed);
         inc(statv, gs.hugeUsed);
@@ -1195,50 +1185,36 @@ type
       end;
       end;
     end;
     end;
   {$endif FPC_HAS_FEATURE_THREADING}
   {$endif FPC_HAS_FEATURE_THREADING}
-    result := AllocateOSChunk(minSize, sizeIndex);
+    result := AllocateOSChunk(minSize);
   end;
   end;
 
 
-  function HeapInc.ThreadState.AllocateOSChunk(minSize: SizeUint; sizeIndex: SizeInt): pOSChunk;
+  function HeapInc.ThreadState.AllocateOSChunk(minSize: SizeUint): pOSChunk;
   var
   var
-    preferredSize, statv: SizeUint;
+    query, statv: SizeUint;
   begin
   begin
-    if sizeIndex < 0 then
-    begin
-      if minSize <= GrowHeapSize1 then { 256K by default. }
-        preferredSize := GrowHeapSize1
-      else if minSize <= GrowHeapSize2 then { 1M by default. }
-        preferredSize := GrowHeapSize2
-      else
-        preferredSize := (minSize + (OSChunkVarSizeQuant - 1)) and SizeUint(-OSChunkVarSizeQuant);
-    end else
-    begin
-      preferredSize := (allocatedByFullFixed[sizeIndex] div 8 + (OSChunkFixedSizeQuant - 1)) and SizeUint(-OSChunkFixedSizeQuant); { 12.5% of memory allocated by the size. }
-      if preferredSize < growheapsizesmall then
-        preferredSize := growheapsizesmall;
-      if preferredSize > MaxFixedChunkSize then
-        preferredSize := MaxFixedChunkSize;
-    end;
-    result := SysOSAlloc(preferredSize);
-    if not Assigned(result) and (preferredSize > minSize) then
+    query := used div 16 + minSize div 2; { Base: 6.25% of the memory used, so if GrowHeapSize2 = 1 Mb, 1 Mb OS allocations start at 16 Mb used. }
+    if query > GrowHeapSize2 then { Limit by GrowHeapSize2. }
+      query := GrowHeapSize2;
+    if query < minSize then { But of course allocate at least the amount requested. }
+      query := minSize;
+    query := (query + (OSChunkVarSizeQuant - 1)) and SizeUint(-OSChunkVarSizeQuant); { Quantize. }
+    result := SysOSAlloc(query);
+    if not Assigned(result) and (query > minSize) then
     begin
     begin
-      preferredSize := minSize;
-      result := SysOSAlloc(preferredSize);
+      query := minSize;
+      result := SysOSAlloc(query);
     end;
     end;
     if not Assigned(result) then
     if not Assigned(result) then
       exit(AllocFailed);
       exit(AllocFailed);
-    statv := allocated + preferredSize;
+    result^.size := query;
+    statv := allocated + query;
     allocated := statv;
     allocated := statv;
     inc(statv, gs.hugeUsed);
     inc(statv, gs.hugeUsed);
     if statv > maxAllocated then
     if statv > maxAllocated then
       maxAllocated := statv;
       maxAllocated := statv;
-    result^.size := preferredSize;
-  {$ifdef FPC_HAS_FEATURE_THREADING}
-    result^.threadState := @self;
-  {$endif}
-    result^.sizeIndex := -1;
   end;
   end;
 
 
-  function HeapInc.ThreadState.AllocVar(size: SizeUint): pointer;
+  function HeapInc.ThreadState.AllocVar(size: SizeUint; isArena: boolean): pointer;
   var
   var
     fv, toRemove: pFreeVarChunk;
     fv, toRemove: pFreeVarChunk;
     osChunk, osNext: pVarOSChunk;
     osChunk, osNext: pVarOSChunk;
@@ -1262,16 +1238,12 @@ type
     begin
     begin
       toRemove := nil;
       toRemove := nil;
       minSize := VarOSChunkDataOffset + size;
       minSize := VarOSChunkDataOffset + size;
-      if minSize <= GrowHeapSize1 then
-        maxSize := GrowHeapSize1
-      else if minSize <= GrowHeapSize2 then
-        maxSize := GrowHeapSize2
-      else
-        maxSize := High(SizeUint);
-      osChunk := pVarOSChunk(GetOSChunk(minSize, maxSize, -1));
+      maxSize := GrowHeapSize2;
+      if maxSize < minSize then
+        maxSize := High(SizeUint); { Shouldn’t happen (unless GrowHeapSize2 is too small), so don’t optimize the case when it happens. }
+      osChunk := pVarOSChunk(GetOSChunk(minSize, maxSize));
       if not Assigned(osChunk) then
       if not Assigned(osChunk) then
         exit(nil);
         exit(nil);
-      osChunk^.sizeIndex := -1;
 
 
       { Add osChunk to varOS. }
       { Add osChunk to varOS. }
       osNext := varOS;
       osNext := varOS;
@@ -1335,11 +1307,16 @@ type
       size := fv^.size;
       size := fv^.size;
       pVarHeader(result - VarHeaderSize)^.ch.h := pVarHeader(result - VarHeaderSize)^.ch.h and (FirstFlag or LastFlag) + UsedFlag + uint32(size);
       pVarHeader(result - VarHeaderSize)^.ch.h := pVarHeader(result - VarHeaderSize)^.ch.h and (FirstFlag or LastFlag) + UsedFlag + uint32(size);
     end;
     end;
-    statv := used + size;
-    used := statv;
-    inc(statv, gs.hugeUsed);
-    if statv > maxUsed then
-      maxUsed := statv;
+    if isArena then
+      inc(pVarHeader(result)[-1].ch.h, FixedArenaFlag) { Arenas aren’t counted in “used” directly. }
+    else
+    begin
+      statv := used + size;
+      used := statv;
+      inc(statv, gs.hugeUsed);
+      if statv > maxUsed then
+        maxUsed := statv;
+    end;
   end;
   end;
 
 
   function HeapInc.ThreadState.FreeVar(p: pointer): SizeUint;
   function HeapInc.ThreadState.FreeVar(p: pointer): SizeUint;
@@ -1371,7 +1348,10 @@ type
 
 
     fSizeFlags := pVarHeader(p - VarHeaderSize)^.ch.h;
     fSizeFlags := pVarHeader(p - VarHeaderSize)^.ch.h;
     result := fSizeFlags and VarSizeMask;
     result := fSizeFlags and VarSizeMask;
-    dec(used, result);
+    if fSizeFlags and FixedArenaFlag = 0 then
+      dec(used, result)
+    else
+      dec(fSizeFlags, FixedArenaFlag);
     toRemove := nil;
     toRemove := nil;
 
 
     { If next/prev are free, remove them from varFree and merge with f — (f)uture (f)ree chunk that starts at p, has fSizeFlags,
     { If next/prev are free, remove them from varFree and merge with f — (f)uture (f)ree chunk that starts at p, has fSizeFlags,
@@ -1620,7 +1600,7 @@ type
       exit(AllocFailed);
       exit(AllocFailed);
     pHugeChunk(result)^.size := size;
     pHugeChunk(result)^.size := size;
   {$else HAS_SYSOSFREE}
   {$else HAS_SYSOSFREE}
-    result := GetOSChunk(size, High(SizeUint), -1);
+    result := GetOSChunk(size, High(SizeUint));
     if not Assigned(result) then
     if not Assigned(result) then
       exit; { GetOSChunk throws an error if required. }
       exit; { GetOSChunk throws an error if required. }
     size := pOSChunk(result)^.size;
     size := pOSChunk(result)^.size;
@@ -1663,7 +1643,6 @@ type
     else
     else
       fOs^.first := p;
       fOs^.first := p;
     fOs^.last := p;
     fOs^.last := p;
-    pFreeOSChunk(p)^.sizeIndex := -1;
   {$endif ndef HAS_SYSOSFREE}
   {$endif ndef HAS_SYSOSFREE}
   {$ifdef FPC_HAS_FEATURE_THREADING} LeaveCriticalSection(gs.lock); {$endif}
   {$ifdef FPC_HAS_FEATURE_THREADING} LeaveCriticalSection(gs.lock); {$endif}
   {$ifdef HAS_SYSOSFREE} SysOSFree(p, result); {$endif}
   {$ifdef HAS_SYSOSFREE} SysOSFree(p, result); {$endif}
@@ -1743,14 +1722,11 @@ type
 
 
   procedure HeapInc.ThreadState.Orphan;
   procedure HeapInc.ThreadState.Orphan;
   var
   var
-    sizeIndex: SizeUint;
     lastFree, nextFree: pFreeOSChunk;
     lastFree, nextFree: pFreeOSChunk;
     vOs, nextVOs, lastVOs: pVarOSChunk;
     vOs, nextVOs, lastVOs: pVarOSChunk;
   begin
   begin
     FlushToFree;
     FlushToFree;
-    Orphan(fullOS);
-    for sizeIndex := 0 to High(fixedPartialOS) do
-      Orphan(fixedPartialOS[sizeIndex]);
+    FreeEmptyArenas(0);
     { Prepend freeOS to gs.freeOS. }
     { Prepend freeOS to gs.freeOS. }
     lastFree := freeOS.last;
     lastFree := freeOS.last;
     if Assigned(lastFree) then
     if Assigned(lastFree) then
@@ -1789,55 +1765,39 @@ type
     FillChar(self, sizeof(self), 0);
     FillChar(self, sizeof(self), 0);
   end;
   end;
 
 
-  class procedure HeapInc.ThreadState.Orphan(list: pFixedOSChunk);
+  procedure HeapInc.ThreadState.AdoptArena(arena: pFixedArena);
   var
   var
-    last, osNext: pFixedOSChunk;
-  begin
-    if not Assigned(list) then
-      exit;
-    last := pFixedOSChunk(ChangeThreadStates(list, nil));
-    { Prepend list to gs.fixedOS. }
-    osNext := gs.fixedOS;
-    last^.next := osNext;
-    if Assigned(osNext) then
-      osNext^.prev := last;
-    gs.fixedOS := list;
-  end;
-
-  procedure HeapInc.ThreadState.Adopt(osChunk: pFixedOSChunk);
-  var
-    sizeIndex, statv: SizeUint;
-    dest: ^pFixedOSChunk;
+    sizeIndex: SizeUint;
+    nextArena: pFixedArena;
   begin
   begin
-    sizeIndex := pCommonHeader(pointer(osChunk) + FixedOSChunkDataOffset)^.h and SizeIndexMask;
-    statv := used + osChunk^.usedSize;
-    used := statv;
-    inc(statv, gs.hugeUsed);
-    if statv > maxUsed then
-      maxUsed := statv;
+    sizeIndex := pCommonHeader(pointer(arena) + FixedArenaDataOffset)^.h and SizeIndexMask;
+    inc(used, arena^.usedSize); { maxUsed is updated at the end of AdoptVarOwner. }
 
 
-    statv := allocated + osChunk^.size;
-    allocated := statv;
-    inc(statv, gs.hugeUsed);
-    if statv > maxAllocated then
-      maxAllocated := statv;
-
-    { Remove osChunk from gs.fixedOS, add to fullOS or fixedPartialOS[sizeIndex] as appropriate. }
-    dest := @fixedPartialOS[sizeIndex];
-    if osChunk^.usedSize >= osChunk^.fullThreshold then
+    if arena^.usedSize = 0 then
     begin
     begin
-      inc(allocatedByFullFixed[sizeIndex], osChunk^.size);
-      dest := @fullOS;
-    end;
-    osChunk^.MoveTo(gs.fixedOS, dest^);
-
-    osChunk^.threadState := @self;
+      { Add arena to emptyArenas. }
+      arena^.next := emptyArenas;
+      emptyArenas := arena;
+      inc(nEmptyArenas); { May exceed MaxKeptFixedArenas, extra arenas are freed at the end of AdoptVarOwner. }
+    end
+    else if arena^.usedSize < arena^.fullThreshold then
+    begin
+      { Add arena to partialArenas[sizeIndex]. }
+      nextArena := partialArenas[sizeIndex];
+      arena^.prev := nil;
+      arena^.next := nextArena;
+      if Assigned(nextArena) then
+        nextArena^.prev := arena;
+      partialArenas[sizeIndex] := arena;
+    end else
+      inc(allocatedByFullArenas[sizeIndex], pVarHeader(arena)[-1].ch.h and VarSizeMask);
   end;
   end;
 
 
   procedure HeapInc.ThreadState.AdoptVarOwner(p: pointer);
   procedure HeapInc.ThreadState.AdoptVarOwner(p: pointer);
   var
   var
-    prevSize, size, statv: SizeUint;
+    prevSize, statv: SizeUint;
     h: uint32;
     h: uint32;
+    vOs, osPrev, osNext: pVarOSChunk;
   begin
   begin
     repeat
     repeat
       prevSize := pVarHeader(p - VarHeaderSize)^.prevSize;
       prevSize := pVarHeader(p - VarHeaderSize)^.prevSize;
@@ -1845,50 +1805,52 @@ type
     until prevSize = 0;
     until prevSize = 0;
 
 
     { Move OS chunk from gs.varOS to varOS. }
     { Move OS chunk from gs.varOS to varOS. }
-    pOSChunk(p - (VarOSChunkDataOffset + VarHeaderSize))^.MoveTo(gs.varOS, varOS);
-    statv := allocated + pOSChunk(p - (VarOSChunkDataOffset + VarHeaderSize))^.size;
+    vOs := p - (VarOSChunkDataOffset + VarHeaderSize);
+    osPrev := vOs^.prev;
+    osNext := vOs^.next;
+    if Assigned(osPrev) then
+      osPrev^.next := osNext
+    else
+      gs.varOS := osNext;
+    if Assigned(osNext) then
+      osNext^.prev := osPrev;
+    vOs^.prev := nil;
+    osNext := varOS;
+    vOs^.next := osNext;
+    if Assigned(osNext) then
+      osNext^.prev := vOs;
+    varOS := vOs;
+
+    statv := allocated + vOs^.size;
     allocated := statv;
     allocated := statv;
     inc(statv, gs.hugeUsed);
     inc(statv, gs.hugeUsed);
     if statv > maxAllocated then
     if statv > maxAllocated then
       maxAllocated := statv;
       maxAllocated := statv;
 
 
-    { Careful: even though VarHeaders have own threadState links, correct threadState in the OS chunk is required,
-      as the chunk might be orphaned, then adopted with this function, then become free, then be reused as fixed chunk.
-      GetOSChunk does not set threadState if it takes the chunk from local freeOS, assuming it is already set. }
-    pOSChunk(p - (VarOSChunkDataOffset + VarHeaderSize))^.threadState := @self;
-
     repeat
     repeat
       pVarHeader(p - VarHeaderSize)^.threadState := @self;
       pVarHeader(p - VarHeaderSize)^.threadState := @self;
       h := pVarHeader(p - VarHeaderSize)^.ch.h;
       h := pVarHeader(p - VarHeaderSize)^.ch.h;
-      size := h and uint32(VarSizeMask);
       if h and UsedFlag = 0 then
       if h and UsedFlag = 0 then
         varFree.Add(@pFreeVarChunk(p)^.rbn)
         varFree.Add(@pFreeVarChunk(p)^.rbn)
+      else if h and FixedArenaFlag <> 0 then
+         AdoptArena(p)
       else
       else
-        inc(used, size); { maxUsed is updated after the loop. }
-      inc(p, size);
+        inc(used, h and uint32(VarSizeMask)); { maxUsed is updated after the loop. }
+      inc(p, h and uint32(VarSizeMask));
     until h and LastFlag <> 0;
     until h and LastFlag <> 0;
+
+    FreeEmptyArenas(MaxKeptFixedArenas);
+
     statv := used + gs.hugeUsed;
     statv := used + gs.hugeUsed;
     if statv > maxUsed then
     if statv > maxUsed then
       maxUsed := statv;
       maxUsed := statv;
   end;
   end;
 
 
-  class function HeapInc.ThreadState.ChangeThreadStates(list: pOSChunk; ts: pThreadState): pOSChunk; static; { Returns the last item of list. }
-  begin
-    if not Assigned(list) then
-      exit(nil);
-    repeat
-      list^.threadState := ts;
-      result := list;
-      list := list^.next;
-    until not Assigned(list);
-  end;
-
   class procedure HeapInc.ThreadState.ChangeThreadState(vOs: pVarOSChunk; ts: pThreadState);
   class procedure HeapInc.ThreadState.ChangeThreadState(vOs: pVarOSChunk; ts: pThreadState);
   var
   var
     h: uint32;
     h: uint32;
     p: pointer;
     p: pointer;
   begin
   begin
-    vOs^.threadState := ts; { Not really required (for now), but done for symmetry; also see the comment on setting OSChunk.threadState in AdoptVarOwner. }
     p := pointer(vOs) + (VarOSChunkDataOffset + VarHeaderSize);
     p := pointer(vOs) + (VarOSChunkDataOffset + VarHeaderSize);
     repeat
     repeat
       pVarHeader(p - VarHeaderSize)^.threadState := ts;
       pVarHeader(p - VarHeaderSize)^.threadState := ts;
@@ -1897,16 +1859,24 @@ type
     until h and LastFlag <> 0;
     until h and LastFlag <> 0;
   end;
   end;
 
 
+  procedure HeapInc.ThreadState.FreeEmptyArenas(untilCount: SizeUint);
+  var
+    arena: pFixedArena;
+  begin
+    while nEmptyArenas > untilCount do
+    begin
+      arena := emptyArenas;
+      emptyArenas := arena^.next;
+      dec(nEmptyArenas);
+      FreeVar(arena);
+    end;
+  end;
+
 {$ifndef FPC_SECTION_THREADVARS}
 {$ifndef FPC_SECTION_THREADVARS}
   procedure HeapInc.ThreadState.FixupSelfPtr;
   procedure HeapInc.ThreadState.FixupSelfPtr;
   var
   var
-    sizeIndex: SizeUint;
     vOs: pVarOSChunk;
     vOs: pVarOSChunk;
   begin
   begin
-    ChangeThreadStates(fullOS, @self);
-    for sizeIndex := 0 to High(fixedPartialOS) do
-      ChangeThreadStates(fixedPartialOS[sizeIndex], @self);
-    ChangeThreadStates(freeOS.first, @self);
     vOs := varOS;
     vOs := varOS;
     while Assigned(vOs) do
     while Assigned(vOs) do
     begin
     begin
@@ -1958,7 +1928,7 @@ begin
   if size <= HeapInc.MaxFixedHeaderAndPayload - HeapInc.CommonHeaderSize then
   if size <= HeapInc.MaxFixedHeaderAndPayload - HeapInc.CommonHeaderSize then
     result := ts^.AllocFixed(size)
     result := ts^.AllocFixed(size)
   else if size < GrowHeapSize2 div 2 then { Approximate idea on the max size of the variable chunk. Approximate because size does not include headers but GrowHeapSize2 does. }
   else if size < GrowHeapSize2 div 2 then { Approximate idea on the max size of the variable chunk. Approximate because size does not include headers but GrowHeapSize2 does. }
-    result := ts^.AllocVar(size)
+    result := ts^.AllocVar(size, false)
   else
   else
     result := ts^.AllocHuge(size);
     result := ts^.AllocHuge(size);
 end;
 end;

+ 2 - 2
rtl/inc/heaph.inc

@@ -57,8 +57,8 @@ function  IsMemoryManagerSet: Boolean;
 
 
 { Variables }
 { Variables }
 const
 const
-  MaxKeptOSChunks: DWord = 4; { if more than MaxKeptOSChunks are free, the heap manager will release
-                              chunks back to the OS }
+  { if more than MaxKeptOSChunks are free, the heap manager will release chunks back to the OS }
+  MaxKeptOSChunks: DWord = {$ifdef LEGACYHEAP} 4 {$else} 1 {$endif};
   growheapsizesmall : ptruint=32*1024; { fixed-size small blocks will grow with 32k }
   growheapsizesmall : ptruint=32*1024; { fixed-size small blocks will grow with 32k }
   growheapsize1 : ptruint=256*1024;  { < 256k will grow with 256k }
   growheapsize1 : ptruint=256*1024;  { < 256k will grow with 256k }
   growheapsize2 : ptruint=1024*1024; { > 256k will grow with 1m }
   growheapsize2 : ptruint=1024*1024; { > 256k will grow with 1m }