Browse Source

Reallocate memory AGGRESSIVELY, incline ReallocMem to defragment.

Rika Ichinose 3 weeks ago
parent
commit
5e0cbc5b22
1 changed files with 159 additions and 84 deletions
  1. 159 84
      rtl/inc/heap.inc

+ 159 - 84
rtl/inc/heap.inc

@@ -1094,7 +1094,7 @@ type
       pVarHeader(pointer(fv) - VarHeaderSize)^.ch.h := vSizeFlags;
       { Chunk to the right retains its PrevFreeFlag. }
       if vSizeFlags and LastFlag = 0 then
-        pFreeVarTail(pointer(fv) + vSizeFlags - VarHeaderSize - FreeVarTailSize)^.size := vSizeFlags;
+        pFreeVarTail(pointer(fv) + vSizeFlags - (VarHeaderSize + FreeVarTailSize))^.size := vSizeFlags;
       if vSizeFlags >= MinSearchableVarHeaderAndPayload then
         varFree.Add(fv, VarSizeToBinIndex(vSizeFlags, false)); { Rounding down, so not masking is ok. }
     end else
@@ -1125,7 +1125,7 @@ type
     fSizeFlags, hPrev, hNext: SizeUint;
     osChunk, osPrev, osNext: pVarOSChunk;
   {$ifdef FPC_HAS_FEATURE_THREADING}
-    ts: pThreadState;
+    pts: ^pThreadState;
   {$endif FPC_HAS_FEATURE_THREADING}
   {$ifndef HAS_SYSOSFREE}
     freeOsNext: pFreeOSChunk;
@@ -1133,16 +1133,16 @@ type
   {$endif not HAS_SYSOSFREE}
   begin
   {$ifdef FPC_HAS_FEATURE_THREADING}
-    if pVarOSChunk(p + pVarHeader(p - VarHeaderSize)^.ofsToOs)^.threadState <> @self then
+    pts := @pVarOSChunk(p + pVarHeader(p - VarHeaderSize)^.ofsToOs)^.threadState;
+    if pts^ <> @self then
     begin
       EnterCriticalSection(gs.lock);
-      ts := pVarOSChunk(p + pVarHeader(p - VarHeaderSize)^.ofsToOs)^.threadState;
-      if Assigned(ts) then
+      if Assigned(pts^) then
       begin
         { Despite atomic Push lock must be held as otherwise target thread might end and destroy its threadState.
           However, target thread won’t block to free p, so PushToFree instantly invalidates p. }
         result := pVarHeader(p - VarHeaderSize)^.ch.h and VarSizeMask - VarHeaderSize;
-        ts^.PushToFree(p);
+        pts^^.PushToFree(p);
         LeaveCriticalSection(gs.lock);
         exit;
       end;
@@ -1175,7 +1175,7 @@ type
     if fSizeFlags and PrevIsFreeFlag <> 0 then
     begin
       dec(fSizeFlags, PrevIsFreeFlag);
-      p2 := p - pFreeVarTail(p - VarHeaderSize - FreeVarTailSize)^.size;
+      p2 := p - pFreeVarTail(p - (VarHeaderSize + FreeVarTailSize))^.size;
       hPrev := pVarHeader(p2 - VarHeaderSize)^.ch.h;
       if uint32(hPrev) and UsedFlag = 0 then
       begin
@@ -1196,7 +1196,7 @@ type
       if fSizeFlags and LastFlag = 0 then
       begin
         pVarHeader(p + fSizeFlags - VarHeaderSize)^.ch.h := pVarHeader(p + fSizeFlags - VarHeaderSize)^.ch.h or PrevIsFreeFlag; { Could have it already. }
-        pFreeVarTail(p + fSizeFlags - VarHeaderSize - FreeVarTailSize)^.size := fSizeFlags;
+        pFreeVarTail(p + fSizeFlags - (VarHeaderSize + FreeVarTailSize))^.size := fSizeFlags;
       end;
     end else
     begin
@@ -1253,98 +1253,173 @@ type
 
   function HeapInc.ThreadState.TryResizeVar(p: pointer; size: SizeUint): pointer;
   var
-    fp, p2: pointer;
-    oldpsize, fSizeFlags, growby, statv: SizeUint;
+    ar: pointer absolute result;
+    fv, fp: pointer;
+    arSizeFlags, prevSize2, maxFv, minFragment, fSizeFlags, hNext, hNext2, oldph: uint32;
+    prevSize, binIndex, oldpsize, statv, arSize: SizeUint;
+  {$ifdef FPC_HAS_FEATURE_THREADING}
+    pts: ^pThreadState;
+  {$endif FPC_HAS_FEATURE_THREADING}
   begin
-    if (size <= MaxFixedHeaderAndPayload - CommonHeaderSize)
-      or (size > GrowHeapSize2) { Not strictly necessary but rejects clearly wrong values early so adding headers to the size doesn’t overflow. }
-    {$ifdef FPC_HAS_FEATURE_THREADING}
-      or (pVarOSChunk(p + pVarHeader(p - VarHeaderSize)^.ofsToOs)^.threadState <> @self)
-    {$endif}
+    result := nil;
+    if (size > GrowHeapSize2) { Assuming GrowHeapSize2 is never larger than 3.999 Gb, this prevents overflow on adding headers and allows uint32(size) to tune for x64. }
+      or (uint32(size) <= MaxFixedHeaderAndPayload - CommonHeaderSize)
     then
-      exit(nil);
+      exit;
+
+  {$ifdef FPC_HAS_FEATURE_THREADING}
+    pts := @pVarOSChunk(p + pVarHeader(p - VarHeaderSize)^.ofsToOs)^.threadState;
+    if pts^ <> @self then
+    begin
+      if Assigned(pts^) then { Pretest to avoid acquiring the lock. }
+        exit;
+      EnterCriticalSection(gs.lock);
+      if Assigned(pts^) then
+      begin
+        LeaveCriticalSection(gs.lock);
+        exit;
+      end;
+      AdoptVarOwner(p); { ...And continue! }
+      LeaveCriticalSection(gs.lock);
+    end;
+  {$endif FPC_HAS_FEATURE_THREADING}
 
     { Round the size up, but only if supported by VarSizeToBinIndex: chunks can be reallocated to the sizes larger than MaxVarHeaderAndPayload. }
-    if size <= MaxVarHeaderAndPayload - VarHeaderSize then
-      size := BinIndexToVarSize(VarSizeToBinIndex(size + VarHeaderSize, true))
-    else
-      size := (size + (VarHeaderSize + VarSizeQuant - 1)) and SizeUint(-VarSizeQuant); { Just do the strictly necessary quantization... }
+    if uint32(size) <= MaxVarHeaderAndPayload - VarHeaderSize then
+    begin
+      binIndex := VarSizeToBinIndex(size + VarHeaderSize, true);
+      size := BinIndexToVarSize(binIndex);
+    end else
+      size := uint32(uint32(size) + (VarHeaderSize + VarSizeQuant - 1)) and uint32(-VarSizeQuant); { Just do the strictly necessary quantization... }
+
+    { ar + arSizeFlags (from “around”) is the chunk made from p and its adjacent free chunks. }
+    ar := p;
+    arSizeFlags := pVarHeader(ar - VarHeaderSize)^.ch.h;
 
-    result := p; { From now on use result instead of p (saves a register). }
+    if arSizeFlags and LastFlag = 0 then
+    begin
+      hNext := pVarHeader(ar + arSizeFlags and VarSizeMask - VarHeaderSize)^.ch.h;
+      if hNext and UsedFlag = 0 then
+        inc(arSizeFlags, hNext); { Inherit LastFlag, other flags must be 0. }
+    end;
 
-    oldpsize := pVarHeader(result - VarHeaderSize)^.ch.h and VarSizeMask;
-    p2 := result + oldpsize;
-    { (f)uture (f)ree chunk starting at p + size and having fSizeFlags will be created at the end, must exit before that if not required. }
-    if size <= oldpsize then
+    if arSizeFlags and PrevIsFreeFlag <> 0 then
     begin
-      { Shrink. Maybe. }
-      fSizeFlags := oldpsize - size;
+      prevSize := pFreeVarTail(ar - (VarHeaderSize + FreeVarTailSize))^.size;
+      dec(ar, prevSize);
+      inc(arSizeFlags, prevSize);
+    end;
+
+    if uint32(size) > arSizeFlags then { “ar” has no way to fit the new chunk. }
+      exit(nil);
+
+    { Check if there is a better place... }
+    maxFv := arSizeFlags div 4 * 3;
+    if (uint32(size) <= MaxVarHeaderAndPayload) and (uint32(size) < maxFv) then { Pretest the condition on a “CONSIDERABLY” better fv below, maybe it’s not going to happen no matter what. }
+    begin
+      fv := varFree.Find(binIndex);
+      if Assigned(fv)
+        { fv may be one of the chunks around; in this case, ignore it. Checked as unsigned(fv - ar) < arSize. }
+        and (PtrUint(PtrInt(PtrUint(fv)) - PtrInt(PtrUint(ar))) >= arSizeFlags) { Logically “arSizeFlags and VarSizeMask”. }
+        { To justify moving FAR, better place should be CONSIDERABLY better: say, <75% of the ar. }
+        and (pVarHeader(fv)[-1].ch.h < maxFv) { Ignore masking, this is a rough check anyway. }
+      then
+        exit(nil);
+    end;
+
+    { So p will be placed inside “ar” after all. It is either moved to the beginning of “ar” or stays in place.
 
-      if (pVarHeader(result - VarHeaderSize)^.ch.h and LastFlag <> 0) or (pVarHeader(p2 - VarHeaderSize)^.ch.h and UsedFlag <> 0) then
+      There might be no choice but to move: reallocating A in
+      [free 1,000][A 1,000][free 1,000]
+      to 2,500 bytes has to move, resulting in
+      [A 2,500][free 500].
+
+      But if there is a choice, moving might be or not be worth it. If we have
+      [free 5,000][A 1,000][free 5,000]
+      then moving will give
+      [A 1,000][free 10,000]
+      and that’s the point — [free 10,000] is better than 2 × [free 5,000]. But if we have
+      [free 64][A 1,000][free 9,936]
+      then moving for the sake of defragmenting these 64 bytes is definitely a waste of time.
+
+      So if there is a choice, moving is performed if fragments on BOTH sides are larger than 1/8 (12.5%) of the (new) size. }
+
+    if arSizeFlags and PrevIsFreeFlag <> 0 then
+    begin
+      prevSize2 := pFreeVarTail(p - (VarHeaderSize + FreeVarTailSize))^.size;
+      { Consider (not) moving... }
+      dec(arSizeFlags, prevSize2); { Temporarily (or not) remove prevSize from arSizeFlags. This corresponds to the size available without moving. }
+      minFragment := uint32(size) div 8;
+      if (arSizeFlags < uint32(size)) { Size does not fit without moving? }
+        or (prevSize2 >= minFragment) and (uint32(arSizeFlags - uint32(size)) >= minFragment) { There are large enough fragments on both sides? }
+      then
       begin
-        { No empty chunk to the right: create free chunk following the same logic as in AllocVar regarding the non-searchable tail, otherwise report success but change nothing. }
-        if fSizeFlags < MinAnyVarHeaderAndPayload + (MinSearchableVarHeaderAndPayload - MinAnyVarHeaderAndPayload) div LastFlag * (pVarHeader(result)[-1].ch.h and LastFlag) then
-          exit;
-        dec(used, fSizeFlags);
-        inc(fSizeFlags, pVarHeader(result - VarHeaderSize)^.ch.h and LastFlag);
-        dec(pVarHeader(result - VarHeaderSize)^.ch.h, fSizeFlags);
+        if prevSize2 >= MinSearchableVarHeaderAndPayload then
+          varFree.Remove(ar);
+        inc(arSizeFlags, prevSize2 - PrevIsFreeFlag); { Add prevSize back, and remove PrevIsFreeFlag. }
+        { Move(p^, ar^, ...) is postponed, see below. }
       end else
-      begin
-        if fSizeFlags = 0 then { Exit early if going to be a no-op. Branch above does the same with a broader check. }
-          exit;
-        dec(used, fSizeFlags);
-        { Has empty chunk to the right: extend with freed space. }
-        dec(pVarHeader(result - VarHeaderSize)^.ch.h, fSizeFlags);
-        inc(fSizeFlags, pVarHeader(p2 - VarHeaderSize)^.ch.h);
-        if pVarHeader(p2)[-1].ch.h >= MinSearchableVarHeaderAndPayload then
-          varFree.Remove(p2);
-      end;
-    end
-    { Grow if there is free space. Note this can result in a chunk larger than e.g. SysGetMem allows (GrowHeapSize div 2 or so). That’s okay as it saves a Move. }
-    else if (pVarHeader(result - VarHeaderSize)^.ch.h and LastFlag = 0) and (pVarHeader(p2 - VarHeaderSize)^.ch.h and UsedFlag = 0) and
-      (pVarHeader(p2)[-1].ch.h >= SizeUint(size - oldpsize)) { Can check without “and VarSizeMask”, will remain ≥ anyway. }
-    then
+        { Not moving; finish the removal of the previous chunk from “ar”. arSizeFlags is already decreased by prevSize, and keeps PrevIsFreeFlag. }
+        ar := p; { Same as inc(ar, prevSize2). }
+    end;
+
+    { Remove the free chunk after p. Note that:
+
+      — Under some circumstances, it can be overwritten with Move, so Move must be postponed.
+
+      — This section might decide that TryResizeVar is a complete no-op and exit “early”, and this decision depends on the decision to move,
+        so the decision to move must be made first.
+        Though a nontrivial amount of work has been done by this point, some more remains and can be skipped to speed up no-op ReallocMems (e.g. 26 → 16 ns).
+        Without shortcutting the no-op case, this entire section can be simply moved above the previous one and postponing Move would not be required. }
+
+    oldph := pVarHeader(p)[-1].ch.h;
+    oldpsize := oldph and VarSizeMask;
+    if (uint32(size) = uint32(oldpsize)) and (ar = p) then
+      { TryResizeVar was a no-op, and with some explicit efforts we managed to write nothing by this point,
+        so we use our last chance to get out. }
+      exit;
+    if oldph and LastFlag = 0 then
     begin
-      fSizeFlags := pVarHeader(p2)[-1].ch.h - (size - oldpsize); { Inherits LastFlag, other flags are 0. }
-      if fSizeFlags < MinAnyVarHeaderAndPayload + (MinSearchableVarHeaderAndPayload - MinAnyVarHeaderAndPayload) div LastFlag * (fSizeFlags and LastFlag) then
-        fSizeFlags := fSizeFlags and LastFlag;
+      hNext2 := pVarHeader(p + oldpsize - VarHeaderSize)^.ch.h;
+      if (hNext2 and UsedFlag = 0) and (hNext2 >= MinSearchableVarHeaderAndPayload) then
+        varFree.Remove(p + oldpsize);
+    end;
+    dec(used, oldpsize);
 
-      growby := pVarHeader(p2)[-1].ch.h - fSizeFlags;
-      size := oldpsize + growby;
-      statv := used + growby;
-      used := statv;
-      inc(statv, gs.hugeUsed);
-      if statv > maxUsed then
-        maxUsed := statv;
-      { Update p size. }
-      inc(pVarHeader(result - VarHeaderSize)^.ch.h, growby);
+    if ar <> p then
+    begin
+      if uint32(size) < uint32(oldpsize) then { oldpsize is reused as “moveSize”. }
+        oldpsize := uint32(size);
+      Move(p^, ar^, oldpsize - VarHeaderSize);
+    end;
 
-      if pVarHeader(p2)[-1].ch.h >= MinSearchableVarHeaderAndPayload then
-        varFree.Remove(p2);
-      { No empty chunk? }
-      if fSizeFlags <= LastFlag then
+    { Format the free chunk after ar, or its absence. }
+    fSizeFlags := uint32(arSizeFlags - uint32(size)) and (VarSizeMask or LastFlag);
+    if fSizeFlags >= uint32(MinAnyVarHeaderAndPayload + (MinSearchableVarHeaderAndPayload - MinAnyVarHeaderAndPayload) div LastFlag * (fSizeFlags and LastFlag)) then
+    begin
+      dec(arSizeFlags, fSizeFlags);
+      arSize := arSizeFlags and VarSizeMask;
+      fp := ar + arSize;
+      pVarHeader(fp)[-1].ofsToOs := pVarHeader(ar)[-1].ofsToOs - int32(arSize);
+      pVarHeader(fp)[-1].ch.h := fSizeFlags;
+      if fSizeFlags and LastFlag = 0 then
       begin
-        inc(pVarHeader(result - VarHeaderSize)^.ch.h, fSizeFlags); { Either += LastFlag or a no-op. }
-        if fSizeFlags = 0 then { logically “and LastFlag = 0” }
-          dec(pVarHeader(result + size - VarHeaderSize)^.ch.h, PrevIsFreeFlag);
-        exit;
+        pFreeVarTail(fp + fSizeFlags - (VarHeaderSize + FreeVarTailSize))^.size := fSizeFlags;
+        pVarHeader(fp + fSizeFlags)[-1].ch.h := pVarHeader(fp + fSizeFlags)[-1].ch.h or PrevIsFreeFlag; { May have had it already. }
       end;
-    end else
-      { Possible another case to handle: on growth, if there is no space to the right but there is space to the LEFT, move the data there, avoiding the GetMem + FreeMem.
-        Probably not common enough, but I didn’t even investigate. }
-      exit(nil);
+      if fSizeFlags >= MinSearchableVarHeaderAndPayload then
+        varFree.Add(fp, VarSizeToBinIndex(fSizeFlags, false));
+    end
+    else if arSizeFlags and LastFlag = 0 then
+      pVarHeader(ar + arSizeFlags and VarSizeMask)[-1].ch.h := pVarHeader(ar + arSizeFlags and VarSizeMask)[-1].ch.h and uint32(not PrevIsFreeFlag); { May not have had it already. }
 
-    { Format new free var chunk. }
-    fp := result + size;
-    pVarHeader(fp - VarHeaderSize)^.ofsToOs := pVarHeader(result - VarHeaderSize)^.ofsToOs - int32(size);
-    pVarHeader(fp - VarHeaderSize)^.ch.h := fSizeFlags;
-    if fSizeFlags and LastFlag = 0 then
-    begin
-      pVarHeader(fp + fSizeFlags - VarHeaderSize)^.ch.h := pVarHeader(fp + fSizeFlags - VarHeaderSize)^.ch.h or PrevIsFreeFlag; { Could have it already. }
-      pFreeVarTail(fp + fSizeFlags - VarHeaderSize - FreeVarTailSize)^.size := fSizeFlags;
-    end;
-    if fSizeFlags >= MinSearchableVarHeaderAndPayload then
-      varFree.Add(fp, VarSizeToBinIndex(fSizeFlags, false));
+    pVarHeader(ar)[-1].ch.h := arSizeFlags;
+
+    statv := used + arSizeFlags and VarSizeMask;
+    used := statv;
+    inc(statv, gs.hugeUsed);
+    if statv > maxUsed then
+      maxUsed := statv;
   end;
 
   { If SysOSFree is available, huge chunks aren’t cached by any means.