Jelajahi Sumber

Inline two variants of VarSizeToBinIndex and fix an irreproducible bug in unsynchronized gs.varFree.Find.

Rika Ichinose 2 bulan lalu
induk
melakukan
db09ff9370
1 mengubah file dengan 31 tambahan dan 21 penghapusan
  1. 31 21
      rtl/inc/heap.inc

+ 31 - 21
rtl/inc/heap.inc

@@ -243,7 +243,8 @@ type
     MinSearchableVarHeaderAndPayload = (MaxFixedHeaderAndPayload + 1 shl FirstVarStepP2 + VarSizeQuant - 1) and -VarSizeQuant;
     MaxVarHeaderAndPayload = (MaxFixedHeaderAndPayload + (1 shl VarSizeClassesCount - 1) shl FirstVarRangeP2) and -VarSizeQuant; {$if MaxVarHeaderAndPayload <> MaxFixedHeaderAndPayload + 1047552} {$error does not match the explanation above :D} {$endif}
 
-    class function VarSizeToBinIndex(size: SizeUint; roundUp: boolean): SizeUint; static;
+    class function VarSizeToBinIndex(size: SizeUint; roundUp: boolean): SizeUint; static; inline; { roundUp is constant. }
+    class function VarSizeToBinIndexUp(size: SizeUint): SizeUint; static; { ...but VarSizeToBinIndex is nontrivial enough to not inline except for the special case in VarFreeMap.Add. }
     class function BinIndexToVarSize(binIndex: SizeUint): SizeUint; static; inline;
 
   type
@@ -362,7 +363,6 @@ type
     HugeChunk = object(OSChunkBase)
     end;
 
-    NonZeroDWord = 1 .. High(uint32); { MAYBE IT WILL WORK ONE DAY (https://gitlab.com/freepascal.org/fpc/source/-/issues/41179). }
   {$ifdef HEAP_INC_USE_SETS}
     Set32 = set of 0 .. 31;
   {$endif HEAP_INC_USE_SETS}
@@ -374,6 +374,8 @@ type
       L1: uint32;
       L0: array[0 .. (VarSizesCount + L0BinSize - 1) div L0BinSize - 1] of uint32;
       bins: array[0 .. VarSizesCount - 1] of pFreeVarChunk;
+      { As an optimization, Add.binIndex can also be a size (will be rounded down), assuming VarSizesCount <= MinSearchableVarHeaderAndPayload. }
+    {$if VarSizesCount > MinSearchableVarHeaderAndPayload} {$error assumption above does not hold} {$endif}
       procedure Add(c: pFreeVarChunk; binIndex: SizeUint);
       procedure Remove(c: pFreeVarChunk);
       function Find(binIndex: SizeUint): pFreeVarChunk;
@@ -492,25 +494,27 @@ type
   var
     maxv, binClassIndex: SizeUint;
   begin
-    if size >= MaxVarHeaderAndPayload then { Large sizes go to the last bin, assuming searches never search for more than MaxVarHeaderAndPayload. }
-      exit(VarSizeClassesCount * VarSizesPerClass - 1);
     dec(size, MaxFixedHeaderAndPayload);
-    binClassIndex := SizeUint(BsrDWord(NonZeroDWord(size)) - FirstVarRangeP2);
-    if SizeInt(binClassIndex) < 0 then binClassIndex := 0;
-    maxv := (SizeUint(2) shl binClassIndex - 1) shl FirstVarRangeP2;
+    binClassIndex := BsrDWord(uint32(size) or 1 shl FirstVarRangeP2); { Temporarily off by +FirstVarRangeP2. }
+    maxv := SizeUint(2) shl binClassIndex - 1 shl FirstVarRangeP2;
     if size <= maxv then
     begin
       maxv := maxv shr 1; { Turn into “minv” to be subtracted from size. If size > maxv, “minv” is maxv. :) }
       maxv := maxv and SizeUint(-SizeInt(1 shl FirstVarRangeP2));
-      dec(SizeInt(binClassIndex)); { Compensate inc(binClassIndex) below, so in the end, it is increased if size > maxv. All of this prevents having an “else” branch with its extra jump. }
+      dec(SizeInt(binClassIndex)); { Compensate extra +1 to binClassIndex below, so in the end, it is increased if size > maxv. All of this prevents having an “else” branch with its extra jump. }
     end;
     dec(size, maxv);
-    inc(SizeInt(binClassIndex));
-    result := binClassIndex * VarSizesPerClass + SizeUint(size - 1) shr (FirstVarStepP2 + binClassIndex);
-    if not roundUp and (size and SizeUint(SizeUint(1) shl (FirstVarStepP2 + binClassIndex) - 1) <> 0) then
+    inc(SizeInt(binClassIndex), 1 - FirstVarRangeP2); { No longer off by +FirstVarRangeP2. }
+    result := binClassIndex * VarSizesPerClass + SizeUint(size - 1) shr (binClassIndex + FirstVarStepP2);
+    if not roundUp and (size and SizeUint(SizeUint(1) shl (binClassIndex + FirstVarStepP2) - 1) <> 0) then
       dec(result);
   end;
 
+  class function HeapInc.VarSizeToBinIndexUp(size: SizeUint): SizeUint;
+  begin
+    result := VarSizeToBinIndex(size, true);
+  end;
+
   class function HeapInc.BinIndexToVarSize(binIndex: SizeUint): SizeUint;
   begin
     result := binIndex div VarSizesPerClass;
@@ -547,6 +551,12 @@ type
     iL0: SizeUint;
     vL0 {$ifdef HEAP_INC_USE_SETS}, vL1 {$endif}: uint32;
   begin
+    if binIndex >= VarSizesCount then
+      if binIndex >= MaxVarHeaderAndPayload then { Large sizes go to the last bin, assuming searches never search for more than MaxVarHeaderAndPayload. }
+        binIndex := VarSizesCount - 1
+      else
+        binIndex := VarSizeToBinIndex(binIndex, false);
+
     next := bins[binIndex];
     c^.prev := nil;
     c^.next := next;
@@ -623,12 +633,12 @@ type
       exit;
     mask := L0[binIndex div L0BinSize] shr (binIndex mod L0BinSize); { Logically should be “1 + binIndex mod L0BinSize” but the bit that represents the binIndex-th bin is 0 anyway. }
     if mask <> 0 then
-      exit(bins[binIndex + BsfDWord(NonZeroDWord(mask))]);
+      exit(bins[binIndex + BsfDWord(mask or 1 shl (L0BinSize - 1))]); { In these two BsfDWords, ensuring the highest bit to be set is just an optimization, as long as the supposed alternative from https://gitlab.com/freepascal.org/fpc/source/-/issues/41179 does not work. }
     mask := L1 and (SizeUint(-2) shl (binIndex div L0BinSize));
     if mask <> 0 then
     begin
-      binIndex := BsfDWord(NonZeroDWord(mask)); { Index at L0. }
-      result := bins[binIndex * L0BinSize + BsfDWord(NonZeroDWord(L0[binIndex]))];
+      binIndex := BsfDWord(mask or 1 shl (L0BinSize - 1)); { Index at L0. }
+      result := bins[binIndex * L0BinSize + BsfDWord(L0[binIndex] or 1 shl (L0BinSize - 1))]; { Careful, this time “or 1 shl (L0BinSize - 1)” is NOT an optimization: unsynchronized gs.varFree.Find can read zero from L0[binIndex]. }
     end;
   end;
 
@@ -880,7 +890,7 @@ type
     partialArenas[sizeIndex] := nextArena;
     if Assigned(nextArena) then
       nextArena^.prev := nil;
-    { And since this is unlikely, it won’t hurt to update maxUsed (unlike doing in in the common path). }
+    { And since this is unlikely, it won’t hurt to update maxUsed (unlike doing it in the common path). }
     statv := used + gs.hugeUsed;
     if statv > maxUsed then
       maxUsed := statv;
@@ -1028,7 +1038,7 @@ type
     binIndex, vSizeFlags, statv: SizeUint;
   begin
     { Search varFree for (roughly) smallest chunk ≥ size. }
-    binIndex := VarSizeToBinIndex(size + VarHeaderSize, true);
+    binIndex := VarSizeToBinIndexUp(size + VarHeaderSize);
     { Round the size up to the bin size.
       Can do without that, but not doing that will often mean the inability to reuse the hole because varFree rounds up for searches and down for additions. }
     size := BinIndexToVarSize(binIndex);
@@ -1095,7 +1105,7 @@ type
       if vSizeFlags and LastFlag = 0 then
         pFreeVarTail(pointer(fv) + vSizeFlags - (VarHeaderSize + FreeVarTailSize))^.size := vSizeFlags;
       if vSizeFlags >= MinSearchableVarHeaderAndPayload then
-        varFree.Add(fv, VarSizeToBinIndex(vSizeFlags, false)); { Rounding down, so not masking is ok. }
+        varFree.Add(fv, vSizeFlags); { Rounding down, so not masking is ok. }
     end else
     begin
       { Use the entire chunk. }
@@ -1190,7 +1200,7 @@ type
     begin
       dec(fSizeFlags, UsedFlag);
       pVarHeader(p - VarHeaderSize)^.ch.h := fSizeFlags;
-      varFree.Add(p, VarSizeToBinIndex(fSizeFlags, false));
+      varFree.Add(p, fSizeFlags);
       if fSizeFlags and LastFlag = 0 then
       begin
         pVarHeader(p + fSizeFlags - VarHeaderSize)^.ch.h := pVarHeader(p + fSizeFlags - VarHeaderSize)^.ch.h or PrevIsFreeFlag; { Could have it already. }
@@ -1282,10 +1292,10 @@ type
     end;
   {$endif FPC_HAS_FEATURE_THREADING}
 
-    { Round the size up, but only if supported by VarSizeToBinIndex: chunks can be reallocated to the sizes larger than MaxVarHeaderAndPayload. }
+    { Round the size up, but only if supported by VarSizeToBinIndexUp: chunks can be reallocated to the sizes larger than MaxVarHeaderAndPayload. }
     if uint32(size) <= MaxVarHeaderAndPayload - VarHeaderSize then
     begin
-      binIndex := VarSizeToBinIndex(size + VarHeaderSize, true);
+      binIndex := VarSizeToBinIndexUp(size + VarHeaderSize);
       size := BinIndexToVarSize(binIndex);
     end else
       size := uint32(uint32(size) + (VarHeaderSize + VarSizeQuant - 1)) and uint32(-VarSizeQuant); { Just do the strictly necessary quantization... }
@@ -1406,7 +1416,7 @@ type
         pVarHeader(fp + fSizeFlags)[-1].ch.h := pVarHeader(fp + fSizeFlags)[-1].ch.h or PrevIsFreeFlag; { May have had it already. }
       end;
       if fSizeFlags >= MinSearchableVarHeaderAndPayload then
-        varFree.Add(fp, VarSizeToBinIndex(fSizeFlags, false));
+        varFree.Add(fp, fSizeFlags);
     end
     else if arSizeFlags and LastFlag = 0 then
       pVarHeader(ar + arSizeFlags and VarSizeMask)[-1].ch.h := pVarHeader(ar + arSizeFlags and VarSizeMask)[-1].ch.h and uint32(not PrevIsFreeFlag); { May not have had it already. }