Browse Source

Fix leaks on DLL unloading.

Rika Ichinose 1 day ago
parent
commit
6a7d42bb22
6 changed files with 224 additions and 66 deletions
  1. 126 38
      rtl/inc/heap.inc
  2. 8 1
      rtl/inc/heaph.inc
  3. 81 18
      rtl/win/systhrd.inc
  4. 6 6
      rtl/win/syswin.inc
  5. 2 2
      rtl/win32/system.pp
  6. 1 1
      rtl/win64/system.pp

+ 126 - 38
rtl/inc/heap.inc

@@ -416,6 +416,10 @@ type
 
 
       varFree: VarFreeMap;
       varFree: VarFreeMap;
 
 
+    {$if defined(SUPPORT_INIT_HEAP_PROCESS_WIDE) and defined(HAS_SYSOSFREE)}
+      prev, next: pThreadState; { For gs.threads. }
+    {$endif SUPPORT_INIT_HEAP_PROCESS_WIDE and HAS_SYSOSFREE}
+
     {$ifdef DEBUG_HEAP_INC}
     {$ifdef DEBUG_HEAP_INC}
       procedure Dump(var f: text);
       procedure Dump(var f: text);
     {$endif}
     {$endif}
@@ -424,6 +428,9 @@ type
       function AllocFixed(size: SizeUint): pointer; {$ifndef DEBUG_HEAP_INC} inline; {$endif}
       function AllocFixed(size: SizeUint): pointer; {$ifndef DEBUG_HEAP_INC} inline; {$endif}
       function FreeFixed(p: pointer): SizeUint; {$ifndef DEBUG_HEAP_INC} inline; {$endif}
       function FreeFixed(p: pointer): SizeUint; {$ifndef DEBUG_HEAP_INC} inline; {$endif}
       procedure FreeEmptyArenas;
       procedure FreeEmptyArenas;
+    {$ifdef HAS_SYSOSFREE}
+      procedure ReplaceFreeOS1(&with: pFreeOSChunk); inline;
+    {$endif HAS_SYSOSFREE}
 
 
       function GetOSChunk(minSize, maxSize: SizeUint): pOSChunk; {$if defined(HAS_SYSOSFREE) or not defined(FPC_HAS_FEATURE_THREADING)} inline; {$endif}
       function GetOSChunk(minSize, maxSize: SizeUint): pOSChunk; {$if defined(HAS_SYSOSFREE) or not defined(FPC_HAS_FEATURE_THREADING)} inline; {$endif}
       function AllocateOSChunk(minSize, maxSize: SizeUint): pOSChunk;
       function AllocateOSChunk(minSize, maxSize: SizeUint): pOSChunk;
@@ -441,6 +448,7 @@ type
     {$ifdef FPC_HAS_FEATURE_THREADING}
     {$ifdef FPC_HAS_FEATURE_THREADING}
       procedure PushToFree(p: pFreeChunk);
       procedure PushToFree(p: pFreeChunk);
       procedure FlushToFree;
       procedure FlushToFree;
+      class procedure FreeToFreeList(tf: pFreeChunk); static;
 
 
       procedure Orphan;
       procedure Orphan;
       procedure AdoptArena(arena: pFixedArena);
       procedure AdoptArena(arena: pFixedArena);
@@ -464,13 +472,17 @@ type
     {$ifdef FPC_HAS_FEATURE_THREADING}
     {$ifdef FPC_HAS_FEATURE_THREADING}
       lock: TRTLCriticalSection;
       lock: TRTLCriticalSection;
       lockUse: int32;
       lockUse: int32;
+    {$ifdef SUPPORT_INIT_HEAP_PROCESS_WIDE}
       askedForProcessWideLockInitialization: boolean;
       askedForProcessWideLockInitialization: boolean;
+    {$endif SUPPORT_INIT_HEAP_PROCESS_WIDE}
 
 
       { Like ThreadState.varFree but over orphaned OS chunks. Protected by gs.lock. }
       { Like ThreadState.varFree but over orphaned OS chunks. Protected by gs.lock. }
       varFree: VarFreeMap;
       varFree: VarFreeMap;
-    {$ifndef HAS_SYSOSFREE}
+    {$if not defined(HAS_SYSOSFREE)}
       freeOS: FreeOSChunkList;
       freeOS: FreeOSChunkList;
-    {$endif not HAS_SYSOSFREE}
+    {$elseif defined(SUPPORT_INIT_HEAP_PROCESS_WIDE)}
+      threads: pThreadState;
+    {$endif not HAS_SYSOSFREE | SUPPORT_INIT_HEAP_PROCESS_WIDE}
     {$endif FPC_HAS_FEATURE_THREADING}
     {$endif FPC_HAS_FEATURE_THREADING}
     end;
     end;
 
 
@@ -1001,6 +1013,18 @@ type
     end;
     end;
   end;
   end;
 
 
+{$ifdef HAS_SYSOSFREE}
+  procedure HeapInc.ThreadState.ReplaceFreeOS1(&with: pFreeOSChunk);
+  begin
+    if Assigned(freeOS1) then
+    begin
+      dec(allocated, freeOS1^.size);
+      SysOSFree(freeOS1, freeOS1^.size);
+    end;
+    freeOS1 := &with;
+  end;
+{$endif HAS_SYSOSFREE}
+
   function HeapInc.ThreadState.GetOSChunk(minSize, maxSize: SizeUint): pOSChunk;
   function HeapInc.ThreadState.GetOSChunk(minSize, maxSize: SizeUint): pOSChunk;
 {$if defined(FPC_HAS_FEATURE_THREADING) and not defined(HAS_SYSOSFREE)}
 {$if defined(FPC_HAS_FEATURE_THREADING) and not defined(HAS_SYSOSFREE)}
   var
   var
@@ -1270,13 +1294,7 @@ type
         osNext^.prev := osPrev;
         osNext^.prev := osPrev;
 
 
     {$ifdef HAS_SYSOSFREE}
     {$ifdef HAS_SYSOSFREE}
-      { Move to freeOS1, discarding old freeOS1. }
-      if Assigned(freeOS1) then
-      begin
-        dec(allocated, freeOS1^.size);
-        SysOSFree(freeOS1, freeOS1^.size);
-      end;
-      freeOS1 := pFreeOSChunk(osChunk);
+      ReplaceFreeOS1(pFreeOSChunk(osChunk)); { Move to freeOS1, discarding old freeOS1. }
     {$else HAS_SYSOSFREE}
     {$else HAS_SYSOSFREE}
       fOs := @freeOS;
       fOs := @freeOS;
       { Share if huge. }
       { Share if huge. }
@@ -1601,23 +1619,27 @@ type
 {$ifdef FPC_HAS_FEATURE_THREADING}
 {$ifdef FPC_HAS_FEATURE_THREADING}
   procedure HeapInc.ThreadState.PushToFree(p: pFreeChunk);
   procedure HeapInc.ThreadState.PushToFree(p: pFreeChunk);
   var
   var
-    next: pFreeChunk;
+    nx: pFreeChunk;
   begin
   begin
     repeat
     repeat
-      next := toFree;
-      p^.next := next;
+      nx := toFree;
+      p^.next := nx;
       WriteBarrier; { Write p after p^.next. }
       WriteBarrier; { Write p after p^.next. }
-    until {$ifdef VER3_2} InterlockedCompareExchange {$else} AtomicCmpExchange {$endif} (toFree, p, next) = next;
+    until {$ifdef VER3_2} InterlockedCompareExchange {$else} AtomicCmpExchange {$endif} (toFree, p, nx) = nx;
   end;
   end;
 
 
   procedure HeapInc.ThreadState.FlushToFree;
   procedure HeapInc.ThreadState.FlushToFree;
+  begin
+    FreeToFreeList({$ifdef VER3_2} InterlockedExchange {$else} AtomicExchange {$endif} (toFree, nil));
+  end;
+
+  class procedure HeapInc.ThreadState.FreeToFreeList(tf: pFreeChunk);
   var
   var
-    tf, nx: pFreeChunk;
+    nx: pFreeChunk;
   begin
   begin
-    tf := {$ifdef VER3_2} InterlockedExchange {$else} AtomicExchange {$endif} (toFree, nil);
     while Assigned(tf) do
     while Assigned(tf) do
     begin
     begin
-      ReadDependencyBarrier; { Read toFree^.next after toFree. }
+      ReadDependencyBarrier; { Read tf^.next after tf. }
       nx := tf^.next;
       nx := tf^.next;
       SysFreeMem(tf);
       SysFreeMem(tf);
       tf := nx;
       tf := nx;
@@ -1638,7 +1660,7 @@ type
     FlushToFree; { Performing it under gs.lock guarantees there will be no new toFree requests. }
     FlushToFree; { Performing it under gs.lock guarantees there will be no new toFree requests. }
     FreeEmptyArenas; { Has to free all empty arenas, otherwise the chunk that contains only empty arenas can leak. }
     FreeEmptyArenas; { Has to free all empty arenas, otherwise the chunk that contains only empty arenas can leak. }
 
 
-{$ifndef HAS_SYSOSFREE}
+{$if not defined(HAS_SYSOSFREE)}
     { Prepend freeOS to gs.freeOS. }
     { Prepend freeOS to gs.freeOS. }
     lastFree := freeOS.last;
     lastFree := freeOS.last;
     if Assigned(lastFree) then
     if Assigned(lastFree) then
@@ -1650,12 +1672,20 @@ type
       else
       else
         gs.freeOS.last := lastFree;
         gs.freeOS.last := lastFree;
       gs.freeOS.first := freeOS.first;
       gs.freeOS.first := freeOS.first;
-      { Zeroing is probably required, because Orphan is called from FinalizeHeap which is called from DoneThread which can be called twice, according to this comment from syswin.inc: }
-      // DoneThread; { Assume everything is idempotent there }
       freeOS.first := nil;
       freeOS.first := nil;
       freeOS.last := nil;
       freeOS.last := nil;
     end;
     end;
-{$endif not HAS_SYSOSFREE}
+{$elseif defined(SUPPORT_INIT_HEAP_PROCESS_WIDE)}
+    { Remove from gs.threads, if present. }
+    if Assigned(prev) then
+      prev^.next := next
+    else if @self = gs.threads then { if prev = nil, then this ThreadState is either absent from gs.threads or is its first item. }
+      gs.threads := next;
+    if Assigned(next) then
+      next^.prev := prev;
+    prev := nil;
+    next := nil;
+{$endif not HAS_SYSOSFREE | defined(SUPPORT_INIT_HEAP_PROCESS_WIDE)}
     vOs := varOS;
     vOs := varOS;
     while Assigned(vOs) do
     while Assigned(vOs) do
     begin
     begin
@@ -1674,11 +1704,7 @@ type
       LeaveCriticalSection(gs.lock);
       LeaveCriticalSection(gs.lock);
 
 
 {$ifdef HAS_SYSOSFREE}
 {$ifdef HAS_SYSOSFREE}
-    if Assigned(freeOS1) then
-    begin
-      SysOSFree(freeOS1, freeOS1^.size); { Does not require gs.lock. }
-      freeOS1 := nil;
-    end;
+    ReplaceFreeOS1(nil); { Does not require gs.lock. }
 {$endif HAS_SYSOSFREE}
 {$endif HAS_SYSOSFREE}
   end;
   end;
 
 
@@ -1761,6 +1787,15 @@ type
       vOs^.threadState := @self;
       vOs^.threadState := @self;
       vOs := vOs^.next;
       vOs := vOs^.next;
     end;
     end;
+  {$if defined(SUPPORT_INIT_HEAP_PROCESS_WIDE) and defined(HAS_SYSOSFREE)}
+    { Not sure if required... }
+    if Assigned(prev) then
+      prev^.next := @self
+    else
+      gs.threads := @self;
+    if Assigned(next) then
+      next^.prev := @self;
+  {$endif SUPPORT_INIT_HEAP_PROCESS_WIDE and HAS_SYSOSFREE}
   end;
   end;
 {$endif ndef FPC_SECTION_THREADVARS}
 {$endif ndef FPC_SECTION_THREADVARS}
 {$endif FPC_HAS_FEATURE_THREADING}
 {$endif FPC_HAS_FEATURE_THREADING}
@@ -1928,6 +1963,7 @@ end;
 *****************************************************************************}
 *****************************************************************************}
 
 
 {$ifdef FPC_HAS_FEATURE_THREADING}
 {$ifdef FPC_HAS_FEATURE_THREADING}
+{$ifdef SUPPORT_INIT_HEAP_PROCESS_WIDE}
 { DeferInitHeapProcessWide / DoneHeapProcessWide are meant to support DLL_PROCESS_ATTACH / DLL_PROCESS_DETACH.
 { DeferInitHeapProcessWide / DoneHeapProcessWide are meant to support DLL_PROCESS_ATTACH / DLL_PROCESS_DETACH.
   Otherwise InitHeapThread + FinalizeHeap called per thread do their best with refcounting... }
   Otherwise InitHeapThread + FinalizeHeap called per thread do their best with refcounting... }
 procedure DeferInitHeapProcessWide;
 procedure DeferInitHeapProcessWide;
@@ -1936,23 +1972,80 @@ begin
 end;
 end;
 
 
 procedure DoneHeapProcessWide;
 procedure DoneHeapProcessWide;
+{$ifdef HAS_SYSOSFREE}
+var
+  thisTs, nextTs, ts: HeapInc.pThreadState;
+  stolenTf: HeapInc.pFreeChunk;
+{$endif HAS_SYSOSFREE}
 begin
 begin
-  if HeapInc.gs.lockUse = HeapInc.gs.LockInitializedProcessWide then
+  if HeapInc.gs.lockUse <> HeapInc.gs.LockInitializedProcessWide then
+    exit;
+{$ifdef HAS_SYSOSFREE}
+  { We need to free all lingering data of all threads: to-free lists, empty arenas, “freeOS1”s.
+
+    For each particular thread ts, this is what ts^.Orphan does, but ts^.Orphan can’t be (easily) called from another thread:
+    it calls ts^.FlushToFree which calls SysFreeMem which is hardcoded to work with HeapInc.thisTs.
+    It’s not worth redesigning, or the common case of SysFreeMem that must indeed work with HeapInc.thisTs will be slower.
+
+    So we steal and zero ts^.toFree (ts^.FlushToFree is the only thing that prevents ts^.Orphan from working from threads other than ts), call ts^.Orphan,
+    then manually complete toFree requests on our behalf.
+
+    This entire thing is just to handle the case of unloading a DLL before terminating the thread that used this DLL
+    (https://gitlab.com/freepascal.org/fpc/source/-/merge_requests/1173). }
+
+  thisTs := @HeapInc.thisTs;
+  nextTs := HeapInc.gs.threads;
+  while Assigned(nextTs) do
   begin
   begin
-    HeapInc.gs.lockUse := 0;
-    DoneCriticalSection(HeapInc.gs.lock);
+    ts := nextTs;
+    nextTs := ts^.next;
+    if ts = thisTs then { Used for executing toFrees and is orphaned the last. }
+      continue;
+    stolenTf := ts^.toFree;
+    ts^.toFree := nil;
+    ts^.Orphan;
+    HeapInc.ThreadState.FreeToFreeList(stolenTf);
   end;
   end;
+  thisTs^.Orphan;
+{$endif HAS_SYSOSFREE}
+  HeapInc.gs.lockUse := 0;
+  DoneCriticalSection(HeapInc.gs.lock);
 end;
 end;
+{$endif SUPPORT_INIT_HEAP_PROCESS_WIDE}
 
 
 { This function will initialize the Heap manager and need to be called from
 { This function will initialize the Heap manager and need to be called from
   the initialization of the system unit }
   the initialization of the system unit }
 procedure InitHeapThread;
 procedure InitHeapThread;
+{$if defined(SUPPORT_INIT_HEAP_PROCESS_WIDE) and defined(HAS_SYSOSFREE)}
+var
+  ts, next: HeapInc.pThreadState;
+{$endif SUPPORT_INIT_HEAP_PROCESS_WIDE and HAS_SYSOSFREE}
 begin
 begin
+{$ifdef SUPPORT_INIT_HEAP_PROCESS_WIDE}
   if (HeapInc.gs.lockUse = 0) and HeapInc.gs.askedForProcessWideLockInitialization then
   if (HeapInc.gs.lockUse = 0) and HeapInc.gs.askedForProcessWideLockInitialization then
-    HeapInc.gs.lockUse := HeapInc.gs.LockInitializedProcessWide
-  else if not ((HeapInc.gs.lockUse >= 0) and ({$ifdef VER3_2} InterlockedIncrement {$else} AtomicIncrement {$endif} (HeapInc.gs.lockUse) = 1)) then
-    exit;
-  InitCriticalSection(HeapInc.gs.lock);
+  begin
+    HeapInc.gs.lockUse := HeapInc.gs.LockInitializedProcessWide;
+    InitCriticalSection(HeapInc.gs.lock);
+  end else
+{$endif SUPPORT_INIT_HEAP_PROCESS_WIDE}
+  if (HeapInc.gs.lockUse >= 0) and ({$ifdef VER3_2} InterlockedIncrement {$else} AtomicIncrement {$endif} (HeapInc.gs.lockUse) = 1) then
+    InitCriticalSection(HeapInc.gs.lock);
+
+{$if defined(SUPPORT_INIT_HEAP_PROCESS_WIDE) and defined(HAS_SYSOSFREE)}
+  { Add to gs.threads. }
+  ts := @HeapInc.thisTs;
+  EnterCriticalSection(HeapInc.gs.lock);
+  next := HeapInc.gs.threads;
+  { Check if already in gs.threads; this function can in principle be called twice on the same thread (or at least I did call redundant InitThread for some time...). }
+  if not Assigned(ts^.prev) and (ts <> next) then
+  begin
+    ts^.next := next;
+    if Assigned(next) then
+      next^.prev := ts;
+    HeapInc.gs.threads := ts;
+  end;
+  LeaveCriticalSection(HeapInc.gs.lock);
+{$endif SUPPORT_INIT_HEAP_PROCESS_WIDE and HAS_SYSOSFREE}
 end;
 end;
 {$endif FPC_HAS_FEATURE_THREADING}
 {$endif FPC_HAS_FEATURE_THREADING}
 
 
@@ -1991,12 +2084,7 @@ begin
   if (HeapInc.gs.lockUse > 0) and ({$ifdef VER3_2} InterlockedDecrement {$else} AtomicDecrement {$endif} (HeapInc.gs.lockUse) = 0) then
   if (HeapInc.gs.lockUse > 0) and ({$ifdef VER3_2} InterlockedDecrement {$else} AtomicDecrement {$endif} (HeapInc.gs.lockUse) = 0) then
     DoneCriticalSection(HeapInc.gs.lock);
     DoneCriticalSection(HeapInc.gs.lock);
 {$elseif defined(HAS_SYSOSFREE)}
 {$elseif defined(HAS_SYSOSFREE)}
-  if Assigned(HeapInc.thisTs.freeOS1) then
-  begin
-    dec(HeapInc.thisTs.allocated, HeapInc.thisTs.freeOS1^.size); { Just in case... }
-    SysOSFree(HeapInc.thisTs.freeOS1, HeapInc.thisTs.freeOS1^.size);
-    HeapInc.thisTs.freeOS1 := nil; { Just in case... }
-  end;
+  HeapInc.thisTs.ReplaceFreeOS1(nil);
 {$endif FPC_HAS_FEATURE_THREADING | defined(HAS_SYSOSFREE)}
 {$endif FPC_HAS_FEATURE_THREADING | defined(HAS_SYSOSFREE)}
 end;
 end;
 
 

+ 8 - 1
rtl/inc/heaph.inc

@@ -100,8 +100,15 @@ function ReAllocMemory(p:pointer;Size:ptruint):pointer; cdecl;
 function GetHeapStatus:THeapStatus;
 function GetHeapStatus:THeapStatus;
 function GetFPCHeapStatus:TFPCHeapStatus;
 function GetFPCHeapStatus:TFPCHeapStatus;
 
 
+{ Support for DeferInitHeapProcessWide and DoneHeapProcessWide is not free, so they must be enabled explicitly by defining SUPPORT_INIT_HEAP_PROCESS_WIDE. }
 {$if defined(FPC_HAS_FEATURE_THREADING) and not defined(FPC_NO_DEFAULT_HEAP) and not defined(HAS_MEMORYMANAGER) and not defined(LEGACYHEAP)}
 {$if defined(FPC_HAS_FEATURE_THREADING) and not defined(FPC_NO_DEFAULT_HEAP) and not defined(HAS_MEMORYMANAGER) and not defined(LEGACYHEAP)}
+  {$if defined(WINDOWS)}
+    {$define SUPPORT_INIT_HEAP_PROCESS_WIDE}
+  {$endif need *HeapProcessWide (platforms)}
+{$endif need *HeapProcessWide (general considerations)}
+
+{$ifdef SUPPORT_INIT_HEAP_PROCESS_WIDE}
 procedure DeferInitHeapProcessWide; inline; { for Windows sysinit.pp... }
 procedure DeferInitHeapProcessWide; inline; { for Windows sysinit.pp... }
 procedure DoneHeapProcessWide; inline;
 procedure DoneHeapProcessWide; inline;
-{$endif FPC_HAS_FEATURE_THREADING and not defined(FPC_NO_DEFAULT_HEAP) and not defined(HAS_MEMORYMANAGER) and not defined(LEGACYHEAP)}
+{$endif SUPPORT_INIT_HEAP_PROCESS_WIDE}
 {$endif FPC_HAS_FEATURE_HEAP}
 {$endif FPC_HAS_FEATURE_HEAP}

+ 81 - 18
rtl/win/systhrd.inc

@@ -79,6 +79,17 @@ function WinTryEnterCriticalSection(var cs : TRTLCriticalSection):longint;
                              Threadvar support
                              Threadvar support
 *****************************************************************************}
 *****************************************************************************}
 
 
+    type
+      { If IsLibrary, prepended to threadvar blocks allocated with HeapAlloc to forcefully free them in DLL_PROCESS_DETACH (SysFiniMultithreading). }
+      PThreadvarNode=^TThreadvarNode;
+      TThreadvarNode=record
+        prev, next : PThreadvarNode;
+      end;
+
+    const
+      { Align threadvars on 2 * sizeof(pointer)... HeapAlloc has this alignment, so no point in more. }
+      AlignedThreadvarNodeSize=(sizeof(TThreadVarNode)+2*sizeof(pointer)-1) and -(2*sizeof(pointer));
+
     var
     var
       // public names are used by heaptrc unit
       // public names are used by heaptrc unit
       threadvarblocksize : dword; public name '_FPC_TlsSize';
       threadvarblocksize : dword; public name '_FPC_TlsSize';
@@ -88,9 +99,13 @@ function WinTryEnterCriticalSection(var cs : TRTLCriticalSection):longint;
       TLSKeyVar : DWord = $ffffffff;
       TLSKeyVar : DWord = $ffffffff;
       TLSKey : PDWord = @TLSKeyVar; public name '_FPC_TlsKey';
       TLSKey : PDWord = @TLSKeyVar; public name '_FPC_TlsKey';
       {$endif FPC_HAS_INDIRECT_ENTRY_INFORMATION}
       {$endif FPC_HAS_INDIRECT_ENTRY_INFORMATION}
+      DllProcessAttachPerformed : boolean;
 
 
-    var
-      MainThreadIdWin32 : DWORD;
+      { AllThreadvars* are used only when IsLibrary. (Careful!)
+        Could be used when not IsLibrary as a no-op to simplify the code,
+        but this would require publishing SysFiniMultithreading to be called from sysinit.pp:Exec_Tls_callback.DLL_PROCESS_DETACH. }
+      AllThreadvarsLock : TRTLCriticalSection;
+      AllThreadvars : PThreadvarNode;
 
 
     procedure SysInitThreadvar(var offset : dword;size : dword);
     procedure SysInitThreadvar(var offset : dword;size : dword);
       begin
       begin
@@ -105,8 +120,8 @@ function WinTryEnterCriticalSection(var cs : TRTLCriticalSection):longint;
 
 
     procedure SysAllocateThreadVars; public name '_FPC_SysAllocateThreadVars';
     procedure SysAllocateThreadVars; public name '_FPC_SysAllocateThreadVars';
       var
       var
-        dataindex : pointer;
-        errorsave : dword;
+        tn,nx : PThreadvarNode;
+        prepend,errorsave : dword;
       begin
       begin
         { we've to allocate the memory from system  }
         { we've to allocate the memory from system  }
         { because the FPC heap management uses      }
         { because the FPC heap management uses      }
@@ -116,13 +131,25 @@ function WinTryEnterCriticalSection(var cs : TRTLCriticalSection):longint;
         errorsave:=GetLastError;
         errorsave:=GetLastError;
         if tlskey^=$ffffffff then
         if tlskey^=$ffffffff then
           RunError(226);
           RunError(226);
-        dataindex:=TlsGetValue(tlskey^);
-        if dataindex=nil then
+        if TlsGetValue(tlskey^)=nil then
           begin
           begin
-            dataindex:=HeapAlloc(GetProcessHeap,HEAP_ZERO_MEMORY,threadvarblocksize);
-            if dataindex=nil then
+            prepend:=ord(IsLibrary)*AlignedThreadvarNodeSize;
+            tn:=HeapAlloc(GetProcessHeap,HEAP_ZERO_MEMORY,prepend+threadvarblocksize);
+            if tn=nil then
               RunError(226);
               RunError(226);
-            TlsSetValue(tlskey^,dataindex);
+            TlsSetValue(tlskey^,pointer(tn)+prepend);
+
+            if IsLibrary then
+            begin
+              { Add tn to AllThreadvars. }
+              WinEnterCriticalSection(AllThreadvarsLock);
+              nx:=AllThreadvars;
+              tn^.next:=nx; { “tn^.prev := nil” can be omitted thanks to HEAP_ZERO_MEMORY. }
+              if Assigned(nx) then
+                nx^.prev:=tn;
+              AllThreadvars:=tn;
+              WinLeaveCriticalSection(AllThreadvarsLock);
+            end;
           end;
           end;
         SetLastError(errorsave);
         SetLastError(errorsave);
       end;
       end;
@@ -137,6 +164,8 @@ function WinTryEnterCriticalSection(var cs : TRTLCriticalSection):longint;
         if TLSKey^=$ffffffff then
         if TLSKey^=$ffffffff then
          begin
          begin
            { We're still running in single thread mode, setup the TLS }
            { We're still running in single thread mode, setup the TLS }
+           if IsLibrary then
+             WinInitCriticalSection(AllThreadvarsLock);
            TLSKey^:=TlsAlloc;
            TLSKey^:=TlsAlloc;
            InitThreadVars(@SysRelocateThreadvar);
            InitThreadVars(@SysRelocateThreadvar);
          end;
          end;
@@ -144,10 +173,28 @@ function WinTryEnterCriticalSection(var cs : TRTLCriticalSection):longint;
 
 
 
 
     procedure SysFiniMultithreading;
     procedure SysFiniMultithreading;
+      var
+        tn,nx : PThreadvarNode;
       begin
       begin
-        if TLSKey^<>$ffffffff then
-          TlsFree(TLSKey^);
+        if TLSKey^=$ffffffff then
+          exit;
+        TlsFree(TLSKey^);
         TLSKey^:=$ffffffff;
         TLSKey^:=$ffffffff;
+
+        if IsLibrary then
+        begin
+          { Purge all remaining threadvars! System doesn’t preface DLL_PROCESS_DETACH by sending DLL_THREAD_DETACH to all threads,
+            so think of it as of emulating the threadvar destroying part of DLL_THREAD_DETACH for all threads that require it. }
+          tn:=AllThreadvars;
+          AllThreadvars:=nil;
+          while Assigned(tn) do
+          begin
+            nx:=tn^.next;
+            HeapFree(GetProcessHeap,0,tn);
+            tn:=nx;
+          end;
+          WinDoneCriticalSection(AllThreadvarsLock);
+        end;
       end;
       end;
 
 
 
 
@@ -257,14 +304,30 @@ function WinTryEnterCriticalSection(var cs : TRTLCriticalSection):longint;
 
 
     procedure SysReleaseThreadVars;
     procedure SysReleaseThreadVars;
       var
       var
-        p: pointer;
+        tn,prev,next: PThreadvarNode;
       begin
       begin
-        if TLSKey^<>$ffffffff then
-          begin
-            p:=TlsGetValue(tlskey^);
-            HeapFree(GetProcessHeap,0,p); { HeapFree is OK with nil. }
-            TlsSetValue(tlskey^, nil);
-          end;
+        if TLSKey^=$ffffffff then
+          exit;
+        tn:=TlsGetValue(TLSKey^);
+        if tn=nil then
+          exit;
+        TlsSetValue(TLSKey^,nil);
+        if IsLibrary then
+        begin
+          dec(pointer(tn),AlignedThreadvarNodeSize);
+          { Remove tn from AllThreadvars. }
+          WinEnterCriticalSection(AllThreadvarsLock);
+          prev:=tn^.prev;
+          next:=tn^.next;
+          if Assigned(next) then
+            next^.prev:=prev;
+          if Assigned(prev) then
+            prev^.next:=next
+          else
+            AllThreadvars:=next;
+          WinLeaveCriticalSection(AllThreadvarsLock);
+        end;
+        HeapFree(GetProcessHeap,0,tn);
       end;
       end;
 
 
 
 

+ 6 - 6
rtl/win/syswin.inc

@@ -401,7 +401,7 @@ function Dll_entry{$ifdef FPC_HAS_INDIRECT_ENTRY_INFORMATION}(constref info : TE
      case DLLreason of
      case DLLreason of
        DLL_PROCESS_ATTACH :
        DLL_PROCESS_ATTACH :
          begin
          begin
-           MainThreadIdWin32 := Win32GetCurrentThreadId;
+           DllProcessAttachPerformed := true;
 
 
            If SetJmp(DLLBuf) = 0 then
            If SetJmp(DLLBuf) = 0 then
              begin
              begin
@@ -451,7 +451,7 @@ function Dll_entry{$ifdef FPC_HAS_INDIRECT_ENTRY_INFORMATION}(constref info : TE
          end;
          end;
        DLL_PROCESS_DETACH :
        DLL_PROCESS_DETACH :
          begin
          begin
-           if MainThreadIDWin32=0 then // already been here.
+           if not DllProcessAttachPerformed then // already been here.
              exit;
              exit;
            If SetJmp(DLLBuf) = 0 then
            If SetJmp(DLLBuf) = 0 then
              begin
              begin
@@ -460,13 +460,13 @@ function Dll_entry{$ifdef FPC_HAS_INDIRECT_ENTRY_INFORMATION}(constref info : TE
                InternalExit;
                InternalExit;
              end;
              end;
 
 
+{$ifndef LEGACYHEAP}
+           DoneHeapProcessWide; { Iterates alive “HeapInc.ThreadState”s so requires alive threadvars, careful. }
+{$endif ndef LEGACYHEAP}
            SysReleaseThreadVars;
            SysReleaseThreadVars;
            { Free TLS resources used by ThreadVars }
            { Free TLS resources used by ThreadVars }
            SysFiniMultiThreading;
            SysFiniMultiThreading;
-{$ifndef LEGACYHEAP}
-           DoneHeapProcessWide;
-{$endif ndef LEGACYHEAP}
-           MainThreadIDWin32:=0;
+           DllProcessAttachPerformed:=false;
          end;
          end;
      end;
      end;
      DllInitState:=-1;
      DllInitState:=-1;

+ 2 - 2
rtl/win32/system.pp

@@ -104,8 +104,8 @@ begin
       put down the entire process (DLL_PROCESS_DETACH will still
       put down the entire process (DLL_PROCESS_DETACH will still
       occur). At this point RTL has been already finalized in InternalExit
       occur). At this point RTL has been already finalized in InternalExit
       and shouldn't be finalized another time in DLL_PROCESS_DETACH.
       and shouldn't be finalized another time in DLL_PROCESS_DETACH.
-      Indicate this by resetting MainThreadIdWin32. }
-      MainThreadIDWin32:=0;
+      Indicate this by resetting DllProcessAttachPerformed. }
+      DllProcessAttachPerformed:=false;
   end;
   end;
   if not IsConsole then
   if not IsConsole then
    begin
    begin

+ 1 - 1
rtl/win64/system.pp

@@ -100,7 +100,7 @@ begin
     if DllInitState in [DLL_PROCESS_ATTACH,DLL_PROCESS_DETACH] then
     if DllInitState in [DLL_PROCESS_ATTACH,DLL_PROCESS_DETACH] then
       LongJmp(DLLBuf,1)
       LongJmp(DLLBuf,1)
     else
     else
-      MainThreadIDWin32:=0;
+      DllProcessAttachPerformed:=false;
   end;
   end;
   if not IsConsole then
   if not IsConsole then
    begin
    begin