|
@@ -150,7 +150,90 @@ function WinTryEnterCriticalSection(var cs : TRTLCriticalSection):longint;
|
|
|
TLSKey^:=$ffffffff;
|
|
|
end;
|
|
|
|
|
|
- function SysRelocateThreadvar(offset : dword) : pointer;
|
|
|
+
|
|
|
+{ Directly access the thread environment block (TEB). If there is a value, use it. If there is not, jump to TrulyRelocateThreadvar, which can allocate it.
|
|
|
+ TrulyRelocateThreadvar is several (5+) times slower by itself; shortcutting SetLastError on errorsave = 0 helps a bit (reduces to 3.5× maybe :D).
|
|
|
+
|
|
|
+ General info (in particular, stories about de facto stability guarantees):
|
|
|
+ https://en.wikipedia.org/wiki/Win32_Thread_Information_Block
|
|
|
+
|
|
|
+ TEB layout:
|
|
|
+ https://github.com/wine-mirror/wine/blob/badaed641928edb8f2426d9f12d16c88b479e1e8/include/winternl.h#L431
|
|
|
+
|
|
|
+ “Why load fs:[0x18] into a register and then dereference that, instead of just going for fs:[n] directly?”
|
|
|
+ https://devblogs.microsoft.com/oldnewthing/20220919-00/?p=107195
|
|
|
+ TL;DR: even in Windows sources, TlsGetValue is written in a relatively high-level manner and not overly optimized. }
|
|
|
+
|
|
|
+{$ifndef wince} { Don’t know a thing, maybe WinCE TEB is compatible... :D https://stackoverflow.com/questions/1099311/windows-ce-internals-teb-thread-environment-block }
|
|
|
+{$if defined(cpui386)}
|
|
|
+    function TrulyRelocateThreadvar(offset : dword) : pointer; forward;
+
+    { i386 fast path: reads the slot numbered TLSKey^ through the FS segment and
+      returns slot value + offset. If the index is out of range, the expansion
+      array pointer is nil, or the slot itself is nil, control is transferred to
+      TrulyRelocateThreadvar with eax (the offset) still untouched. }
+    function SysRelocateThreadvar(offset : dword) : pointer; assembler; nostackframe;
+    { In: eax = offset. Out: eax = result. edx and ecx are used as scratch. }
+    const
+      StaticSlotsOfs    = $E10; { void* TlsSlots[64] @ fs:[E10h]. }
+      ExpansionSlotsOfs = $F94; { void** TlsExpansionSlots @ fs:[F94h]. }
+    asm
+        movl    TLSKey, %edx
+        movl    (%edx), %edx                        { edx = TLSKey^. }
+
+        cmpl    $0x40, %edx                         { 64 static slots, then 1024 dynamic ones. }
+        jae     .LExpansion
+        movl    %fs:StaticSlotsOfs(,%edx,4), %edx   { edx = static slot number TLSKey^. }
+        testl   %edx, %edx
+        jz      .LSlow                              { Nil slot: let the slow routine deal with it. }
+        addl    %edx, %eax                          { result := slot value + offset. }
+        ret
+
+.LSlow:
+        jmp     TrulyRelocateThreadvar              { Sits in the middle to save on relative jumps. }
+
+.LExpansion:
+        cmpl    $0x440, %edx                        { 0x440 = 1088 = 64 static + 1024 dynamic. }
+        jae     .LSlow                              { Index is beyond the limit on TLS indices. }
+        movl    %fs:ExpansionSlotsOfs, %ecx         { ecx = TlsExpansionSlots. }
+        testl   %ecx, %ecx
+        jz      .LSlow                              { No TlsExpansionSlots allocated. }
+        movl    -0x100(%ecx,%edx,4), %edx           { -0x100 = -64*4: edx = dynamic slot (TLSKey^ - 64). }
+        testl   %edx, %edx
+        jz      .LSlow
+        addl    %edx, %eax                          { result := slot value + offset. }
+    end;
|
|
|
+{$elseif defined(cpux86_64)}
|
|
|
+    function TrulyRelocateThreadvar(offset : dword) : pointer; forward;
+
+    { x86-64 fast path: reads the slot numbered TLSKey^ through the GS segment and
+      returns slot value + offset. If the index is out of range, the expansion
+      array pointer is nil, or the slot itself is nil, control is transferred to
+      TrulyRelocateThreadvar, which receives the same 32-bit offset in ecx. }
+    function SysRelocateThreadvar(offset : dword) : pointer; assembler; nostackframe;
+    { In: ecx = offset. Out: rax = result. rdx is used as scratch. }
+    const { 64-bit layout: the TEB is addressed through the GS register. }
+      TlsSlots = $1480;          { void* TlsSlots[64] @ gs:[1480h]. }
+      TlsExpansionSlots = $1780; { void** TlsExpansionSlots @ gs:[1780h]. }
+    asm
+        { offset is a dword, but the Win64 calling convention leaves the upper half
+          of rcx unspecified for arguments narrower than 64 bits. Zero-extend it here
+          so that the 64-bit additions below cannot pick up garbage. The low 32 bits
+          are unchanged, so the slow path still sees the original offset. }
+        mov     %ecx, %ecx
+
+        mov     TLSKey(%rip), %rdx
+        mov     (%rdx), %edx                    { edx = TLSKey^; the 32-bit load also clears the upper half of rdx. }
+
+        cmp     $0x40, %edx                     { 64 static slots, then 1024 dynamic ones. }
+        jae     .LExp
+        mov     %gs:TlsSlots(,%rdx,8), %rax     { rax = static slot number TLSKey^. }
+        test    %rax, %rax
+        jz      .LOops                          { Nil slot: let the slow routine deal with it. }
+        add     %rcx, %rax                      { result := slot value + offset. }
+        ret
+
+.LOops: jmp     TrulyRelocateThreadvar
+
+.LExp:  cmp     $0x440, %edx                    { 0x440 = 1088 = 64 static + 1024 dynamic. }
+        jae     .LOops                          { Index is beyond the limit on TLS indices. }
+        mov     %gs:TlsExpansionSlots, %rax     { rax = TlsExpansionSlots. }
+        test    %rax, %rax
+        jz      .LOops                          { No TlsExpansionSlots allocated. }
+        mov     -0x200(%rax,%rdx,8), %rax       { -0x200 = -64*8: rax = dynamic slot (TLSKey^ - 64). }
+        test    %rax, %rax
+        jz      .LOops
+        add     %rcx, %rax                      { result := slot value + offset. }
+    end;
|
|
|
+{$endif implement SysRelocateThreadvar with assembly}
|
|
|
+{$endif not wince}
|
|
|
+
|
|
|
+
|
|
|
+ function {$if declared(SysRelocateThreadvar)} TrulyRelocateThreadvar {$else} SysRelocateThreadvar {$endif} (offset : dword) : pointer;
|
|
|
var
|
|
|
dataindex : pointer;
|
|
|
errorsave : dword;
|
|
@@ -164,7 +247,7 @@ function WinTryEnterCriticalSection(var cs : TRTLCriticalSection):longint;
|
|
|
InitThread($1000000);
|
|
|
end;
|
|
|
SetLastError(errorsave);
|
|
|
- SysRelocateThreadvar:=DataIndex+Offset;
|
|
|
+ Result:=DataIndex+Offset;
|
|
|
end;
|
|
|
|
|
|
|