2
0
Эх сурвалжийг харах

Update to LZMA SDK 25.00.

Martijn Laan 6 өдөр өмнө
parent
commit
3def5d985b

+ 3 - 2
Components/Lzma2/7zDec.c

@@ -1,5 +1,5 @@
 /* 7zDec.c -- Decoding from 7z folder
-2024-03-01 : Igor Pavlov : Public domain */
+: Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -312,8 +312,9 @@ static BoolInt IS_MAIN_METHOD(UInt32 m)
     case k_PPMD:
   #endif
       return True;
+    default:
+      return False;
   }
-  return False;
 }
 
 static BoolInt IS_SUPPORTED_CODER(const CSzCoderInfo *c)

+ 5 - 5
Components/Lzma2/7zVersion.h

@@ -1,7 +1,7 @@
-#define MY_VER_MAJOR 24
-#define MY_VER_MINOR 8
+#define MY_VER_MAJOR 25
+#define MY_VER_MINOR 0
 #define MY_VER_BUILD 0
-#define MY_VERSION_NUMBERS "24.08"
+#define MY_VERSION_NUMBERS "25.00"
 #define MY_VERSION MY_VERSION_NUMBERS
 
 #ifdef MY_CPU_NAME
@@ -10,12 +10,12 @@
   #define MY_VERSION_CPU MY_VERSION
 #endif
 
-#define MY_DATE "2024-08-11"
+#define MY_DATE "2025-07-05"
 #undef MY_COPYRIGHT
 #undef MY_VERSION_COPYRIGHT_DATE
 #define MY_AUTHOR_NAME "Igor Pavlov"
 #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
-#define MY_COPYRIGHT_CR "Copyright (c) 1999-2024 Igor Pavlov"
+#define MY_COPYRIGHT_CR "Copyright (c) 1999-2025 Igor Pavlov"
 
 #ifdef USE_COPYRIGHT_CR
   #define MY_COPYRIGHT MY_COPYRIGHT_CR

+ 11 - 1
Components/Lzma2/Compiler.h

@@ -1,5 +1,5 @@
 /* Compiler.h : Compiler specific defines and pragmas
-2024-01-22 : Igor Pavlov : Public domain */
+: Igor Pavlov : Public domain */
 
 #ifndef ZIP7_INC_COMPILER_H
 #define ZIP7_INC_COMPILER_H
@@ -183,6 +183,16 @@ typedef void (*Z7_void_Function)(void);
   #define Z7_ATTRIB_NO_VECTORIZE
 #endif
 
+#if defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1920)
+  #define Z7_PRAGMA_OPTIMIZE_FOR_CODE_SIZE _Pragma("optimize ( \"s\", on )")
+  #define Z7_PRAGMA_OPTIMIZE_DEFAULT       _Pragma("optimize ( \"\", on )")
+#else
+  #define Z7_PRAGMA_OPTIMIZE_FOR_CODE_SIZE
+  #define Z7_PRAGMA_OPTIMIZE_DEFAULT
+#endif
+
+
+
 #if defined(MY_CPU_X86_OR_AMD64) && ( \
        defined(__clang__) && (__clang_major__ >= 4) \
     || defined(__GNUC__) && (__GNUC__ >= 5))

+ 95 - 14
Components/Lzma2/CpuArch.c

@@ -1,5 +1,5 @@
 /* CpuArch.c -- CPU specific code
-2024-07-04 : Igor Pavlov : Public domain */
+Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -17,7 +17,7 @@
 /*
   cpuid instruction supports (subFunction) parameter in ECX,
   that is used only with some specific (function) parameter values.
-  But we always use only (subFunction==0).
+  most functions use only (subFunction==0).
 */
 /*
   __cpuid(): MSVC and GCC/CLANG use same function/macro name
@@ -49,43 +49,49 @@
 #if defined(MY_CPU_AMD64) && defined(__PIC__) \
     && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
 
-#define x86_cpuid_MACRO(p, func) { \
+  /* "=&r" selects free register. It can select even rbx, if that register is free.
+     "=&D" for (RDI) also works, but the code can be larger with "=&D"
+     "2"(subFun) : 2 is (zero-based) index in the output constraint list "=c" (ECX). */
+
+#define x86_cpuid_MACRO_2(p, func, subFunc) { \
   __asm__ __volatile__ ( \
     ASM_LN   "mov     %%rbx, %q1"  \
     ASM_LN   "cpuid"               \
     ASM_LN   "xchg    %%rbx, %q1"  \
-    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
-
-  /* "=&r" selects free register. It can select even rbx, if that register is free.
-     "=&D" for (RDI) also works, but the code can be larger with "=&D"
-     "2"(0) means (subFunction = 0),
-     2 is (zero-based) index in the output constraint list "=c" (ECX). */
+    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
 
 #elif defined(MY_CPU_X86) && defined(__PIC__) \
     && ((defined (__GNUC__) && (__GNUC__ < 5)) || defined(__clang__))
 
-#define x86_cpuid_MACRO(p, func) { \
+#define x86_cpuid_MACRO_2(p, func, subFunc) { \
   __asm__ __volatile__ ( \
     ASM_LN   "mov     %%ebx, %k1"  \
     ASM_LN   "cpuid"               \
     ASM_LN   "xchg    %%ebx, %k1"  \
-    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
+    : "=a" ((p)[0]), "=&r" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
 
 #else
 
-#define x86_cpuid_MACRO(p, func) { \
+#define x86_cpuid_MACRO_2(p, func, subFunc) { \
   __asm__ __volatile__ ( \
     ASM_LN   "cpuid"               \
-    : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(0)); }
+    : "=a" ((p)[0]), "=b" ((p)[1]), "=c" ((p)[2]), "=d" ((p)[3]) : "0" (func), "2"(subFunc)); }
 
 #endif
 
+#define x86_cpuid_MACRO(p, func)  x86_cpuid_MACRO_2(p, func, 0)
 
 void Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
 {
   x86_cpuid_MACRO(p, func)
 }
 
+static
+void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
+{
+  x86_cpuid_MACRO_2(p, func, subFunc)
+}
+
 
 Z7_NO_INLINE
 UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void)
@@ -205,11 +211,39 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
   __asm   ret     0
 }
 
+static
+void __declspec(naked) Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
+{
+  UNUSED_VAR(p)
+  UNUSED_VAR(func)
+  UNUSED_VAR(subFunc)
+  __asm   push    ebx
+  __asm   push    edi
+  __asm   mov     edi, ecx    // p
+  __asm   mov     eax, edx    // func
+  __asm   mov     ecx, [esp + 12]  // subFunc
+  __asm   cpuid
+  __asm   mov     [edi     ], eax
+  __asm   mov     [edi +  4], ebx
+  __asm   mov     [edi +  8], ecx
+  __asm   mov     [edi + 12], edx
+  __asm   pop     edi
+  __asm   pop     ebx
+  __asm   ret     4
+}
+
 #else // MY_CPU_AMD64
 
     #if _MSC_VER >= 1600
       #include <intrin.h>
       #define MY_cpuidex  __cpuidex
+
+static
+void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
+{
+  __cpuidex((int *)p, func, subFunc);
+}
+
     #else
 /*
  __cpuid (func == (0 or 7)) requires subfunction number in ECX.
@@ -219,7 +253,7 @@ void __declspec(naked) Z7_FASTCALL z7_x86_cpuid(UInt32 p[4], UInt32 func)
  We still can use __cpuid for low (func) values that don't require ECX,
  but __cpuid() in old MSVC will be incorrect for some func values: (func == 7).
  So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
- where ECX value is first parameter for FASTCALL / NO_INLINE func,
+ where ECX value is first parameter for FASTCALL / NO_INLINE func.
  So the caller of MY_cpuidex_HACK() sets ECX as subFunction, and
  old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
  
@@ -233,6 +267,11 @@ Z7_NO_INLINE void Z7_FASTCALL MY_cpuidex_HACK(Int32 subFunction, Int32 func, Int
 }
       #define MY_cpuidex(info, func, func2)  MY_cpuidex_HACK(func2, func, info)
       #pragma message("======== MY_cpuidex_HACK WAS USED ========")
+static
+void Z7_FASTCALL z7_x86_cpuid_subFunc(UInt32 p[4], UInt32 func, UInt32 subFunc)
+{
+  MY_cpuidex_HACK(subFunc, func, (Int32 *)p);
+}
     #endif // _MSC_VER >= 1600
 
 #if !defined(MY_CPU_AMD64)
@@ -445,6 +484,23 @@ BoolInt CPU_IsSupported_SHA(void)
   }
 }
 
+
+BoolInt CPU_IsSupported_SHA512(void)
+{
+  if (!CPU_IsSupported_AVX2()) return False; // maybe CPU_IsSupported_AVX() is enough here
+
+  if (z7_x86_cpuid_GetMaxFunc() < 7)
+    return False;
+  {
+    UInt32 d[4];
+    z7_x86_cpuid_subFunc(d, 7, 0);
+    if (d[0] < 1) // d[0] - is max supported subleaf value
+      return False;
+    z7_x86_cpuid_subFunc(d, 7, 1);
+    return (BoolInt)(d[0]) & 1;
+  }
+}
+
 /*
 MSVC: _xgetbv() intrinsic is available since VS2010SP1.
    MSVC also defines (_XCR_XFEATURE_ENABLED_MASK) macro in
@@ -776,6 +832,18 @@ BoolInt CPU_IsSupported_NEON(void)
   return z7_sysctlbyname_Get_BoolInt("hw.optional.neon");
 }
 
+BoolInt CPU_IsSupported_SHA512(void)
+{
+  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha512");
+}
+
+/*
+BoolInt CPU_IsSupported_SHA3(void)
+{
+  return z7_sysctlbyname_Get_BoolInt("hw.optional.armv8_2_sha3");
+}
+*/
+
 #ifdef MY_CPU_ARM64
 #define APPLE_CRYPTO_SUPPORT_VAL 1
 #else
@@ -860,6 +928,19 @@ MY_HWCAP_CHECK_FUNC (CRC32)
 MY_HWCAP_CHECK_FUNC (SHA1)
 MY_HWCAP_CHECK_FUNC (SHA2)
 MY_HWCAP_CHECK_FUNC (AES)
+#ifdef MY_CPU_ARM64
+// <hwcap.h> supports HWCAP_SHA512 and HWCAP_SHA3 since 2017.
+// we define them here, if they are not defined
+#ifndef HWCAP_SHA3
+// #define HWCAP_SHA3    (1 << 17)
+#endif
+#ifndef HWCAP_SHA512
+// #pragma message("=== HWCAP_SHA512 define === ")
+#define HWCAP_SHA512  (1 << 21)
+#endif
+MY_HWCAP_CHECK_FUNC (SHA512)
+// MY_HWCAP_CHECK_FUNC (SHA3)
+#endif
 
 #endif // __APPLE__
 #endif // _WIN32

+ 40 - 1
Components/Lzma2/CpuArch.h

@@ -1,5 +1,5 @@
 /* CpuArch.h -- CPU specific code
-2024-06-17 : Igor Pavlov : Public domain */
+Igor Pavlov : Public domain */
 
 #ifndef ZIP7_INC_CPU_ARCH_H
 #define ZIP7_INC_CPU_ARCH_H
@@ -47,6 +47,12 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
   #define MY_CPU_SIZEOF_POINTER 4
 #endif
 
+#if defined(__SSE2__) \
+    || defined(MY_CPU_AMD64) \
+    || defined(_M_IX86_FP) && (_M_IX86_FP >= 2)
+#define MY_CPU_SSE2
+#endif
+
 
 #if  defined(_M_ARM64) \
   || defined(_M_ARM64EC) \
@@ -509,11 +515,19 @@ problem-4 : performace:
 
 #if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
 
+#if 0
+// Z7_BSWAP16 can be slow for x86-msvc
+#define GetBe16_to32(p)  (Z7_BSWAP16 (*(const UInt16 *)(const void *)(p)))
+#else
+#define GetBe16_to32(p)  (Z7_BSWAP32 (*(const UInt16 *)(const void *)(p)) >> 16)
+#endif
+
 #define GetBe32(p)  Z7_BSWAP32 (*(const UInt32 *)(const void *)(p))
 #define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); }
 
 #if defined(MY_CPU_LE_UNALIGN_64)
 #define GetBe64(p)  Z7_BSWAP64 (*(const UInt64 *)(const void *)(p))
+#define SetBe64(p, v) { (*(UInt64 *)(void *)(p)) = Z7_BSWAP64(v); }
 #endif
 
 #else
@@ -536,21 +550,39 @@ problem-4 : performace:
 #define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
 #endif
 
+#ifndef SetBe64
+#define SetBe64(p, v) { Byte *_ppp_ = (Byte *)(p); UInt64 _vvv_ = (v); \
+    _ppp_[0] = (Byte)(_vvv_ >> 56); \
+    _ppp_[1] = (Byte)(_vvv_ >> 48); \
+    _ppp_[2] = (Byte)(_vvv_ >> 40); \
+    _ppp_[3] = (Byte)(_vvv_ >> 32); \
+    _ppp_[4] = (Byte)(_vvv_ >> 24); \
+    _ppp_[5] = (Byte)(_vvv_ >> 16); \
+    _ppp_[6] = (Byte)(_vvv_ >> 8); \
+    _ppp_[7] = (Byte)_vvv_; }
+#endif
+
 #ifndef GetBe16
+#ifdef GetBe16_to32
+#define GetBe16(p) ( (UInt16) GetBe16_to32(p))
+#else
 #define GetBe16(p) ( (UInt16) ( \
     ((UInt16)((const Byte *)(p))[0] << 8) | \
              ((const Byte *)(p))[1] ))
 #endif
+#endif
 
 
 #if defined(MY_CPU_BE)
 #define Z7_CONV_BE_TO_NATIVE_CONST32(v)  (v)
 #define Z7_CONV_LE_TO_NATIVE_CONST32(v)  Z7_BSWAP32_CONST(v)
 #define Z7_CONV_NATIVE_TO_BE_32(v)       (v)
+// #define Z7_GET_NATIVE16_FROM_2_BYTES(b0, b1)  ((b1) | ((b0) << 8))
 #elif defined(MY_CPU_LE)
 #define Z7_CONV_BE_TO_NATIVE_CONST32(v)  Z7_BSWAP32_CONST(v)
 #define Z7_CONV_LE_TO_NATIVE_CONST32(v)  (v)
 #define Z7_CONV_NATIVE_TO_BE_32(v)       Z7_BSWAP32(v)
+// #define Z7_GET_NATIVE16_FROM_2_BYTES(b0, b1)  ((b0) | ((b1) << 8))
 #else
 #error Stop_Compiling_Unknown_Endian_CONV
 #endif
@@ -589,6 +621,11 @@ problem-4 : performace:
 #endif
 
 
+#ifndef GetBe16_to32
+#define GetBe16_to32(p) GetBe16(p)
+#endif
+
+
 #if defined(MY_CPU_X86_OR_AMD64) \
   || defined(MY_CPU_ARM_OR_ARM64) \
   || defined(MY_CPU_PPC_OR_PPC64)
@@ -617,6 +654,7 @@ BoolInt CPU_IsSupported_SSE2(void);
 BoolInt CPU_IsSupported_SSSE3(void);
 BoolInt CPU_IsSupported_SSE41(void);
 BoolInt CPU_IsSupported_SHA(void);
+BoolInt CPU_IsSupported_SHA512(void);
 BoolInt CPU_IsSupported_PageGB(void);
 
 #elif defined(MY_CPU_ARM_OR_ARM64)
@@ -634,6 +672,7 @@ BoolInt CPU_IsSupported_SHA1(void);
 BoolInt CPU_IsSupported_SHA2(void);
 BoolInt CPU_IsSupported_AES(void);
 #endif
+BoolInt CPU_IsSupported_SHA512(void);
 
 #endif
 

+ 12 - 12
Components/Lzma2/LzFind.c

@@ -1,5 +1,5 @@
 /* LzFind.c -- Match finder for LZ algorithms
-2024-03-01 : Igor Pavlov : Public domain */
+: Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -404,7 +404,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
         const unsigned nbMax =
             (p->numHashBytes == 2 ? 16 :
             (p->numHashBytes == 3 ? 24 : 32));
-        if (numBits > nbMax)
+        if (numBits >= nbMax)
           numBits = nbMax;
         if (numBits >= 32)
           hs = (UInt32)0 - 1;
@@ -416,14 +416,14 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
           hs |= (256 << kLzHash_CrcShift_2) - 1;
         {
           const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
-          if (hs > hs2)
+          if (hs >= hs2)
             hs = hs2;
         }
         hsCur = hs;
         if (p->expectedDataSize < historySize)
         {
           const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
-          if (hsCur > hs2)
+          if (hsCur >= hs2)
             hsCur = hs2;
         }
       }
@@ -434,7 +434,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
         if (p->expectedDataSize < historySize)
         {
           hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
-          if (hsCur > hs) // is it possible?
+          if (hsCur >= hs) // is it possible?
             hsCur = hs;
         }
       }
@@ -890,7 +890,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos,
       return d;
     {
       const Byte *pb = cur - delta;
-      curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+      curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)];
       if (pb[maxLen] == cur[maxLen] && *pb == *cur)
       {
         UInt32 len = 0;
@@ -925,7 +925,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos,
       break;
     {
       ptrdiff_t diff;
-      curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+      curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)];
       diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
       if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])
       {
@@ -972,7 +972,7 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
   // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
 
   cmCheck = (UInt32)(pos - _cyclicBufferSize);
-  if ((UInt32)pos <= _cyclicBufferSize)
+  if ((UInt32)pos < _cyclicBufferSize)
     cmCheck = 0;
 
   if (cmCheck < curMatch)
@@ -980,7 +980,7 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
   {
     const UInt32 delta = pos - curMatch;
     {
-      CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+      CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1);
       const Byte *pb = cur - delta;
       unsigned len = (len0 < len1 ? len0 : len1);
       const UInt32 pair0 = pair[0];
@@ -1039,7 +1039,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
   UInt32 cmCheck;
 
   cmCheck = (UInt32)(pos - _cyclicBufferSize);
-  if ((UInt32)pos <= _cyclicBufferSize)
+  if ((UInt32)pos < _cyclicBufferSize)
     cmCheck = 0;
 
   if (// curMatch >= pos ||  // failure
@@ -1048,7 +1048,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
   {
     const UInt32 delta = pos - curMatch;
     {
-      CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+      CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1);
       const Byte *pb = cur - delta;
       unsigned len = (len0 < len1 ? len0 : len1);
       if (pb[len] == cur[len])
@@ -1595,7 +1595,7 @@ static void Bt5_MatchFinder_Skip(void *_p, UInt32 num)
     UInt32 pos = p->pos; \
     UInt32 num2 = num; \
     /* (p->pos == p->posLimit) is not allowed here !!! */ \
-    { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \
+    { const UInt32 rem = p->posLimit - pos; if (num2 >= rem) num2 = rem; } \
     num -= num2; \
     { const UInt32 cycPos = p->cyclicBufferPos; \
       son = p->son + cycPos; \

+ 9 - 1
Components/Lzma2/LzFindMt.c

@@ -1,5 +1,5 @@
 /* LzFindMt.c -- multithreaded Match finder for LZ algorithms
-2024-01-22 : Igor Pavlov : Public domain */
+: Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -82,6 +82,8 @@ extern UInt64 g_NumIters_Bytes;
 Z7_NO_INLINE
 static void MtSync_Construct(CMtSync *p)
 {
+  p->affinityGroup = -1;
+  p->affinityInGroup = 0;
   p->affinity = 0;
   p->wasCreated = False;
   p->csWasInitialized = False;
@@ -259,6 +261,12 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *
   // return ERROR_TOO_MANY_POSTS; // for debug
   // return EINVAL; // for debug
 
+#ifdef _WIN32
+  if (p->affinityGroup >= 0)
+    wres = Thread_Create_With_Group(&p->thread, startAddress, obj,
+        (unsigned)(UInt32)p->affinityGroup, (CAffinityMask)p->affinityInGroup);
+  else
+#endif
   if (p->affinity != 0)
     wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
   else

+ 4 - 2
Components/Lzma2/LzFindMt.h

@@ -1,5 +1,5 @@
 /* LzFindMt.h -- multithreaded Match finder for LZ algorithms
-2024-01-22 : Igor Pavlov : Public domain */
+: Igor Pavlov : Public domain */
 
 #ifndef ZIP7_INC_LZ_FIND_MT_H
 #define ZIP7_INC_LZ_FIND_MT_H
@@ -12,8 +12,10 @@ EXTERN_C_BEGIN
 typedef struct
 {
   UInt32 numProcessedBlocks;
-  CThread thread;
+  Int32 affinityGroup;
+  UInt64 affinityInGroup;
   UInt64 affinity;
+  CThread thread;
 
   BoolInt wasCreated;
   BoolInt needStart;

+ 3 - 1
Components/Lzma2/Lzma2Enc.c

@@ -1,5 +1,5 @@
 /* Lzma2Enc.c -- LZMA2 Encoder
-2023-04-13 : Igor Pavlov : Public domain */
+: Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -235,6 +235,7 @@ void Lzma2EncProps_Init(CLzma2EncProps *p)
   p->numBlockThreads_Reduced = -1;
   p->numBlockThreads_Max = -1;
   p->numTotalThreads = -1;
+  p->numThreadGroups = 0;
 }
 
 void Lzma2EncProps_Normalize(CLzma2EncProps *p)
@@ -781,6 +782,7 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle p,
     }
 
     p->mtCoder.numThreadsMax = (unsigned)p->props.numBlockThreads_Max;
+    p->mtCoder.numThreadGroups = p->props.numThreadGroups;
     p->mtCoder.expectedDataSize = p->expectedDataSize;
     
     {

+ 1 - 0
Components/Lzma2/Lzma2Enc.h

@@ -18,6 +18,7 @@ typedef struct
   int numBlockThreads_Reduced;
   int numBlockThreads_Max;
   int numTotalThreads;
+  unsigned numThreadGroups; // 0 : no groups
 } CLzma2EncProps;
 
 void Lzma2EncProps_Init(CLzma2EncProps *p);

+ 14 - 8
Components/Lzma2/LzmaEnc.c

@@ -1,5 +1,5 @@
 /* LzmaEnc.c -- LZMA Encoder
-2024-01-24: Igor Pavlov : Public domain */
+Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -62,7 +62,9 @@ void LzmaEncProps_Init(CLzmaEncProps *p)
   p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
   p->numHashOutBits = 0;
   p->writeEndMark = 0;
+  p->affinityGroup = -1;
   p->affinity = 0;
+  p->affinityInGroup = 0;
 }
 
 void LzmaEncProps_Normalize(CLzmaEncProps *p)
@@ -72,11 +74,11 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
   p->level = level;
   
   if (p->dictSize == 0)
-    p->dictSize =
-      ( level <= 3 ? ((UInt32)1 << (level * 2 + 16)) :
-      ( level <= 6 ? ((UInt32)1 << (level + 19)) :
-      ( level <= 7 ? ((UInt32)1 << 25) : ((UInt32)1 << 26)
-      )));
+    p->dictSize = (unsigned)level <= 4 ?
+        (UInt32)1 << (level * 2 + 16) :
+        (unsigned)level <= sizeof(size_t) / 2 + 4 ?
+          (UInt32)1 << (level + 20) :
+          (UInt32)1 << (sizeof(size_t) / 2 + 24);
 
   if (p->dictSize > p->reduceSize)
   {
@@ -92,8 +94,8 @@ void LzmaEncProps_Normalize(CLzmaEncProps *p)
   if (p->lp < 0) p->lp = 0;
   if (p->pb < 0) p->pb = 2;
 
-  if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
-  if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
+  if (p->algo < 0) p->algo = (unsigned)level < 5 ? 0 : 1;
+  if (p->fb < 0) p->fb = (unsigned)level < 7 ? 32 : 64;
   if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
   if (p->numHashBytes < 0) p->numHashBytes = (p->btMode ? 4 : 5);
   if (p->mc == 0) p->mc = (16 + ((unsigned)p->fb >> 1)) >> (p->btMode ? 0 : 1);
@@ -598,6 +600,10 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2)
   p->multiThread = (props.numThreads > 1);
   p->matchFinderMt.btSync.affinity =
   p->matchFinderMt.hashSync.affinity = props.affinity;
+  p->matchFinderMt.btSync.affinityGroup =
+  p->matchFinderMt.hashSync.affinityGroup = props.affinityGroup;
+  p->matchFinderMt.btSync.affinityInGroup =
+  p->matchFinderMt.hashSync.affinityInGroup = props.affinityInGroup;
   #endif
 
   return SZ_OK;

+ 3 - 1
Components/Lzma2/LzmaEnc.h

@@ -1,5 +1,5 @@
 /*  LzmaEnc.h -- LZMA Encoder
-2023-04-13 : Igor Pavlov : Public domain */
+: Igor Pavlov : Public domain */
 
 #ifndef ZIP7_INC_LZMA_ENC_H
 #define ZIP7_INC_LZMA_ENC_H
@@ -29,11 +29,13 @@ typedef struct
   int numThreads;  /* 1 or 2, default = 2 */
 
   // int _pad;
+  Int32 affinityGroup;
 
   UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
                         Encoder uses this value to reduce dictionary size */
 
   UInt64 affinity;
+  UInt64 affinityInGroup;
 } CLzmaEncProps;
 
 void LzmaEncProps_Init(CLzmaEncProps *p);

+ 47 - 14
Components/Lzma2/MtCoder.c

@@ -1,5 +1,5 @@
 /* MtCoder.c -- Multi-thread Coder
-2023-09-07 : Igor Pavlov : Public domain */
+: Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -39,14 +39,28 @@ void MtProgressThunk_CreateVTable(CMtProgressThunk *p)
 static THREAD_FUNC_DECL ThreadFunc(void *pp);
 
 
-static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t)
+static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t
+#ifdef _WIN32
+    , CMtCoder * const mtc
+#endif
+    )
 {
   WRes wres = AutoResetEvent_OptCreate_And_Reset(&t->startEvent);
+  // printf("\n====== MtCoderThread_CreateAndStart : \n");
   if (wres == 0)
   {
     t->stop = False;
     if (!Thread_WasCreated(&t->thread))
-      wres = Thread_Create(&t->thread, ThreadFunc, t);
+    {
+#ifdef _WIN32
+      if (mtc->numThreadGroups)
+        wres = Thread_Create_With_Group(&t->thread, ThreadFunc, t,
+            ThreadNextGroup_GetNext(&mtc->nextGroup), // group
+            0); // affinityMask
+      else
+#endif
+        wres = Thread_Create(&t->thread, ThreadFunc, t);
+    }
     if (wres == 0)
       wres = Event_Set(&t->startEvent);
   }
@@ -56,6 +70,7 @@ static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t)
 }
 
 
+Z7_FORCE_INLINE
 static void MtCoderThread_Destruct(CMtCoderThread *t)
 {
   if (Thread_WasCreated(&t->thread))
@@ -85,7 +100,7 @@ static void MtCoderThread_Destruct(CMtCoderThread *t)
 
 static SRes ThreadFunc2(CMtCoderThread *t)
 {
-  CMtCoder *mtc = t->mtCoder;
+  CMtCoder * const mtc = t->mtCoder;
 
   for (;;)
   {
@@ -185,7 +200,11 @@ static SRes ThreadFunc2(CMtCoderThread *t)
       if (mtc->numStartedThreads < mtc->numStartedThreadsLimit
           && mtc->expectedDataSize != readProcessed)
       {
-        res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads]);
+        res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads]
+#ifdef _WIN32
+            , mtc
+#endif
+            );
         if (res == SZ_OK)
           mtc->numStartedThreads++;
         else
@@ -221,7 +240,7 @@ static SRes ThreadFunc2(CMtCoderThread *t)
     }
 
     {
-      CMtCoderBlock *block = &mtc->blocks[bi];
+      CMtCoderBlock * const block = &mtc->blocks[bi];
       block->res = res;
       block->bufIndex = bufIndex;
       block->finished = finished;
@@ -311,7 +330,7 @@ static SRes ThreadFunc2(CMtCoderThread *t)
 
 static THREAD_FUNC_DECL ThreadFunc(void *pp)
 {
-  CMtCoderThread *t = (CMtCoderThread *)pp;
+  CMtCoderThread * const t = (CMtCoderThread *)pp;
   for (;;)
   {
     if (Event_Wait(&t->startEvent) != 0)
@@ -319,7 +338,7 @@ static THREAD_FUNC_DECL ThreadFunc(void *pp)
     if (t->stop)
       return 0;
     {
-      SRes res = ThreadFunc2(t);
+      const SRes res = ThreadFunc2(t);
       CMtCoder *mtc = t->mtCoder;
       if (res != SZ_OK)
       {
@@ -328,7 +347,7 @@ static THREAD_FUNC_DECL ThreadFunc(void *pp)
       
       #ifndef MTCODER_USE_WRITE_THREAD
       {
-        unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads);
+        const unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads);
         if (numFinished == mtc->numStartedThreads)
           if (Event_Set(&mtc->finishedEvent) != 0)
             return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;
@@ -346,6 +365,7 @@ void MtCoder_Construct(CMtCoder *p)
   
   p->blockSize = 0;
   p->numThreadsMax = 0;
+  p->numThreadGroups = 0;
   p->expectedDataSize = (UInt64)(Int64)-1;
 
   p->inStream = NULL;
@@ -429,6 +449,8 @@ SRes MtCoder_Code(CMtCoder *p)
   unsigned i;
   SRes res = SZ_OK;
 
+  // printf("\n====== MtCoder_Code : \n");
+
   if (numThreads > MTCODER_THREADS_MAX)
       numThreads = MTCODER_THREADS_MAX;
   numBlocksMax = MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads);
@@ -492,11 +514,22 @@ SRes MtCoder_Code(CMtCoder *p)
 
   p->numStartedThreadsLimit = numThreads;
   p->numStartedThreads = 0;
+  ThreadNextGroup_Init(&p->nextGroup, p->numThreadGroups, 0); // startGroup
 
   // for (i = 0; i < numThreads; i++)
   {
+    // here we create new thread for first block.
+    // And each new thread will create another new thread after block reading
+    // until numStartedThreadsLimit is reached.
     CMtCoderThread *nextThread = &p->threads[p->numStartedThreads++];
-    RINOK(MtCoderThread_CreateAndStart(nextThread))
+    {
+      const SRes res2 = MtCoderThread_CreateAndStart(nextThread
+#ifdef _WIN32
+            , p
+#endif
+            );
+      RINOK(res2)
+    }
   }
 
   RINOK_THREAD(Event_Set(&p->readEvent))
@@ -513,9 +546,9 @@ SRes MtCoder_Code(CMtCoder *p)
       RINOK_THREAD(Event_Wait(&p->writeEvents[bi]))
 
       {
-        const CMtCoderBlock *block = &p->blocks[bi];
-        unsigned bufIndex = block->bufIndex;
-        BoolInt finished = block->finished;
+        const CMtCoderBlock * const block = &p->blocks[bi];
+        const unsigned bufIndex = block->bufIndex;
+        const BoolInt finished = block->finished;
         if (res == SZ_OK && block->res != SZ_OK)
           res = block->res;
 
@@ -545,7 +578,7 @@ SRes MtCoder_Code(CMtCoder *p)
   }
   #else
   {
-    WRes wres = Event_Wait(&p->finishedEvent);
+    const WRes wres = Event_Wait(&p->finishedEvent);
     res = MY_SRes_HRESULT_FROM_WRes(wres);
   }
   #endif

+ 5 - 2
Components/Lzma2/MtCoder.h

@@ -1,5 +1,5 @@
 /* MtCoder.h -- Multi-thread Coder
-2023-04-13 : Igor Pavlov : Public domain */
+: Igor Pavlov : Public domain */
 
 #ifndef ZIP7_INC_MT_CODER_H
 #define ZIP7_INC_MT_CODER_H
@@ -16,7 +16,7 @@ EXTERN_C_BEGIN
 
 #ifndef Z7_ST
   #define MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1)
-  #define MTCODER_THREADS_MAX 64
+  #define MTCODER_THREADS_MAX 256
   #define MTCODER_BLOCKS_MAX (MTCODER_GET_NUM_BLOCKS_FROM_THREADS(MTCODER_THREADS_MAX) + 3)
 #else
   #define MTCODER_THREADS_MAX 1
@@ -77,6 +77,7 @@ typedef struct CMtCoder_
   
   size_t blockSize;        /* size of input block */
   unsigned numThreadsMax;
+  unsigned numThreadGroups;
   UInt64 expectedDataSize;
 
   ISeqInStreamPtr inStream;
@@ -125,6 +126,8 @@ typedef struct CMtCoder_
   CMtProgress mtProgress;
   CMtCoderBlock blocks[MTCODER_BLOCKS_MAX];
   CMtCoderThread threads[MTCODER_THREADS_MAX];
+
+  CThreadNextGroup nextGroup;
 } CMtCoder;
 
 

+ 234 - 3
Components/Lzma2/Threads.c

@@ -1,5 +1,5 @@
 /* Threads.c -- multithreading library
-2024-03-28 : Igor Pavlov : Public domain */
+: Igor Pavlov : Public domain */
 
 #include "Precomp.h"
 
@@ -59,6 +59,100 @@ WRes Thread_Wait_Close(CThread *p)
   return (res != 0 ? res : res2);
 }
 
+typedef struct MY_PROCESSOR_NUMBER {
+    WORD  Group;
+    BYTE  Number;
+    BYTE  Reserved;
+} MY_PROCESSOR_NUMBER, *MY_PPROCESSOR_NUMBER;
+
+typedef struct MY_GROUP_AFFINITY {
+#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION < 100000)
+    // KAFFINITY is not defined in old mingw
+    ULONG_PTR
+#else
+    KAFFINITY
+#endif
+      Mask;
+    WORD   Group;
+    WORD   Reserved[3];
+} MY_GROUP_AFFINITY, *MY_PGROUP_AFFINITY;
+
+typedef BOOL (WINAPI *Func_SetThreadGroupAffinity)(
+    HANDLE hThread,
+    CONST MY_GROUP_AFFINITY *GroupAffinity,
+    MY_PGROUP_AFFINITY PreviousGroupAffinity);
+
+typedef BOOL (WINAPI *Func_GetThreadGroupAffinity)(
+    HANDLE hThread,
+    MY_PGROUP_AFFINITY GroupAffinity);
+
+typedef BOOL (WINAPI *Func_GetProcessGroupAffinity)(
+    HANDLE hProcess,
+    PUSHORT GroupCount,
+    PUSHORT GroupArray);
+
+Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
+
+#if 0
+#include <stdio.h>
+#define PRF(x) x
+/*
+--
+  before call of SetThreadGroupAffinity()
+    GetProcessGroupAffinity return one group.
+  after call of SetThreadGroupAffinity():
+    GetProcessGroupAffinity return more than group,
+    if SetThreadGroupAffinity() was to another group.
+--
+  GetProcessAffinityMask MS DOCs:
+  {
+    If the calling process contains threads in multiple groups,
+    the function returns zero for both affinity masks.
+  }
+  but tests in win10 with 2 groups (less than 64 cores total):
+    GetProcessAffinityMask() still returns non-zero affinity masks
+    even after SetThreadGroupAffinity() calls.
+*/
+static void PrintProcess_Info()
+{
+  {
+    const
+      Func_GetProcessGroupAffinity fn_GetProcessGroupAffinity =
+     (Func_GetProcessGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
+          "GetProcessGroupAffinity");
+    if (fn_GetProcessGroupAffinity)
+    {
+      unsigned i;
+      USHORT GroupCounts[64];
+      USHORT GroupCount = Z7_ARRAY_SIZE(GroupCounts);
+      BOOL boolRes = fn_GetProcessGroupAffinity(GetCurrentProcess(),
+          &GroupCount, GroupCounts);
+      printf("\n====== GetProcessGroupAffinity : "
+          "boolRes=%u GroupCounts = %u :",
+          boolRes, (unsigned)GroupCount);
+      for (i = 0; i < GroupCount; i++)
+        printf(" %u", GroupCounts[i]);
+      printf("\n");
+    }
+  }
+  {
+    DWORD_PTR processAffinityMask, systemAffinityMask;
+    if (GetProcessAffinityMask(GetCurrentProcess(), &processAffinityMask, &systemAffinityMask))
+    {
+      PRF(printf("\n====== GetProcessAffinityMask : "
+        ": processAffinityMask=%x, systemAffinityMask=%x\n",
+        (UInt32)processAffinityMask, (UInt32)systemAffinityMask);)
+    }
+    else
+      printf("\n==GetProcessAffinityMask FAIL");
+  }
+}
+#else
+#ifndef USE_THREADS_CreateThread
+// #define PRF(x)
+#endif
+#endif
+
 WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
 {
   /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
@@ -72,7 +166,43 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
   
   unsigned threadId;
   *p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId));
-  
+
+#if 0 // 1 : for debug
+  {
+      DWORD_PTR prevMask;
+      DWORD_PTR affinity = 1 << 0;
+      prevMask = SetThreadAffinityMask(*p, (DWORD_PTR)affinity);
+      prevMask = prevMask;
+  }
+#endif
+#if 0 // 1 : for debug
+  {
+      /* win10: new thread will be created in same group that is assigned to parent thread
+                but affinity mask will contain all allowed threads of that group,
+                even if affinity mask of parent group is not full
+         win11: what group it will be created, if we have set
+                affinity of parent thread with ThreadGroupAffinity?
+      */
+      const
+         Func_GetThreadGroupAffinity fn =
+        (Func_GetThreadGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
+             "GetThreadGroupAffinity");
+      if (fn)
+      {
+        // BOOL wres2;
+        MY_GROUP_AFFINITY groupAffinity;
+        memset(&groupAffinity, 0, sizeof(groupAffinity));
+        /* wres2 = */ fn(*p, &groupAffinity);
+        PRF(printf("\n==Thread_Create cur = %6u GetThreadGroupAffinity(): "
+            "wres2_BOOL = %u, group=%u mask=%x\n",
+            GetCurrentThreadId(),
+            wres2,
+            groupAffinity.Group,
+            (UInt32)groupAffinity.Mask);)
+      }
+  }
+#endif
+
   #endif
 
   /* maybe we must use errno here, but probably GetLastError() is also OK. */
@@ -110,7 +240,84 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param
       */
     }
     {
-      DWORD prevSuspendCount = ResumeThread(h);
+      const DWORD prevSuspendCount = ResumeThread(h);
+      /* ResumeThread() returns:
+         0 : was_not_suspended
+         1 : was_resumed
+        -1 : error
+      */
+      if (prevSuspendCount == (DWORD)-1)
+        wres = GetError();
+    }
+  }
+
+  /* maybe we must use errno here, but probably GetLastError() is also OK. */
+  return wres;
+
+  #endif
+}
+
+
+WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask affinityMask)
+{
+#ifdef USE_THREADS_CreateThread
+
+  UNUSED_VAR(group)
+  UNUSED_VAR(affinityMask)
+  return Thread_Create(p, func, param);
+  
+#else
+  
+  /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
+  HANDLE h;
+  WRes wres;
+  unsigned threadId;
+  h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId));
+  *p = h;
+  wres = HandleToWRes(h);
+  if (h)
+  {
+    // PrintProcess_Info();
+    {
+      const
+         Func_SetThreadGroupAffinity fn =
+        (Func_SetThreadGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
+             "SetThreadGroupAffinity");
+      if (fn)
+      {
+        // WRes wres2;
+        MY_GROUP_AFFINITY groupAffinity, prev_groupAffinity;
+        memset(&groupAffinity, 0, sizeof(groupAffinity));
+        // groupAffinity.Mask must use only bits that supported by current group
+        // (groupAffinity.Mask = 0) means all allowed bits
+        groupAffinity.Mask = affinityMask;
+        groupAffinity.Group = (WORD)group;
+        // wres2 =
+        fn(h, &groupAffinity, &prev_groupAffinity);
+        /*
+        if (groupAffinity.Group == prev_groupAffinity.Group)
+          wres2 = wres2;
+        else
+          wres2 = wres2;
+        if (wres2 == 0)
+        {
+          wres2 = GetError();
+          PRF(printf("\n==SetThreadGroupAffinity error: %u\n", wres2);)
+        }
+        else
+        {
+          PRF(printf("\n==Thread_Create_With_Group::SetThreadGroupAffinity()"
+            " threadId = %6u"
+            " group=%u mask=%x\n",
+            threadId,
+            prev_groupAffinity.Group,
+            (UInt32)prev_groupAffinity.Mask);)
+        }
+        */
+      }
+    }
+    {
+      const DWORD prevSuspendCount = ResumeThread(h);
       /* ResumeThread() returns:
          0 : was_not_suspended
          1 : was_resumed
@@ -297,6 +504,13 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
   return Thread_Create_With_CpuSet(p, func, param, NULL);
 }
 
+/*
+WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask affinity)
+{
+  UNUSED_VAR(group)
+  return Thread_Create_With_Affinity(p, func, param, affinity);
+}
+*/
 
 WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
 {
@@ -577,5 +791,22 @@ WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p)
   return AutoResetEvent_CreateNotSignaled(p);
 }
 
+void ThreadNextGroup_Init(CThreadNextGroup *p, UInt32 numGroups, UInt32 startGroup)
+{
+  // printf("\n====== ThreadNextGroup_Init numGroups = %x: startGroup=%x\n", numGroups, startGroup);
+  if (numGroups == 0)
+      numGroups = 1;
+  p->NumGroups = numGroups;
+  p->NextGroup = startGroup % numGroups;
+}
+
+
+UInt32 ThreadNextGroup_GetNext(CThreadNextGroup *p)
+{
+  const UInt32 next = p->NextGroup;
+  p->NextGroup = (next + 1) % p->NumGroups;
+  return next;
+}
+
 #undef PRF
 #undef Print

+ 11 - 1
Components/Lzma2/Threads.h

@@ -1,5 +1,5 @@
 /* Threads.h -- multithreading library
-2024-03-28 : Igor Pavlov : Public domain */
+: Igor Pavlov : Public domain */
 
 #ifndef ZIP7_INC_THREADS_H
 #define ZIP7_INC_THREADS_H
@@ -140,12 +140,22 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param
 WRes Thread_Wait_Close(CThread *p);
 
 #ifdef _WIN32
+WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask affinityMask);
 #define Thread_Create_With_CpuSet(p, func, param, cs) \
   Thread_Create_With_Affinity(p, func, param, *cs)
 #else
 WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet);
 #endif
 
+typedef struct
+{
+  unsigned NumGroups;
+  unsigned NextGroup;
+} CThreadNextGroup;
+
+void ThreadNextGroup_Init(CThreadNextGroup *p, unsigned numGroups, unsigned startGroup);
+unsigned ThreadNextGroup_GetNext(CThreadNextGroup *p);
+
 
 #ifdef _WIN32
 

+ 28 - 0
Components/Lzma2/lzma-history.txt

@@ -1,6 +1,34 @@
 HISTORY of the LZMA SDK
 -----------------------
 
+25.00          2025-07-05
+-------------------------
+- 7-Zip for Windows can now use more than 64 CPU threads for compression
+  to zip/7z/xz archives and for the 7-Zip benchmark.
+  If there are more than one processor group in Windows (on systems with more than
+  64 cpu threads), 7-Zip distributes running CPU threads across different processor groups.
+- fixed some bugs and vulnerabilities.
+
+
+24.09          2024-11-29
+-------------------------
+- The default dictionary size values for LZMA/LZMA2 compression methods were increased:
+         dictionary size   compression level
+  v24.08  v24.09  v24.09   
+          32-bit  64-bit    
+    8 MB   16 MB   16 MB   -mx4
+   16 MB   32 MB   32 MB   -mx5 : Normal
+   32 MB   64 MB   64 MB   -mx6
+   32 MB   64 MB  128 MB   -mx7 : Maximum
+   64 MB   64 MB  256 MB   -mx8
+   64 MB   64 MB  256 MB   -mx9 : Ultra
+  The default dictionary size values for 32-bit versions of LZMA/LZMA2 don't exceed 64 MB.
+- If an archive update operation uses a temporary archive folder and 
+  the archive is moved to the destination folder, 7-Zip shows the progress of moving 
+  the archive file, as this operation can take a long time if the archive is large.
+- Some bugs were fixed.
+
+
 24.07          2024-06-19
 -------------------------
 - Changes in files:

BIN
Files/islzma.dll


+ 4 - 4
Files/islzma.dll.issig

@@ -1,8 +1,8 @@
 format issig-v2
 file-name "islzma.dll"
-file-size 135816
-file-hash b252471e95f0853902b15ae71a90574f9b168f8d4a0c474b20537511f90220a5
+file-size 139408
+file-hash 6a7c37dfd77d54fc74ae619abb7eb3b15b29f5ae46610d44b5c3996b30f523e9
 file-tag ""
 key-id def0147c3bbc17ab99bf7b7a9c2de1390283f38972152418d7c2a4a7d7131a38
-sig-r fb93b45c8a032554fc65903776ab7783f95fe3c3603524c61163bae253958de4
-sig-s fdf1ed2c94a64b415896d28ac6e8b41a9a0cb16cc8d372e1d7be76445b5bc600
+sig-r 1fa739588105d001abdc3a875d3d78c5f4b6285ac33c6eb4d8a5dcfe42b0a9fa
+sig-s bc73f03d2650aad22c0f0e9984116d6f3078b21cef3df23bea2016aaae757c67

BIN
Files/islzma32.exe


+ 4 - 4
Files/islzma32.exe.issig

@@ -1,8 +1,8 @@
 format issig-v2
 file-name "islzma32.exe"
-file-size 199304
-file-hash 5665e03297af04d4f90423b56e8b016b2924d91ba781573036b710044e843f0a
+file-size 203408
+file-hash 1609e93c89285c06d182c4368e66165fd49940727f3a71c503a9374975001c0e
 file-tag ""
 key-id def0147c3bbc17ab99bf7b7a9c2de1390283f38972152418d7c2a4a7d7131a38
-sig-r a47b07dfa78c7845821a5c4122cb177d7c0e4e4ace9fe8d53564b7756ecbc598
-sig-s 0674b22927c82a6386d2eef2da7d20df12bed627dce36021ff5e6cf2933566da
+sig-r 34193b3a6315f82a84efef57b90df582e29e4185a7d2f6fb798be1070e83b9dd
+sig-s a9c34814c9958fbcfe92ef4329c52b857d454397f3a803943f78a5d48fa0ab5e

BIN
Files/islzma64.exe


+ 4 - 4
Files/islzma64.exe.issig

@@ -1,8 +1,8 @@
 format issig-v2
 file-name "islzma64.exe"
-file-size 230536
-file-hash b0ad1857da7fb94b54988c7633578dc057b51cf921b94f6f2f7b57a113310712
+file-size 226448
+file-hash 1886dc7feb6c1ea500d1f01387f80b7164df171dbad0856f61eaf8bcc3e67fd1
 file-tag ""
 key-id def0147c3bbc17ab99bf7b7a9c2de1390283f38972152418d7c2a4a7d7131a38
-sig-r 299c59b267ab06a38beb9ab28e0cfc001914bcf68495611fae73b06259981192
-sig-s 8cecbd5dcc971b603c56b937cf35859c0669883a417776c8bf5cbeacb4f70e3a
+sig-r fa2e0d3777a6b5ac72250f599d20c8ae0b9d41eb2a89651eaf59f72f48fec7db
+sig-s 46a110b603e586cb34bdeef97d7ec36d13a773e445be4fe9d2547197c7fb6488

BIN
Projects/Src/Compression.LZMADecompressor/Lzma2Decode/ISLzmaDec.obj


+ 4 - 4
Projects/Src/Compression.LZMADecompressor/Lzma2Decode/ISLzmaDec.obj.issig

@@ -1,8 +1,8 @@
 format issig-v2
 file-name "ISLzmaDec.obj"
-file-size 21751
-file-hash 79bb11fbe0b862b6a42af2db4664748daa54347d56b3c40998dcb860111195ac
+file-size 21746
+file-hash 9012fadb066ffdafa8af2f973348a84ced5f1fb4dc0d6c54d6d6b2088368691a
 file-tag ""
 key-id def0147c3bbc17ab99bf7b7a9c2de1390283f38972152418d7c2a4a7d7131a38
-sig-r 5653a50fed00f5783a88a9b6bc9ada2bd9a74f0dd7ee1ccd61e58724d8d61ec8
-sig-s 8a4c67e578e990061a56abbaf03e39464c44494a136eb2899f430d944eeeb0ee
+sig-r f3ccaa159de89d3d5d53af24d88b63fd0193894a5c81e016c1df0e2b6009af4f
+sig-s d77e4dc5900827759ea64b459b4346de625f048e4cc1f6afdd7e33ad5a28d2ef

BIN
Projects/Src/Compression.SevenZipDecoder/7zDecode/IS7zDec.obj


+ 4 - 4
Projects/Src/Compression.SevenZipDecoder/7zDecode/IS7zDec.obj.issig

@@ -1,8 +1,8 @@
 format issig-v2
 file-name "IS7zDec.obj"
-file-size 93886
-file-hash 43ef412680232ecf42556737b7ea1c341f8134e64bf0b36932458e97d569d7c5
+file-size 93775
+file-hash e2012212063a5201f5e71d83a7114ce2141bbc9be13f413519e1a2acd84a319a
 file-tag ""
 key-id def0147c3bbc17ab99bf7b7a9c2de1390283f38972152418d7c2a4a7d7131a38
-sig-r 46004c60d2d7e59202b263a1798b429523898152c2b0543513efc98cb82539c7
-sig-s 41cee9e379f13152982b0d0c4a6e70751db45953c0065bca97d11c092e50c6dd
+sig-r 89f773f3dc1a96515f0210d3869861d5e81810f26b89c1fb008f2c0012eb4c43
+sig-s 7fc68b2981f2e0e17e74a8e0de5927b67cb6093c923147424a146795644794c6