Browse Source

Merge pull request #104521 from Chubercik/pcre2-10.45

pcre2: Update to 10.45
Thaddeus Crews 4 months ago
parent
commit
d032c1170a
75 changed files with 16289 additions and 5774 deletions
  1. 1 0
      modules/regex/SCsub
  2. 3 3
      thirdparty/README.md
  3. 0 36
      thirdparty/pcre2/AUTHORS
  4. 200 0
      thirdparty/pcre2/AUTHORS.md
  5. 42 33
      thirdparty/pcre2/LICENCE.md
  6. 5 1
      thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c
  7. 10 13
      thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c
  8. 0 0
      thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorFreeBSD.c
  9. 0 0
      thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorPosix.c
  10. 0 0
      thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorWindows.c
  11. 0 0
      thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorNetBSD.c
  12. 0 0
      thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorPosix.c
  13. 0 0
      thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorPosix.c
  14. 0 0
      thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorWindows.c
  15. 11 11
      thirdparty/pcre2/deps/sljit/sljit_src/sljitConfig.h
  16. 1 1
      thirdparty/pcre2/deps/sljit/sljit_src/sljitConfigCPU.h
  17. 136 45
      thirdparty/pcre2/deps/sljit/sljit_src/sljitConfigInternal.h
  18. 410 215
      thirdparty/pcre2/deps/sljit/sljit_src/sljitLir.c
  19. 404 147
      thirdparty/pcre2/deps/sljit/sljit_src/sljitLir.h
  20. 385 194
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeARM_32.c
  21. 388 174
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeARM_64.c
  22. 381 168
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeARM_T2_32.c
  23. 361 195
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c
  24. 3 3
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeMIPS_32.c
  25. 3 3
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeMIPS_64.c
  26. 264 154
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeMIPS_common.c
  27. 0 0
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativePPC_32.c
  28. 0 0
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativePPC_64.c
  29. 361 256
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativePPC_common.c
  30. 0 0
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeRISCV_32.c
  31. 0 0
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeRISCV_64.c
  32. 409 176
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeRISCV_common.c
  33. 293 283
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeS390X.c
  34. 102 36
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeX86_32.c
  35. 114 53
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeX86_64.c
  36. 443 297
      thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeX86_common.c
  37. 516 0
      thirdparty/pcre2/deps/sljit/sljit_src/sljitSerialize.c
  38. 0 0
      thirdparty/pcre2/deps/sljit/sljit_src/sljitUtils.c
  39. 27 27
      thirdparty/pcre2/src/config.h
  40. 67 5
      thirdparty/pcre2/src/pcre2.h
  41. 92 51
      thirdparty/pcre2/src/pcre2_auto_possess.c
  42. 2 4
      thirdparty/pcre2/src/pcre2_chkdint.c
  43. 338 275
      thirdparty/pcre2/src/pcre2_compile.c
  44. 280 0
      thirdparty/pcre2/src/pcre2_compile.h
  45. 2737 0
      thirdparty/pcre2/src/pcre2_compile_class.c
  46. 2 2
      thirdparty/pcre2/src/pcre2_config.c
  47. 60 6
      thirdparty/pcre2/src/pcre2_context.c
  48. 10 8
      thirdparty/pcre2/src/pcre2_convert.c
  49. 49 58
      thirdparty/pcre2/src/pcre2_dfa_match.c
  50. 28 5
      thirdparty/pcre2/src/pcre2_error.c
  51. 22 8
      thirdparty/pcre2/src/pcre2_extuni.c
  52. 8 7
      thirdparty/pcre2/src/pcre2_find_bracket.c
  53. 258 107
      thirdparty/pcre2/src/pcre2_internal.h
  54. 50 16
      thirdparty/pcre2/src/pcre2_intmodedep.h
  55. 2280 0
      thirdparty/pcre2/src/pcre2_jit_char_inc.h
  56. 384 155
      thirdparty/pcre2/src/pcre2_jit_compile.c
  57. 1 1
      thirdparty/pcre2/src/pcre2_jit_match.c
  58. 2 2
      thirdparty/pcre2/src/pcre2_jit_misc.c
  59. 5 5
      thirdparty/pcre2/src/pcre2_jit_neon_inc.h
  60. 58 57
      thirdparty/pcre2/src/pcre2_jit_simd_inc.h
  61. 5 5
      thirdparty/pcre2/src/pcre2_maketables.c
  62. 406 134
      thirdparty/pcre2/src/pcre2_match.c
  63. 5 3
      thirdparty/pcre2/src/pcre2_match_data.c
  64. 1 1
      thirdparty/pcre2/src/pcre2_ord2utf.c
  65. 8 6
      thirdparty/pcre2/src/pcre2_pattern_info.c
  66. 18 18
      thirdparty/pcre2/src/pcre2_serialize.c
  67. 197 43
      thirdparty/pcre2/src/pcre2_study.c
  68. 851 164
      thirdparty/pcre2/src/pcre2_substitute.c
  69. 2 2
      thirdparty/pcre2/src/pcre2_substring.c
  70. 6 6
      thirdparty/pcre2/src/pcre2_tables.c
  71. 1879 1635
      thirdparty/pcre2/src/pcre2_ucd.c
  72. 37 25
      thirdparty/pcre2/src/pcre2_ucp.h
  73. 420 357
      thirdparty/pcre2/src/pcre2_ucptables.c
  74. 132 0
      thirdparty/pcre2/src/pcre2_util.h
  75. 316 79
      thirdparty/pcre2/src/pcre2_xclass.c

+ 1 - 0
modules/regex/SCsub

@@ -22,6 +22,7 @@ if env["builtin_pcre2"]:
         "pcre2_chartables.c",
         "pcre2_chkdint.c",
         "pcre2_compile.c",
+        "pcre2_compile_class.c",
         "pcre2_config.c",
         "pcre2_context.c",
         "pcre2_convert.c",

+ 3 - 3
thirdparty/README.md

@@ -852,7 +852,7 @@ Patches:
 ## pcre2
 
 - Upstream: http://www.pcre.org
-- Version: 10.43 (3864abdb713f78831dd12d898ab31bbb0fa630b6, 2024)
+- Version: 10.45 (2dce7761b1831fd3f82a9c2bd5476259d945da4d, 2025)
 - License: BSD-3-Clause
 
 Files extracted from upstream source:
@@ -862,8 +862,8 @@ Files extracted from upstream source:
 - `src/pcre2_jit_match.c`
 - `src/pcre2_jit_misc.c`
 - `src/pcre2_ucptables.c`
-- `src/sljit/`
-- `AUTHORS` and `LICENCE`
+- `deps/sljit/sljit_src`
+- `AUTHORS.md` and `LICENCE.md`
 
 
 ## recastnavigation

+ 0 - 36
thirdparty/pcre2/AUTHORS

@@ -1,36 +0,0 @@
-THE MAIN PCRE2 LIBRARY CODE
----------------------------
-
-Written by:       Philip Hazel
-Email local part: Philip.Hazel
-Email domain:     gmail.com
-
-Retired from University of Cambridge Computing Service,
-Cambridge, England.
-
-Copyright (c) 1997-2024 University of Cambridge
-All rights reserved
-
-
-PCRE2 JUST-IN-TIME COMPILATION SUPPORT
---------------------------------------
-
-Written by:       Zoltan Herczeg
-Email local part: hzmester
-Emain domain:     freemail.hu
-
-Copyright(c) 2010-2024 Zoltan Herczeg
-All rights reserved.
-
-
-STACK-LESS JUST-IN-TIME COMPILER
---------------------------------
-
-Written by:       Zoltan Herczeg
-Email local part: hzmester
-Emain domain:     freemail.hu
-
-Copyright(c) 2009-2024 Zoltan Herczeg
-All rights reserved.
-
-####

+ 200 - 0
thirdparty/pcre2/AUTHORS.md

@@ -0,0 +1,200 @@
+PCRE2 Authorship and Contributors
+=================================
+
+COPYRIGHT
+---------
+
+Please see the file [LICENCE](./LICENCE.md) in the PCRE2 distribution for
+copyright details.
+
+
+MAINTAINERS
+-----------
+
+The PCRE and PCRE2 libraries were authored and maintained by Philip Hazel.
+
+Since 2024, the contributors with administrator access to the project are now
+Nicholas Wilson and Zoltán Herczeg. See the file [SECURITY](./SECURITY.md) for
+GPG keys.
+
+Both administrators are volunteers acting in a personal capacity.
+
+<table>
+<thead>
+<tr>
+  <th>Name</th>
+  <th>Role</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td>
+
+  Nicholas Wilson<br/>
+  `[email protected]`<br/>
+  Currently of Microsoft Research Cambridge, UK
+
+  </td>
+  <td>
+
+  * General project administration & maintenance
+  * Release management
+  * Code maintenance
+
+  </td>
+</tr>
+<tr>
+  <td>
+
+  Zoltán Herczeg<br/>
+  `hzmester@freemail.hu`<br/>
+  Currently of the University of Szeged, Hungary
+
+  </td>
+  <td>
+
+  * Code maintenance
+  * Ownership of `sljit` and PCRE2's JIT
+
+  </td>
+</tr>
+</tbody>
+</table>
+
+
+CONTRIBUTORS
+------------
+
+Many others have participated and contributed to PCRE2 over its history.
+
+The maintainers are grateful for all contributions and participation over the
+years. We apologise for any names we have forgotten.
+
+We are especially grateful to Philip Hazel, creator of PCRE and PCRE2, and
+maintainer from 1997 to 2024.
+
+All names listed alphabetically.
+
+### Contributors to PCRE2
+
+This list includes names up until the PCRE2 10.44 release. New names will be
+added from the Git history on each release.
+
+    Scott Bell
+    Carlo Marcelo Arenas Belón
+    Edward Betts
+    Jan-Willem Blokland
+    Ross Burton
+    Dmitry Cherniachenko
+    Alexey Chupahin
+    Jessica Clarke
+    Alejandro Colomar
+    Jeremie Courreges-Anglas
+    Addison Crump
+    Alex Dowad
+    Daniel Engberg
+    Daniel Richard G
+    David Gaussmann
+    Andrey Gorbachev
+    Jordan Griege
+    Jason Hood
+    Bumsu Hyeon
+    Roy Ivy
+    Martin Joerg
+    Guillem Jover
+    Ralf Junker
+    Ayesh Karunaratne
+    Michael Kaufmann
+    Yunho Kim
+    Joshua Kinard
+    David Korczynski
+    Uwe Korn
+    Jonas Kvinge
+    Kristian Larsson
+    Kai Lu
+    Behzod Mansurov
+    B. Scott Michel
+    Nathan Moinvaziri
+    Mike Munday
+    Marc Mutz
+    Fabio Pagani
+    Christian Persch
+    Tristan Ross
+    William A Rowe Jr
+    David Seifert
+    Yaakov Selkowitz
+    Rich Siegel
+    Karl Skomski
+    Maciej Sroczyński
+    Wolfgang Stöggl
+    Thomas Tempelmann
+    Greg Thain
+    Lucas Trzesniewski
+    Theodore Tsirpanis
+    Matthew Vernon
+    Rémi Verschelde
+    Thomas Voss
+    Ezekiel Warren
+    Carl Weaver
+    Chris Wilson
+    Amin Yahyaabadi
+    Joe Zhang
+
+### Contributors to PCRE1
+
+These people contributed either by sending patches or reporting serious issues.
+
+    Irfan Adilovic
+    Alexander Barkov
+    Daniel Bergström
+    David Burgess
+    Ross Burton
+    David Byron
+    Fred Cox
+    Christian Ehrlicher
+    Tom Fortmann
+    Lionel Fourquaux
+    Mike Frysinger
+    Daniel Richard G
+    Dair Gran
+    "Graycode" (Red Hat Product Security)
+    Viktor Griph
+    Wen Guanxing
+    Robin Houston
+    Martin Jerabek
+    Peter Kankowski
+    Stephen Kelly
+    Yunho Kim
+    Joshua Kinard
+    Carsten Klein
+    Evgeny Kotkov
+    Ronald Landheer-Cieslak
+    Alan Lehotsky
+    Dmitry V. Levin
+    Nuno Lopes
+    Kai Lu
+    Giuseppe Maxia
+    Dan Mooney
+    Marc Mutz
+    Markus Oberhumer
+    Sheri Pierce
+    Petr Pisar
+    Ari Pollak
+    Bob Rossi
+    Ruiger Rill
+    Michael Shigorin
+    Rich Siegel
+    Craig Silverstein (C++ wrapper)
+    Karl Skomski
+    Paul Sokolovsky
+    Stan Switzer
+    Ian Taylor
+    Mark Tetrode
+    Jeff Trawick
+    Steven Van Ingelgem
+    Lawrence Velazquez
+    Jiong Wang
+    Stefan Weber
+    Chris Wilson
+
+Thanks go to Jeffrey Friedl for testing and debugging assistance.

+ 42 - 33
thirdparty/pcre2/LICENCE → thirdparty/pcre2/LICENCE.md

@@ -1,5 +1,8 @@
-PCRE2 LICENCE
--------------
+PCRE2 License
+=============
+
+| SPDX-License-Identifier: | BSD-3-Clause WITH PCRE2-exception |
+|---------|-------|
 
 PCRE2 is a library of functions to support regular expressions whose syntax
 and semantics are as close as possible to those of the Perl 5 language.
@@ -16,40 +19,46 @@ optimize pattern matching. This is an optional feature that can be omitted when
 the library is built.
 
 
-THE BASIC LIBRARY FUNCTIONS
----------------------------
+COPYRIGHT
+---------
+
+### The basic library functions
 
-Written by:       Philip Hazel
-Email local part: Philip.Hazel
-Email domain:     gmail.com
+    Written by:       Philip Hazel
+    Email local part: Philip.Hazel
+    Email domain:     gmail.com
 
-Retired from University of Cambridge Computing Service,
-Cambridge, England.
+    Retired from University of Cambridge Computing Service,
+    Cambridge, England.
 
-Copyright (c) 1997-2024 University of Cambridge
-All rights reserved.
+    Copyright (c) 1997-2007 University of Cambridge
+    Copyright (c) 2007-2024 Philip Hazel
+    All rights reserved.
 
+### PCRE2 Just-In-Time compilation support
 
-PCRE2 JUST-IN-TIME COMPILATION SUPPORT
---------------------------------------
+    Written by:       Zoltan Herczeg
+    Email local part: hzmester
+    Email domain:     freemail.hu
 
-Written by:       Zoltan Herczeg
-Email local part: hzmester
-Email domain:     freemail.hu
+    Copyright (c) 2010-2024 Zoltan Herczeg
+    All rights reserved.
 
-Copyright(c) 2010-2024 Zoltan Herczeg
-All rights reserved.
+### Stack-less Just-In-Time compiler
 
+    Written by:       Zoltan Herczeg
+    Email local part: hzmester
+    Email domain:     freemail.hu
 
-STACK-LESS JUST-IN-TIME COMPILER
---------------------------------
+    Copyright (c) 2009-2024 Zoltan Herczeg
+    All rights reserved.
 
-Written by:       Zoltan Herczeg
-Email local part: hzmester
-Email domain:     freemail.hu
+### All other contributions
 
-Copyright(c) 2009-2024 Zoltan Herczeg
-All rights reserved.
+Many other contributors have participated in the authorship of PCRE2. As PCRE2
+has never required a Contributor Licensing Agreement, or other copyright
+assignment agreement, all contributions have copyright retained by each
+original contributor or their employer.
 
 
 THE "BSD" LICENCE
@@ -58,16 +67,16 @@ THE "BSD" LICENCE
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 
-    * Redistributions of source code must retain the above copyright notices,
-      this list of conditions and the following disclaimer.
+* Redistributions of source code must retain the above copyright notices,
+  this list of conditions and the following disclaimer.
 
-    * Redistributions in binary form must reproduce the above copyright
-      notices, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
+* Redistributions in binary form must reproduce the above copyright
+  notices, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
 
-    * Neither the name of the University of Cambridge nor the names of any
-      contributors may be used to endorse or promote products derived from this
-      software without specific prior written permission.
+* Neither the name of the University of Cambridge nor the names of any
+  contributors may be used to endorse or promote products derived from this
+  software without specific prior written permission.
 
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

+ 5 - 1
thirdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorApple.c → thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorApple.c

@@ -41,9 +41,10 @@
 #include <sys/utsname.h>
 #include <stdlib.h>
 
-#define SLJIT_MAP_JIT	(get_map_jit_flag())
 #define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
 
+#ifdef MAP_JIT
+#define SLJIT_MAP_JIT	(get_map_jit_flag())
 static SLJIT_INLINE int get_map_jit_flag(void)
 {
 	size_t page_size;
@@ -70,6 +71,9 @@ static SLJIT_INLINE int get_map_jit_flag(void)
 	}
 	return map_jit_flag;
 }
+#else /* !defined(MAP_JIT) */
+#define SLJIT_MAP_JIT	(0)
+#endif
 
 #elif defined(SLJIT_CONFIG_ARM) && SLJIT_CONFIG_ARM
 

+ 10 - 13
thirdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorCore.c → thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorCore.c

@@ -181,8 +181,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
 				header->executable_offset = free_block->header.executable_offset;
 #endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
 				AS_BLOCK_HEADER(header, size)->prev_size = size;
-			}
-			else {
+			} else {
 				sljit_remove_free_block(free_block);
 				header = (struct block_header*)free_block;
 				size = chunk_size;
@@ -230,26 +229,25 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
 #endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
 		sljit_insert_free_block(free_block, chunk_size);
 		next_header = AS_BLOCK_HEADER(free_block, chunk_size);
-	}
-	else {
+	} else {
 		/* All space belongs to this allocation. */
 		allocated_size += chunk_size;
 		header->size = chunk_size;
 		next_header = AS_BLOCK_HEADER(header, chunk_size);
 	}
-	SLJIT_ALLOCATOR_UNLOCK();
 	next_header->size = 1;
 	next_header->prev_size = chunk_size;
 #ifdef SLJIT_HAS_EXECUTABLE_OFFSET
 	next_header->executable_offset = executable_offset;
 #endif /* SLJIT_HAS_EXECUTABLE_OFFSET */
+	SLJIT_ALLOCATOR_UNLOCK();
 	return MEM_START(header);
 }
 
-SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
+SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void *ptr)
 {
 	struct block_header *header;
-	struct free_block* free_block;
+	struct free_block *free_block;
 
 	SLJIT_ALLOCATOR_LOCK();
 	header = AS_BLOCK_HEADER(ptr, -(sljit_sw)sizeof(struct block_header));
@@ -269,8 +267,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
 		free_block->size += header->size;
 		header = AS_BLOCK_HEADER(free_block, free_block->size);
 		header->prev_size = free_block->size;
-	}
-	else {
+	} else {
 		free_block = (struct free_block*)header;
 		sljit_insert_free_block(free_block, header->size);
 	}
@@ -308,7 +305,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
 	free_block = free_blocks;
 	while (free_block) {
 		next_free_block = free_block->next;
-		if (!free_block->header.prev_size && 
+		if (!free_block->header.prev_size &&
 				AS_BLOCK_HEADER(free_block, free_block->size)->size == 1) {
 			total_size -= free_block->size;
 			sljit_remove_free_block(free_block);
@@ -317,14 +314,14 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
 		free_block = next_free_block;
 	}
 
-	SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks));
+	SLJIT_ASSERT(total_size || (!total_size && !free_blocks));
 	SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1);
 	SLJIT_ALLOCATOR_UNLOCK();
 }
 
 #ifdef SLJIT_HAS_EXECUTABLE_OFFSET
-SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr)
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code)
 {
-	return ((struct block_header *)(ptr))[-1].executable_offset;
+	return ((struct block_header*)SLJIT_CODE_TO_PTR(code))[-1].executable_offset;
 }
 #endif /* SLJIT_HAS_EXECUTABLE_OFFSET */

+ 0 - 0
thirdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c → thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorFreeBSD.c


+ 0 - 0
thirdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorPosix.c → thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorPosix.c


+ 0 - 0
thirdparty/pcre2/src/sljit/allocator_src/sljitExecAllocatorWindows.c → thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitExecAllocatorWindows.c


+ 0 - 0
thirdparty/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c → thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorNetBSD.c


+ 0 - 0
thirdparty/pcre2/src/sljit/allocator_src/sljitProtExecAllocatorPosix.c → thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitProtExecAllocatorPosix.c


+ 0 - 0
thirdparty/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorPosix.c → thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorPosix.c


+ 0 - 0
thirdparty/pcre2/src/sljit/allocator_src/sljitWXExecAllocatorWindows.c → thirdparty/pcre2/deps/sljit/sljit_src/allocator_src/sljitWXExecAllocatorWindows.c


+ 11 - 11
thirdparty/pcre2/src/sljit/sljitConfig.h → thirdparty/pcre2/deps/sljit/sljit_src/sljitConfig.h

@@ -29,7 +29,7 @@
 
 #ifdef __cplusplus
 extern "C" {
-#endif
+#endif /* __cplusplus */
 
 /*
   This file contains the basic configuration options for the SLJIT compiler
@@ -47,19 +47,19 @@ extern "C" {
 #ifndef SLJIT_UTIL_STACK
 /* Enabled by default */
 #define SLJIT_UTIL_STACK 1
-#endif
+#endif /* SLJIT_UTIL_STACK */
 
 /* Uses user provided allocator to allocate the stack (see SLJIT_UTIL_STACK) */
 #ifndef SLJIT_UTIL_SIMPLE_STACK_ALLOCATION
 /* Disabled by default */
 #define SLJIT_UTIL_SIMPLE_STACK_ALLOCATION 0
-#endif
+#endif /* SLJIT_UTIL_SIMPLE_STACK_ALLOCATION */
 
 /* Single threaded application. Does not require any locks. */
 #ifndef SLJIT_SINGLE_THREADED
 /* Disabled by default. */
 #define SLJIT_SINGLE_THREADED 0
-#endif
+#endif /* SLJIT_SINGLE_THREADED */
 
 /* --------------------------------------------------------------------- */
 /*  Configuration                                                        */
@@ -70,7 +70,7 @@ extern "C" {
 #ifndef SLJIT_STD_MACROS_DEFINED
 /* Disabled by default. */
 #define SLJIT_STD_MACROS_DEFINED 0
-#endif
+#endif /* SLJIT_STD_MACROS_DEFINED */
 
 /* Executable code allocation:
    If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should
@@ -93,7 +93,7 @@ extern "C" {
 #ifndef SLJIT_PROT_EXECUTABLE_ALLOCATOR
 /* Disabled by default. */
 #define SLJIT_PROT_EXECUTABLE_ALLOCATOR 0
-#endif
+#endif /* SLJIT_PROT_EXECUTABLE_ALLOCATOR */
 
 /* When SLJIT_WX_EXECUTABLE_ALLOCATOR is enabled SLJIT uses an
    allocator which does not set writable and executable permission
@@ -104,7 +104,7 @@ extern "C" {
 #ifndef SLJIT_WX_EXECUTABLE_ALLOCATOR
 /* Disabled by default. */
 #define SLJIT_WX_EXECUTABLE_ALLOCATOR 0
-#endif
+#endif /* SLJIT_WX_EXECUTABLE_ALLOCATOR */
 
 #endif /* !SLJIT_EXECUTABLE_ALLOCATOR */
 
@@ -112,19 +112,19 @@ extern "C" {
 #ifndef SLJIT_ARGUMENT_CHECKS
 /* Disabled by default */
 #define SLJIT_ARGUMENT_CHECKS 0
-#endif
+#endif /* SLJIT_ARGUMENT_CHECKS */
 
 /* Debug checks (assertions, etc.). */
 #ifndef SLJIT_DEBUG
 /* Enabled by default */
 #define SLJIT_DEBUG 1
-#endif
+#endif /* SLJIT_DEBUG */
 
 /* Verbose operations. */
 #ifndef SLJIT_VERBOSE
 /* Enabled by default */
 #define SLJIT_VERBOSE 1
-#endif
+#endif /* SLJIT_VERBOSE */
 
 /*
   SLJIT_IS_FPU_AVAILABLE
@@ -137,6 +137,6 @@ extern "C" {
 
 #ifdef __cplusplus
 } /* extern "C" */
-#endif
+#endif /* __cplusplus */
 
 #endif /* SLJIT_CONFIG_H_ */

+ 1 - 1
thirdparty/pcre2/src/sljit/sljitConfigCPU.h → thirdparty/pcre2/deps/sljit/sljit_src/sljitConfigCPU.h

@@ -169,7 +169,7 @@
 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) \
 	|| (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
 #define SLJIT_CONFIG_ARM_32 1
-#endif
+#endif /* SLJIT_CONFIG_ARM_V6 || SLJIT_CONFIG_ARM_V7 || SLJIT_CONFIG_ARM_THUMB2 */
 
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 #define SLJIT_CONFIG_X86 1

+ 136 - 45
thirdparty/pcre2/src/sljit/sljitConfigInternal.h → thirdparty/pcre2/deps/sljit/sljit_src/sljitConfigInternal.h

@@ -27,20 +27,6 @@
 #ifndef SLJIT_CONFIG_INTERNAL_H_
 #define SLJIT_CONFIG_INTERNAL_H_
 
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
-	|| (defined SLJIT_DEBUG && SLJIT_DEBUG && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE)))
-#include <stdio.h>
-#endif
-
-#if (defined SLJIT_DEBUG && SLJIT_DEBUG \
-	&& (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE) || !defined(SLJIT_HALT_PROCESS)))
-#include <stdlib.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 /*
    SLJIT defines the following architecture dependent types and macros:
 
@@ -49,8 +35,8 @@ extern "C" {
      sljit_s16, sljit_u16 : signed and unsigned 16 bit integer type
      sljit_s32, sljit_u32 : signed and unsigned 32 bit integer type
      sljit_sw, sljit_uw   : signed and unsigned machine word, enough to store a pointer
-     sljit_p              : unsgined pointer value (usually the same as sljit_uw, but
-                            some 64 bit ABIs may use 32 bit pointers)
+     sljit_sp, sljit_up   : signed and unsigned pointer value (usually the same as
+                            sljit_uw, but some 64 bit ABIs may use 32 bit pointers)
      sljit_f32            : 32 bit single precision floating point value
      sljit_f64            : 64 bit double precision floating point value
 
@@ -64,16 +50,26 @@ extern "C" {
      SLJIT_MASKED_SHIFT : all word shifts are always masked
      SLJIT_MASKED_SHIFT32 : all 32 bit shifts are always masked
      SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information
+     SLJIT_UPPER_BITS_IGNORED : 32 bit operations ignore the upper bits of source registers
+     SLJIT_UPPER_BITS_ZERO_EXTENDED : 32 bit operations clear the upper bits of destination registers
+     SLJIT_UPPER_BITS_SIGN_EXTENDED : 32 bit operations replicate the sign bit in the upper bits of destination registers
+     SLJIT_UPPER_BITS_PRESERVED : 32 bit operations preserve the upper bits of destination registers
 
    Constants:
      SLJIT_NUMBER_OF_REGISTERS : number of available registers
      SLJIT_NUMBER_OF_SCRATCH_REGISTERS : number of available scratch registers
      SLJIT_NUMBER_OF_SAVED_REGISTERS : number of available saved registers
      SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers
-     SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers
-     SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers
+     SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available scratch floating point registers
+     SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available saved floating point registers
+     SLJIT_NUMBER_OF_VECTOR_REGISTERS : number of available vector registers
+     SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS : number of available scratch vector registers
+     SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS : number of available saved vector registers
      SLJIT_NUMBER_OF_TEMPORARY_REGISTERS : number of available temporary registers
      SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS : number of available temporary floating point registers
+     SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS : number of available temporary vector registers
+     SLJIT_SEPARATE_VECTOR_REGISTERS : if this macro is defined, the vector registers do not
+                                       overlap with floating point registers
      SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index
      SLJIT_F32_SHIFT : the shift required to apply when accessing
                        a single precision floating point array by index
@@ -98,12 +94,33 @@ extern "C" {
      SLJIT_TMP_R(i) : accessing temporary registers
      SLJIT_TMP_FR0 .. FR9 : accessing temporary floating point registers
      SLJIT_TMP_FR(i) : accessing temporary floating point registers
+     SLJIT_TMP_VR0 .. VR9 : accessing temporary vector registers
+     SLJIT_TMP_VR(i) : accessing temporary vector registers
+     SLJIT_TMP_DEST_REG : a temporary register for results
+     SLJIT_TMP_MEM_REG : a temporary base register for accessing memory
+                         (can be the same as SLJIT_TMP_DEST_REG)
+     SLJIT_TMP_DEST_FREG : a temporary register for float results
+     SLJIT_TMP_DEST_VREG : a temporary register for vector results
      SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT
      SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (platform independent helper)
      SLJIT_F64_SECOND(reg) : provides the register index of the second 32 bit part of a 64 bit
                              floating point register when SLJIT_HAS_F64_AS_F32_PAIR returns non-zero
 */
 
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+	|| (defined SLJIT_DEBUG && SLJIT_DEBUG && (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE)))
+#include <stdio.h>
+#endif
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG \
+	&& (!defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE) || !defined(SLJIT_HALT_PROCESS)))
+#include <stdlib.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /************************************************************/
 /* Intel Control-flow Enforcement Technology (CET) support. */
 /************************************************************/
@@ -132,23 +149,23 @@ extern "C" {
 */
 
 #ifndef SLJIT_MALLOC
-#define SLJIT_MALLOC(size, allocator_data) malloc(size)
+#define SLJIT_MALLOC(size, allocator_data) (malloc(size))
 #endif
 
 #ifndef SLJIT_FREE
-#define SLJIT_FREE(ptr, allocator_data) free(ptr)
+#define SLJIT_FREE(ptr, allocator_data) (free(ptr))
 #endif
 
 #ifndef SLJIT_MEMCPY
-#define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len)
+#define SLJIT_MEMCPY(dest, src, len) (memcpy(dest, src, len))
 #endif
 
 #ifndef SLJIT_MEMMOVE
-#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len)
+#define SLJIT_MEMMOVE(dest, src, len) (memmove(dest, src, len))
 #endif
 
 #ifndef SLJIT_ZEROMEM
-#define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len)
+#define SLJIT_ZEROMEM(dest, len) (memset(dest, 0, len))
 #endif
 
 /***************************/
@@ -198,7 +215,7 @@ extern "C" {
 /* Type of public API functions. */
 /*********************************/
 
-#ifndef SLJIT_API_FUNC_ATTRIBUTE 
+#ifndef SLJIT_API_FUNC_ATTRIBUTE
 #if (defined SLJIT_CONFIG_STATIC && SLJIT_CONFIG_STATIC)
 /* Static ABI functions. For all-in-one programs. */
 
@@ -281,7 +298,7 @@ extern "C" {
 #elif defined(_WIN32)
 
 #define SLJIT_CACHE_FLUSH(from, to) \
-	FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from))
+	FlushInstructionCache(GetCurrentProcess(), (void*)(from), (size_t)((char*)(to) - (char*)(from)))
 
 #elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__)
 
@@ -358,7 +375,8 @@ typedef long int sljit_sw;
 #endif /* _WIN32 */
 #endif
 
-typedef sljit_uw sljit_p;
+typedef sljit_sw sljit_sp;
+typedef sljit_uw sljit_up;
 
 /* Floating point types. */
 typedef float sljit_f32;
@@ -399,6 +417,10 @@ typedef double sljit_f64;
 #define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
 #define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
 #define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT
+#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)
+#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
+#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
+#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_ZERO
 #else
 #error "Result for float to integer conversion is not defined"
 #endif
@@ -522,19 +544,6 @@ typedef double sljit_f64;
 #define SLJIT_FUNC
 #endif /* !SLJIT_FUNC */
 
-/* Disable instrumentation for these functions as they may not be sound */
-#ifndef SLJIT_FUNC_ATTRIBUTE
-#if defined(__has_feature)
-#if __has_feature(memory_sanitizer)
-#define SLJIT_FUNC_ATTRIBUTE __attribute__((no_sanitize("memory")))
-#endif /* __has_feature(memory_sanitizer) */
-#endif /* defined(__has_feature) */
-#endif
-
-#ifndef SLJIT_FUNC_ATTRIBUTE
-#define SLJIT_FUNC_ATTRIBUTE
-#endif
-
 #ifndef SLJIT_INDIRECT_CALL
 #if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (!defined _CALL_ELF || _CALL_ELF == 1)) \
 	|| ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX)
@@ -557,7 +566,7 @@ determine the next executed instruction after return. */
 #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR)
 SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size);
 SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr);
-SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
+/* Note: sljitLir.h also declares the sljit_free_unused_memory_exec() function. */
 #define SLJIT_BUILTIN_MALLOC_EXEC(size, exec_allocator_data) sljit_malloc_exec(size)
 #define SLJIT_BUILTIN_FREE_EXEC(ptr, exec_allocator_data) sljit_free_exec(ptr)
 
@@ -570,9 +579,9 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void);
 #endif /* SLJIT_FREE_EXEC */
 
 #if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR)
-SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
-#define SLJIT_EXEC_OFFSET(ptr) sljit_exec_offset(ptr)
-#endif
+SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void *code);
+#define SLJIT_EXEC_OFFSET(code) sljit_exec_offset(code)
+#endif /* SLJIT_PROT_EXECUTABLE_ALLOCATOR */
 
 #endif /* SLJIT_EXECUTABLE_ALLOCATOR */
 
@@ -592,10 +601,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1
-#define SLJIT_LOCALS_OFFSET_BASE (8 * SSIZE_OF(sw))
+#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0
+#define SLJIT_TMP_MEM_REG SLJIT_TMP_R0
+#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0
+#define SLJIT_LOCALS_OFFSET_BASE (8 * (sljit_s32)sizeof(sljit_sw))
 #define SLJIT_PREF_SHIFT_REG SLJIT_R2
 #define SLJIT_MASKED_SHIFT 1
 #define SLJIT_MASKED_SHIFT32 1
+#define SLJIT_UPPER_BITS_IGNORED 1
+#define SLJIT_UPPER_BITS_ZERO_EXTENDED 1
 
 #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 
@@ -610,11 +624,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #else /* _WIN64 */
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10
-#define SLJIT_LOCALS_OFFSET_BASE (4 * SSIZE_OF(sw))
+#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw))
 #endif /* !_WIN64 */
+#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0
+#define SLJIT_TMP_MEM_REG SLJIT_TMP_R0
+#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0
 #define SLJIT_PREF_SHIFT_REG SLJIT_R3
 #define SLJIT_MASKED_SHIFT 1
 #define SLJIT_MASKED_SHIFT32 1
+#define SLJIT_UPPER_BITS_IGNORED 1
+#define SLJIT_UPPER_BITS_ZERO_EXTENDED 1
 
 #elif (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
 
@@ -624,6 +643,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
+#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1
+#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1
+#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0
 #define SLJIT_LOCALS_OFFSET_BASE 0
 
 #elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
@@ -634,9 +656,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
+#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0
+#define SLJIT_TMP_MEM_REG SLJIT_TMP_R0
+#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0
 #define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw))
 #define SLJIT_MASKED_SHIFT 1
 #define SLJIT_MASKED_SHIFT32 1
+#define SLJIT_UPPER_BITS_IGNORED 1
+#define SLJIT_UPPER_BITS_ZERO_EXTENDED 1
 
 #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
 
@@ -646,6 +673,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18
 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
+#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1
+#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1
+#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX)
 #define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw))
 #elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
@@ -654,6 +684,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #else
 #define SLJIT_LOCALS_OFFSET_BASE (3 * (sljit_s32)sizeof(sljit_sw))
 #endif /* SLJIT_CONFIG_PPC_64 || _AIX */
+#define SLJIT_UPPER_BITS_IGNORED 1
 
 #elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
 
@@ -670,8 +701,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #endif
 #define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5
 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 3
+#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1
+#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1
+#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0
 #define SLJIT_MASKED_SHIFT 1
 #define SLJIT_MASKED_SHIFT32 1
+#define SLJIT_UPPER_BITS_SIGN_EXTENDED 1
 
 #elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)
 
@@ -681,9 +716,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12
 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
+#define SLJIT_SEPARATE_VECTOR_REGISTERS 1
+#define SLJIT_NUMBER_OF_VECTOR_REGISTERS 30
+#define SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS 0
+#define SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS 2
+#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1
+#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1
+#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0
+#define SLJIT_TMP_DEST_VREG SLJIT_TMP_VR0
 #define SLJIT_LOCALS_OFFSET_BASE 0
 #define SLJIT_MASKED_SHIFT 1
 #define SLJIT_MASKED_SHIFT32 1
+#define SLJIT_UPPER_BITS_IGNORED 1
+#define SLJIT_UPPER_BITS_SIGN_EXTENDED 1
 
 #elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
 
@@ -714,8 +759,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1
+#define SLJIT_TMP_DEST_REG SLJIT_TMP_R0
+#define SLJIT_TMP_MEM_REG SLJIT_TMP_R2
+#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0
 #define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE
 #define SLJIT_MASKED_SHIFT 1
+#define SLJIT_UPPER_BITS_IGNORED 1
+#define SLJIT_UPPER_BITS_PRESERVED 1
 
 #elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)
 
@@ -725,9 +775,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12
 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
+#define SLJIT_TMP_DEST_REG SLJIT_TMP_R1
+#define SLJIT_TMP_MEM_REG SLJIT_TMP_R1
+#define SLJIT_TMP_DEST_FREG SLJIT_TMP_FR0
 #define SLJIT_LOCALS_OFFSET_BASE 0
 #define SLJIT_MASKED_SHIFT 1
 #define SLJIT_MASKED_SHIFT32 1
+#define SLJIT_UPPER_BITS_SIGN_EXTENDED 1
 
 #elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
 
@@ -738,10 +792,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #define SLJIT_NUMBER_OF_FLOAT_REGISTERS 0
 #define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
 #define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 0
+#define SLJIT_TMP_DEST_REG 0
+#define SLJIT_TMP_MEM_REG 0
+#define SLJIT_TMP_DEST_FREG 0
 #define SLJIT_LOCALS_OFFSET_BASE 0
 
 #endif
 
+#if !(defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
+#define SLJIT_NUMBER_OF_VECTOR_REGISTERS (SLJIT_NUMBER_OF_FLOAT_REGISTERS)
+#define SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS (SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS)
+#define SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS (SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)
+#define SLJIT_TMP_DEST_VREG (SLJIT_TMP_DEST_FREG)
+#endif /* !SLJIT_SEPARATE_VECTOR_REGISTERS */
+
 #define SLJIT_LOCALS_OFFSET (SLJIT_LOCALS_OFFSET_BASE)
 
 #define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \
@@ -750,12 +814,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \
 	(SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS)
 
+#define SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS \
+	(SLJIT_NUMBER_OF_VECTOR_REGISTERS - SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS)
+
+#if (defined SLJIT_UPPER_BITS_ZERO_EXTENDED && SLJIT_UPPER_BITS_ZERO_EXTENDED) \
+	+ (defined SLJIT_UPPER_BITS_SIGN_EXTENDED && SLJIT_UPPER_BITS_SIGN_EXTENDED) \
+	+ (defined SLJIT_UPPER_BITS_PRESERVED && SLJIT_UPPER_BITS_PRESERVED) > 1
+#error "Invalid upper bits defintion"
+#endif
+
+#if (defined SLJIT_UPPER_BITS_PRESERVED && SLJIT_UPPER_BITS_PRESERVED) \
+	&& !(defined SLJIT_UPPER_BITS_IGNORED && SLJIT_UPPER_BITS_IGNORED)
+#error "Upper bits preserved requires bits ignored"
+#endif
+
 /**********************************/
 /* Temporary register management. */
 /**********************************/
 
 #define SLJIT_TMP_REGISTER_BASE (SLJIT_NUMBER_OF_REGISTERS + 2)
 #define SLJIT_TMP_FREGISTER_BASE (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
+#define SLJIT_TMP_VREGISTER_BASE (SLJIT_NUMBER_OF_VECTOR_REGISTERS + 1)
 
 /* WARNING: Accessing temporary registers is not recommended, because they
    are also used by the JIT compiler for various computations. Using them
@@ -789,6 +868,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #define SLJIT_TMP_FR9		(SLJIT_TMP_FREGISTER_BASE + 9)
 #define SLJIT_TMP_FR(i)		(SLJIT_TMP_FREGISTER_BASE + (i))
 
+#define SLJIT_TMP_VR0		(SLJIT_TMP_VREGISTER_BASE + 0)
+#define SLJIT_TMP_VR1		(SLJIT_TMP_VREGISTER_BASE + 1)
+#define SLJIT_TMP_VR2		(SLJIT_TMP_VREGISTER_BASE + 2)
+#define SLJIT_TMP_VR3		(SLJIT_TMP_VREGISTER_BASE + 3)
+#define SLJIT_TMP_VR4		(SLJIT_TMP_VREGISTER_BASE + 4)
+#define SLJIT_TMP_VR5		(SLJIT_TMP_VREGISTER_BASE + 5)
+#define SLJIT_TMP_VR6		(SLJIT_TMP_VREGISTER_BASE + 6)
+#define SLJIT_TMP_VR7		(SLJIT_TMP_VREGISTER_BASE + 7)
+#define SLJIT_TMP_VR8		(SLJIT_TMP_VREGISTER_BASE + 8)
+#define SLJIT_TMP_VR9		(SLJIT_TMP_VREGISTER_BASE + 9)
+#define SLJIT_TMP_VR(i)		(SLJIT_TMP_VREGISTER_BASE + (i))
+
 /********************************/
 /* CPU status flags management. */
 /********************************/

File diff suppressed because it is too large
+ 410 - 215
thirdparty/pcre2/deps/sljit/sljit_src/sljitLir.c


File diff suppressed because it is too large
+ 404 - 147
thirdparty/pcre2/deps/sljit/sljit_src/sljitLir.h


File diff suppressed because it is too large
+ 385 - 194
thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeARM_32.c


File diff suppressed because it is too large
+ 388 - 174
thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeARM_64.c


File diff suppressed because it is too large
+ 381 - 168
thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeARM_T2_32.c


File diff suppressed because it is too large
+ 361 - 195
thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c


+ 3 - 3
thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c → thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeMIPS_32.c

@@ -225,7 +225,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 	sljit_ins f64_hi = TA(6), f64_lo = TA(7);
 #endif /* SLJIT_LITTLE_ENDIAN */
 
-	SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
+	SLJIT_ASSERT(reg_map[TMP_REG2] == 4 && freg_map[TMP_FREG1] == 12);
 
 	arg_types >>= SLJIT_ARG_SHIFT;
 
@@ -370,7 +370,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 	} else if (type & SLJIT_CALL_RETURN)
 		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
 
-	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
+	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25);
 
 	if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS)
 		jump->flags |= IS_MOVABLE;
@@ -441,7 +441,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
 		return sljit_emit_ijump(compiler, type, src, srcw);
 	}
 
-	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
+	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25);
 
 	if (src == SLJIT_IMM)
 		FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));

+ 3 - 3
thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c → thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeMIPS_64.c

@@ -225,7 +225,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 	sljit_ins prev_ins = *ins_ptr;
 	sljit_ins ins = NOP;
 
-	SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
+	SLJIT_ASSERT(reg_map[TMP_REG2] == 4 && freg_map[TMP_FREG1] == 12);
 
 	arg_types >>= SLJIT_ARG_SHIFT;
 
@@ -309,7 +309,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
 		PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins));
 
-	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
+	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25);
 
 	if (ins == NOP && compiler->delay_slot != UNMOVABLE_INS)
 		jump->flags |= IS_MOVABLE;
@@ -366,7 +366,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
 		return sljit_emit_ijump(compiler, type, src, srcw);
 	}
 
-	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
+	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG1);
 
 	if (src == SLJIT_IMM)
 		FAIL_IF(load_immediate(compiler, DR(PIC_ADDR_REG), srcw));

+ 264 - 154
thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c → thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeMIPS_common.c

@@ -83,7 +83,7 @@ typedef sljit_u32 sljit_ins;
 #define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
 
 /* For position independent code, t9 must contain the function address. */
-#define PIC_ADDR_REG	TMP_REG2
+#define PIC_ADDR_REG	TMP_REG1
 
 /* Floating point status register. */
 #define FCSR_REG	31
@@ -95,7 +95,7 @@ typedef sljit_u32 sljit_ins;
 #define OTHER_FLAG	1
 
 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
-	0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31, 3, 1
+	0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 25, 4, 31, 3, 1
 };
 
 #define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
@@ -249,6 +249,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
 #define LDL		(HI(26))
 #define LDR		(HI(27))
 #define LDC1		(HI(53))
+#define LL		(HI(48))
+#define LLD		(HI(52))
 #define LUI		(HI(15))
 #define LW		(HI(35))
 #define LWL		(HI(34))
@@ -288,6 +290,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
 #define ROTR		(HI(0) | (1 << 21) | LO(2))
 #define ROTRV		(HI(0) | (1 << 6) | LO(6))
 #endif /* SLJIT_MIPS_REV >= 2 */
+#define SC		(HI(56))
+#define SCD		(HI(60))
 #define SD		(HI(63))
 #define SDL		(HI(44))
 #define SDR		(HI(45))
@@ -308,6 +312,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
 #define SWL		(HI(42))
 #define SWR		(HI(46))
 #define SWC1		(HI(57))
+#define SYNC		(HI(0) | LO(15))
 #define TRUNC_W_S	(HI(17) | FMT_S | LO(13))
 #if defined(SLJIT_MIPS_REV) && SLJIT_MIPS_REV >= 2
 #define WSBH		(HI(31) | (2 << 6) | LO(32))
@@ -381,11 +386,21 @@ static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s
 	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
 		fr -= SLJIT_F64_SECOND(0);
 
-	return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
-		|| (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0)
+	return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->real_fscratches))
+		|| (fr > (SLJIT_FS0 - compiler->real_fsaveds) && fr <= SLJIT_FS0)
 		|| (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS));
 }
 
+static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, sljit_s32 vr, sljit_s32 type)
+{
+	SLJIT_UNUSED_ARG(compiler);
+	SLJIT_UNUSED_ARG(vr);
+	SLJIT_UNUSED_ARG(type);
+
+	/* SIMD is not supported: every argument is ignored and 0 is always returned. */
+	return 0;
+}
+
 #endif /* SLJIT_CONFIG_MIPS_32 && SLJIT_ARGUMENT_CHECKS */
 
 static void get_cpu_features(void)
@@ -504,7 +519,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i
 	if (jump->flags & JUMP_ADDR)
 		target_addr = jump->u.target;
 	else {
-		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
+		SLJIT_ASSERT(jump->u.label != NULL);
 		target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
 	}
 
@@ -635,75 +650,66 @@ static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_
 
 #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
 
-static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label)
+static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset)
 {
-	if (max_label < 0x80000000l) {
-		put_label->flags = PATCH_ABS32;
+	sljit_uw addr;
+	SLJIT_UNUSED_ARG(executable_offset);
+
+	if (jump->flags & JUMP_ADDR)
+		addr = jump->u.target;
+	else
+		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);
+
+	if (addr < 0x80000000l) {
+		jump->flags |= PATCH_ABS32;
 		return 1;
 	}
 
-	if (max_label < 0x800000000000l) {
-		put_label->flags = PATCH_ABS48;
+	if (addr < 0x800000000000l) {
+		jump->flags |= PATCH_ABS48;
 		return 3;
 	}
 
-	put_label->flags = 0;
 	return 5;
 }
 
 #endif /* SLJIT_CONFIG_MIPS_64 */
 
-static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg)
+static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump)
 {
-	struct sljit_jump *jump;
-	struct sljit_put_label *put_label;
-	sljit_uw flags;
-	sljit_ins *inst;
-	sljit_uw addr;
-
-	if (reg != 0) {
-		jump = (struct sljit_jump*)dst;
-		flags = jump->flags;
-		inst = (sljit_ins*)jump->addr;
-		addr = (flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
-	} else {
-		put_label = (struct sljit_put_label*)dst;
-#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
-		flags = put_label->flags;
-#endif
-		inst = (sljit_ins*)put_label->addr;
-		addr = put_label->label->addr;
-		reg = *inst;
-	}
+	sljit_uw flags = jump->flags;
+	sljit_ins *ins = (sljit_ins*)jump->addr;
+	sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
+	sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : PIC_ADDR_REG;
 
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-	inst[0] = LUI | T(reg) | IMM(addr >> 16);
+	ins[0] = LUI | T(reg) | IMM(addr >> 16);
 #else /* !SLJIT_CONFIG_MIPS_32 */
 	if (flags & PATCH_ABS32) {
 		SLJIT_ASSERT(addr < 0x80000000l);
-		inst[0] = LUI | T(reg) | IMM(addr >> 16);
+		ins[0] = LUI | T(reg) | IMM(addr >> 16);
 	}
 	else if (flags & PATCH_ABS48) {
 		SLJIT_ASSERT(addr < 0x800000000000l);
-		inst[0] = LUI | T(reg) | IMM(addr >> 32);
-		inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff);
-		inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16);
-		inst += 2;
+		ins[0] = LUI | T(reg) | IMM(addr >> 32);
+		ins[1] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff);
+		ins[2] = DSLL | T(reg) | D(reg) | SH_IMM(16);
+		ins += 2;
 	}
 	else {
-		inst[0] = LUI | T(reg) | IMM(addr >> 48);
-		inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 32) & 0xffff);
-		inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16);
-		inst[3] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff);
-		inst[4] = DSLL | T(reg) | D(reg) | SH_IMM(16);
-		inst += 4;
+		ins[0] = LUI | T(reg) | IMM(addr >> 48);
+		ins[1] = ORI | S(reg) | T(reg) | IMM((addr >> 32) & 0xffff);
+		ins[2] = DSLL | T(reg) | D(reg) | SH_IMM(16);
+		ins[3] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff);
+		ins[4] = DSLL | T(reg) | D(reg) | SH_IMM(16);
+		ins += 4;
 	}
 #endif /* SLJIT_CONFIG_MIPS_32 */
 
-	inst[1] = ORI | S(reg) | T(reg) | IMM(addr & 0xffff);
+	ins[1] = ORI | S(reg) | T(reg) | IMM(addr & 0xffff);
 }
 
-SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
 {
 	struct sljit_memory_fragment *buf;
 	sljit_ins *code;
@@ -711,77 +717,76 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	sljit_ins *buf_ptr;
 	sljit_ins *buf_end;
 	sljit_uw word_count;
-	sljit_uw next_addr;
+	SLJIT_NEXT_DEFINE_TYPES;
 	sljit_sw executable_offset;
 	sljit_uw addr;
-
 	struct sljit_label *label;
 	struct sljit_jump *jump;
 	struct sljit_const *const_;
-	struct sljit_put_label *put_label;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_generate_code(compiler));
 	reverse_buf(compiler);
 
-	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data);
+	code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);
 	PTR_FAIL_WITH_EXEC_IF(code);
 	buf = compiler->buf;
 
 	code_ptr = code;
 	word_count = 0;
-	next_addr = 0;
-	executable_offset = SLJIT_EXEC_OFFSET(code);
-
 	label = compiler->labels;
 	jump = compiler->jumps;
 	const_ = compiler->consts;
-	put_label = compiler->put_labels;
+	SLJIT_NEXT_INIT_TYPES();
+	SLJIT_GET_NEXT_MIN();
 
 	do {
 		buf_ptr = (sljit_ins*)buf->memory;
 		buf_end = buf_ptr + (buf->used_size >> 2);
 		do {
 			*code_ptr = *buf_ptr++;
-			if (next_addr == word_count) {
+			if (next_min_addr == word_count) {
 				SLJIT_ASSERT(!label || label->size >= word_count);
 				SLJIT_ASSERT(!jump || jump->addr >= word_count);
 				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
-				SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
 
 				/* These structures are ordered by their address. */
-				if (label && label->size == word_count) {
-					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+				if (next_min_addr == next_label_size) {
+					label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
 					label->size = (sljit_uw)(code_ptr - code);
 					label = label->next;
+					next_label_size = SLJIT_GET_NEXT_SIZE(label);
 				}
-				if (jump && jump->addr == word_count) {
+
+				if (next_min_addr == next_jump_addr) {
+					if (!(jump->flags & JUMP_MOV_ADDR)) {
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-					word_count += 2;
-#else
-					word_count += 6;
-#endif
-					jump->addr = (sljit_uw)(code_ptr - 1);
-					code_ptr = detect_jump_type(jump, code, executable_offset);
+						word_count += 2;
+#else /* !SLJIT_CONFIG_MIPS_32 */
+						word_count += 6;
+#endif /* SLJIT_CONFIG_MIPS_32 */
+						jump->addr = (sljit_uw)(code_ptr - 1);
+						code_ptr = detect_jump_type(jump, code, executable_offset);
+					} else {
+						jump->addr = (sljit_uw)code_ptr;
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+						code_ptr += 1;
+						word_count += 1;
+#else /* !SLJIT_CONFIG_MIPS_32 */
+						code_ptr += mov_addr_get_length(jump, code, executable_offset);
+						word_count += 5;
+#endif /* SLJIT_CONFIG_MIPS_32 */
+					}
+
 					jump = jump->next;
-				}
-				if (const_ && const_->addr == word_count) {
+					next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
+				} else if (next_min_addr == next_const_addr) {
 					const_->addr = (sljit_uw)code_ptr;
 					const_ = const_->next;
+					next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
 				}
-				if (put_label && put_label->addr == word_count) {
-					SLJIT_ASSERT(put_label->label);
-					put_label->addr = (sljit_uw)code_ptr;
-#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-					code_ptr += 1;
-					word_count += 1;
-#else
-					code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
-					word_count += 5;
-#endif
-					put_label = put_label->next;
-				}
-				next_addr = compute_next_addr(label, jump, const_, put_label);
+
+				SLJIT_GET_NEXT_MIN();
 			}
 			code_ptr++;
 			word_count++;
@@ -791,7 +796,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	} while (buf);
 
 	if (label && label->size == word_count) {
-		label->addr = (sljit_uw)code_ptr;
+		label->u.addr = (sljit_uw)code_ptr;
 		label->size = (sljit_uw)(code_ptr - code);
 		label = label->next;
 	}
@@ -799,13 +804,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	SLJIT_ASSERT(!label);
 	SLJIT_ASSERT(!jump);
 	SLJIT_ASSERT(!const_);
-	SLJIT_ASSERT(!put_label);
 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
 
 	jump = compiler->jumps;
 	while (jump) {
 		do {
-			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+			addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
 			buf_ptr = (sljit_ins *)jump->addr;
 
 			if (jump->flags & PATCH_B) {
@@ -821,15 +825,10 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 				break;
 			}
 
-			load_addr_to_reg(jump, PIC_ADDR_REG);
+			load_addr_to_reg(jump);
 		} while (0);
-		jump = jump->next;
-	}
 
-	put_label = compiler->put_labels;
-	while (put_label) {
-		load_addr_to_reg(put_label, 0);
-		put_label = put_label->next;
+		jump = jump->next;
 	}
 
 	compiler->error = SLJIT_ERR_COMPILED;
@@ -873,6 +872,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
 	case SLJIT_HAS_CLZ:
 	case SLJIT_HAS_CMOV:
 	case SLJIT_HAS_PREFETCH:
+	case SLJIT_HAS_ATOMIC:
+	case SLJIT_HAS_MEMORY_BARRIER:
 		return 1;
 
 	case SLJIT_HAS_CTZ:
@@ -932,9 +933,9 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s
 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size, sljit_ins *ins_ptr);
 
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-#define SELECT_OP(a, b)	(b)
+#define SELECT_OP(d, w)	(w)
 #else
-#define SELECT_OP(a, b)	(!(op & SLJIT_32) ? a : b)
+#define SELECT_OP(d, w)	(!(op & SLJIT_32) ? (d) : (w))
 #endif
 
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -944,17 +945,22 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
 #endif
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
-	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
-	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
+	sljit_s32 options, sljit_s32 arg_types,
+	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
 {
+	sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
+	sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
 	sljit_ins base;
 	sljit_s32 i, tmp, offset;
 	sljit_s32 arg_count, word_arg_count, float_arg_count;
 	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
 
 	CHECK_ERROR();
-	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
-	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));
+	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);
+
+	scratches = ENTER_GET_REGS(scratches);
+	saveds = ENTER_GET_REGS(saveds);
 
 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -1001,9 +1007,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 		offset = local_size - SSIZE_OF(sw);
 	} else {
 		FAIL_IF(load_immediate(compiler, OTHER_FLAG, local_size));
-		FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+		FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
 		FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | TA(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP)));
-		base = S(TMP_REG2);
+		base = S(TMP_REG1);
 		offset = -SSIZE_OF(sw);
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
 		local_size = 0;
@@ -1154,12 +1160,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 }
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
-	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
-	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
+	sljit_s32 options, sljit_s32 arg_types,
+	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
 {
+	sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
+	sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
+
 	CHECK_ERROR();
-	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
-	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));
+	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);
+
+	scratches = ENTER_GET_REGS(scratches);
+	saveds = ENTER_GET_REGS(saveds);
 
 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -1212,8 +1224,8 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
 		if (tmp < frame_size)
 			tmp = frame_size;
 
-		FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size - tmp));
-		FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP)));
+		FAIL_IF(load_immediate(compiler, DR(TMP_REG2), local_size - tmp));
+		FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | T(TMP_REG2) | D(SLJIT_SP), DR(SLJIT_SP)));
 		local_size = tmp;
 	}
 
@@ -1711,7 +1723,7 @@ static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit
 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
 	sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
 {
-	sljit_s32 is_overflow, is_carry, carry_src_ar, is_handled;
+	sljit_s32 is_overflow, is_carry, carry_src_ar, is_handled, reg;
 	sljit_ins op_imm, op_v;
 #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
 	sljit_ins ins, op_dimm, op_dimm32, op_dv;
@@ -1963,8 +1975,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 			is_handled = 1;
 
 			if (flags & SRC2_IMM) {
-				FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2)));
-				src2 = TMP_REG2;
+				reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
+				FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(reg) | IMM(src2), DR(reg)));
+				src2 = reg;
 				flags &= ~SRC2_IMM;
 			}
 
@@ -2283,7 +2296,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
 	sljit_s32 dst_r = TMP_REG2;
 	sljit_s32 src1_r;
 	sljit_sw src2_r = 0;
-	sljit_s32 sugg_src2_r = TMP_REG2;
+	sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2;
 
 	if (!(flags & ALT_KEEP_CACHE)) {
 		compiler->cache_arg = 0;
@@ -2299,7 +2312,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
 		dst_r = dst;
 		flags |= REG_DEST;
 		if (flags & MOVE_OP)
-			sugg_src2_r = dst_r;
+			src2_tmp_reg = dst_r;
 	}
 	else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, DR(TMP_REG1), dst, dstw))
 		flags |= SLOW_DEST;
@@ -2351,8 +2364,8 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
 	else if (src2 == SLJIT_IMM) {
 		if (!(flags & SRC2_IMM)) {
 			if (src2w) {
-				FAIL_IF(load_immediate(compiler, DR(sugg_src2_r), src2w));
-				src2_r = sugg_src2_r;
+				FAIL_IF(load_immediate(compiler, DR(src2_tmp_reg), src2w));
+				src2_r = src2_tmp_reg;
 			}
 			else {
 				src2_r = 0;
@@ -2366,16 +2379,16 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
 		}
 	}
 	else {
-		if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w))
+		if (getput_arg_fast(compiler, flags | LOAD_DATA, DR(src2_tmp_reg), src2, src2w))
 			FAIL_IF(compiler->error);
 		else
 			flags |= SLOW_SRC2;
-		src2_r = sugg_src2_r;
+		src2_r = src2_tmp_reg;
 	}
 
 	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
 		SLJIT_ASSERT(src2_r == TMP_REG2);
-		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+		if ((flags & SLOW_DEST) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG2), src2, src2w, src1, src1w));
 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw));
 		}
@@ -2387,7 +2400,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
 	else if (flags & SLOW_SRC1)
 		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(TMP_REG1), src1, src1w, dst, dstw));
 	else if (flags & SLOW_SRC2)
-		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(sugg_src2_r), src2, src2w, dst, dstw));
+		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, DR(src2_tmp_reg), src2, src2w, dst, dstw));
 
 	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
 
@@ -2477,6 +2490,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
 		FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0)));
 		return (op >= SLJIT_DIV_UW) ? SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
 #endif /* SLJIT_MIPS_REV >= 6 */
+	case SLJIT_MEMORY_BARRIER:
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+		return push_inst(compiler, SYNC, UNMOVABLE_INS);
+#else /* SLJIT_MIPS_REV < 1 */
+		return SLJIT_ERR_UNSUPPORTED;
+#endif /* SLJIT_MIPS_REV >= 1 */
 	case SLJIT_ENDBR:
 	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
 		return SLJIT_SUCCESS;
@@ -2659,12 +2678,28 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
 
 #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
 #define SELECT_OP3(op, src2w, D, D32, W) (((op & SLJIT_32) ? (W) : ((src2w) < 32) ? (D) : (D32)) | (((sljit_ins)src2w & 0x1f) << 6))
-#define SELECT_OP2(op, D, W) ((op & SLJIT_32) ? (W) : (D))
 #else /* !SLJIT_CONFIG_MIPS_64 */
 #define SELECT_OP3(op, src2w, D, D32, W) ((W) | ((sljit_ins)(src2w) << 6))
-#define SELECT_OP2(op, D, W) (W)
 #endif /* SLJIT_CONFIG_MIPS_64 */
 
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst_reg,
+	sljit_s32 src1, sljit_sw src1w,
+	sljit_s32 src2, sljit_sw src2w)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
+
+	switch (GET_OPCODE(op)) {
+	case SLJIT_MULADD:
+		SLJIT_SKIP_CHECKS(compiler);
+		FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w));
+		return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst_reg) | T(TMP_REG2) | D(dst_reg), DR(dst_reg));
+	}
+
+	return SLJIT_SUCCESS;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
 	sljit_s32 dst_reg,
 	sljit_s32 src1_reg,
@@ -2718,18 +2753,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *
 		FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG2), src3, src3w));
 		src3 = TMP_REG2;
 	} else if (dst_reg == src3) {
-		FAIL_IF(push_inst(compiler, SELECT_OP2(op, DADDU, ADDU) | S(src3) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+		FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src3) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
 		src3 = TMP_REG2;
 	}
 
 	if (is_left) {
-		ins1 = SELECT_OP2(op, DSRL, SRL);
-		ins2 = SELECT_OP2(op, DSLLV, SLLV);
-		ins3 = SELECT_OP2(op, DSRLV, SRLV);
+		ins1 = SELECT_OP(DSRL, SRL);
+		ins2 = SELECT_OP(DSLLV, SLLV);
+		ins3 = SELECT_OP(DSRLV, SRLV);
 	} else {
-		ins1 = SELECT_OP2(op, DSLL, SLL);
-		ins2 = SELECT_OP2(op, DSRLV, SRLV);
-		ins3 = SELECT_OP2(op, DSLLV, SLLV);
+		ins1 = SELECT_OP(DSLL, SLL);
+		ins2 = SELECT_OP(DSRLV, SRLV);
+		ins3 = SELECT_OP(DSLLV, SLLV);
 	}
 
 	FAIL_IF(push_inst(compiler, ins2 | S(src3) | T(src1_reg) | D(dst_reg), DR(dst_reg)));
@@ -2739,14 +2774,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *
 		FAIL_IF(push_inst(compiler, XORI | S(src3) | T(TMP_REG2) | ((sljit_ins)bit_length - 1), DR(TMP_REG2)));
 		src2_reg = TMP_REG1;
 	} else
-		FAIL_IF(push_inst(compiler, SELECT_OP2(op, DSUBU, SUBU) | SA(0) | T(src3) | D(TMP_REG2), DR(TMP_REG2)));
+		FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(0) | T(src3) | D(TMP_REG2), DR(TMP_REG2)));
 
 	FAIL_IF(push_inst(compiler, ins3 | S(TMP_REG2) | T(src2_reg) | D(TMP_REG1), DR(TMP_REG1)));
 	return push_inst(compiler, OR | S(dst_reg) | T(TMP_REG1) | D(dst_reg), DR(dst_reg));
 }
 
 #undef SELECT_OP3
-#undef SELECT_OP2
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
 	sljit_s32 src, sljit_sw srcw)
@@ -3103,7 +3137,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
 	switch (GET_OPCODE(op)) {
 	case SLJIT_MOV_F64:
 		if (src != dst_r) {
-			if (dst_r != TMP_FREG1)
+			if (!(dst & SLJIT_MEM))
 				FAIL_IF(push_inst(compiler, MOV_fmt(FMT(op)) | FS(src) | FD(dst_r), MOVABLE_INS));
 			else
 				dst_r = src;
@@ -3162,11 +3196,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
 	}
 
 	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
-		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
+		if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, src1, src1w));
 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, dst, dstw));
-		}
-		else {
+		} else {
 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG1), src1, src1w, src2, src2w));
 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, FR(TMP_FREG2), src2, src2w, dst, dstw));
 		}
@@ -3313,6 +3346,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
 	case SLJIT_SIG_GREATER:
 	case SLJIT_OVERFLOW:
 	case SLJIT_CARRY:
+	case SLJIT_ATOMIC_STORED:
 		BR_Z(OTHER_FLAG);
 		break;
 	case SLJIT_GREATER_EQUAL:
@@ -3321,6 +3355,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
 	case SLJIT_SIG_LESS_EQUAL:
 	case SLJIT_NOT_OVERFLOW:
 	case SLJIT_NOT_CARRY:
+	case SLJIT_ATOMIC_NOT_STORED:
 		BR_NZ(OTHER_FLAG);
 		break;
 	case SLJIT_F_NOT_EQUAL:
@@ -3361,10 +3396,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
 		PTR_FAIL_IF(push_inst(compiler, inst, UNMOVABLE_INS));
 
 	if (type <= SLJIT_JUMP)
-		PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
+		PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
 	else {
 		jump->flags |= IS_JAL;
-		PTR_FAIL_IF(push_inst(compiler, JALR | S(TMP_REG2) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+		PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
 	}
 
 	jump->addr = compiler->size;
@@ -3392,8 +3427,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
 #define RESOLVE_IMM2() \
 	if (src2 == SLJIT_IMM) { \
 		if (src2w) { \
-			PTR_FAIL_IF(load_immediate(compiler, DR(TMP_REG2), src2w)); \
-			src2 = TMP_REG2; \
+			PTR_FAIL_IF(load_immediate(compiler, DR(src2_tmp_reg), src2w)); \
+			src2 = src2_tmp_reg; \
 		} \
 		else \
 			src2 = 0; \
@@ -3406,6 +3441,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
 	struct sljit_jump *jump;
 	sljit_s32 flags;
 	sljit_ins inst;
+	sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
@@ -3426,8 +3462,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
 	}
 
 	if (src2 & SLJIT_MEM) {
-		PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(TMP_REG2), src2, src2w, 0, 0));
-		src2 = TMP_REG2;
+		PTR_FAIL_IF(emit_op_mem2(compiler, flags, DR(src2_tmp_reg), src2, src2w, 0, 0));
+		src2 = src2_tmp_reg;
 	}
 
 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
@@ -3515,7 +3551,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler
 		PTR_FAIL_IF(push_inst(compiler, (type == SLJIT_EQUAL ? BNE : BEQ) | S(TMP_REG1) | TA(0) | BRANCH_LENGTH, UNMOVABLE_INS));
 	}
 
-	PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS));
+	PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
 	jump->addr = compiler->size;
 	PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
 
@@ -3553,11 +3589,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
 		if (compiler->delay_slot != UNMOVABLE_INS)
 			jump->flags |= IS_MOVABLE;
 
-		src = TMP_REG2;
+		src = PIC_ADDR_REG;
 	} else if (src & SLJIT_MEM) {
 		ADJUST_LOCAL_OFFSET(src, srcw);
-		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(TMP_REG2), src, srcw));
-		src = TMP_REG2;
+		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
+		src = PIC_ADDR_REG;
 	}
 
 	if (type <= SLJIT_JUMP)
@@ -3755,8 +3791,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp
 
 #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)
 	if (src1 & SLJIT_MEM) {
-		FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG2), src1, src1w));
-		src1 = TMP_REG2;
+		FAIL_IF(emit_op_mem(compiler, inp_flags, DR(TMP_REG1), src1, src1w));
+		src1 = TMP_REG1;
 	} else if (src1 == SLJIT_IMM) {
 #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
 		if (type & SLJIT_32)
@@ -3784,13 +3820,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *comp
 			type ^= 0x1;
 		} else {
 			if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
-				FAIL_IF(push_inst(compiler, ADDU_W | S(dst_reg) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
+				FAIL_IF(push_inst(compiler, ADDU_W | S(dst_reg) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
 
 				if ((src1 & REG_MASK) == dst_reg)
-					src1 = (src1 & ~REG_MASK) | TMP_REG2;
+					src1 = (src1 & ~REG_MASK) | TMP_REG1;
 
 				if (OFFS_REG(src1) == dst_reg)
-					src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2);
+					src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
 			}
 
 			FAIL_IF(push_inst(compiler, mov_ins | S(src2_reg) | TA(0) | D(dst_reg), DR(dst_reg)));
@@ -3847,8 +3883,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *com
 
 #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1 && SLJIT_MIPS_REV < 6)
 	if (src1 & SLJIT_MEM) {
-		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, FR(TMP_FREG1), src1, src1w));
-		src1 = TMP_FREG1;
+		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, FR(TMP_FREG2), src1, src1w));
+		src1 = TMP_FREG2;
 	}
 
 	return push_inst(compiler, get_select_cc(type, 1) | FMT(type) | FS(src1) | FD(dst_freg), MOVABLE_INS);
@@ -4209,6 +4245,80 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
 
 #undef TO_ARGW_HI
 
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 dst_reg,
+	sljit_s32 mem_reg)
+{
+	sljit_ins ins;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
+
+	if (op & SLJIT_ATOMIC_USE_CAS)
+		return SLJIT_ERR_UNSUPPORTED;
+
+	switch (GET_OPCODE(op)) {
+	case SLJIT_MOV:
+	case SLJIT_MOV_P:
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+		ins = LLD;
+		break;
+#endif /* SLJIT_CONFIG_MIPS_64 */
+	case SLJIT_MOV_S32:
+	case SLJIT_MOV32:
+		ins = LL;
+		break;
+
+	default:
+		return SLJIT_ERR_UNSUPPORTED;
+	}
+
+	if (op & SLJIT_ATOMIC_TEST)
+		return SLJIT_SUCCESS;
+
+	return push_inst(compiler, ins | T(dst_reg) | S(mem_reg), DR(dst_reg));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 src_reg,
+	sljit_s32 mem_reg,
+	sljit_s32 temp_reg)
+{
+	sljit_ins ins;
+
+	/* temp_reg == mem_reg is undefined so use another temp register */
+	SLJIT_UNUSED_ARG(temp_reg);
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
+
+	if (op & SLJIT_ATOMIC_USE_CAS)
+		return SLJIT_ERR_UNSUPPORTED;
+
+	switch (GET_OPCODE(op)) {
+	case SLJIT_MOV:
+	case SLJIT_MOV_P:
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+		ins = SCD;
+		break;
+#endif /* SLJIT_CONFIG_MIPS_64 */
+	case SLJIT_MOV_S32:
+	case SLJIT_MOV32:
+		op |= SLJIT_32;
+		ins = SC;
+		break;
+
+	default:
+		return SLJIT_ERR_UNSUPPORTED;
+	}
+
+	if (op & SLJIT_ATOMIC_TEST)
+		return SLJIT_SUCCESS;
+
+	FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src_reg) | TA(0) | DA(OTHER_FLAG), OTHER_FLAG));
+	return push_inst(compiler, ins | TA(OTHER_FLAG) | S(mem_reg), OTHER_FLAG);
+}
+
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 {
 	struct sljit_const *const_;
@@ -4231,18 +4341,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
 	return const_;
 }
 
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
 {
-	struct sljit_put_label *put_label;
+	struct sljit_jump *jump;
 	sljit_s32 dst_r;
 
 	CHECK_ERROR_PTR();
-	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
+	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
 	ADJUST_LOCAL_OFFSET(dst, dstw);
 
-	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
-	PTR_FAIL_IF(!put_label);
-	set_put_label(put_label, compiler, 0);
+	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+	PTR_FAIL_IF(!jump);
+	set_mov_addr(jump, compiler, 0);
 
 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
 	PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r, UNMOVABLE_INS));
@@ -4255,5 +4365,5 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct slj
 	if (dst & SLJIT_MEM)
 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, DR(TMP_REG2), dst, dstw));
 
-	return put_label;
+	return jump;
 }

+ 0 - 0
thirdparty/pcre2/src/sljit/sljitNativePPC_32.c → thirdparty/pcre2/deps/sljit/sljit_src/sljitNativePPC_32.c


+ 0 - 0
thirdparty/pcre2/src/sljit/sljitNativePPC_64.c → thirdparty/pcre2/deps/sljit/sljit_src/sljitNativePPC_64.c


File diff suppressed because it is too large
+ 361 - 256
thirdparty/pcre2/deps/sljit/sljit_src/sljitNativePPC_common.c


+ 0 - 0
thirdparty/pcre2/src/sljit/sljitNativeRISCV_32.c → thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeRISCV_32.c


+ 0 - 0
thirdparty/pcre2/src/sljit/sljitNativeRISCV_64.c → thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeRISCV_64.c


File diff suppressed because it is too large
+ 409 - 176
thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeRISCV_common.c


File diff suppressed because it is too large
+ 293 - 283
thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeS390X.c


+ 102 - 36
thirdparty/pcre2/src/sljit/sljitNativeX86_32.c → thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeX86_32.c

@@ -283,28 +283,25 @@ static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw
 /*  Enter / return                                                       */
 /* --------------------------------------------------------------------- */
 
-static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
+static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
 {
 	sljit_uw type = jump->flags >> TYPE_SHIFT;
 
 	if (type == SLJIT_JUMP) {
 		*code_ptr++ = JMP_i32;
-		jump->addr++;
-	}
-	else if (type >= SLJIT_FAST_CALL) {
+	} else if (type >= SLJIT_FAST_CALL) {
 		*code_ptr++ = CALL_i32;
-		jump->addr++;
-	}
-	else {
+	} else {
 		*code_ptr++ = GROUP_0F;
 		*code_ptr++ = get_jump_code(type);
-		jump->addr += 2;
 	}
 
-	if (jump->flags & JUMP_LABEL)
-		jump->flags |= PATCH_MW;
-	else
+	jump->addr = (sljit_uw)code_ptr;
+
+	if (jump->flags & JUMP_ADDR)
 		sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset));
+	else
+		jump->flags |= PATCH_MW;
 	code_ptr += 4;
 
 	return code_ptr;
@@ -314,8 +311,8 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_
 #define ENTER_TMP_TO_S		0x00002
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
-	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
-	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
+	sljit_s32 options, sljit_s32 arg_types,
+	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
 {
 	sljit_s32 word_arg_count, saved_arg_count, float_arg_count;
 	sljit_s32 size, args_size, types, status;
@@ -326,8 +323,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 #endif
 
 	CHECK_ERROR();
-	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
-	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));
+	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);
+
+	scratches = ENTER_GET_REGS(scratches);
 
 	/* Emit ENDBR32 at function entry if needed.  */
 	FAIL_IF(emit_endbranch(compiler));
@@ -539,14 +538,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 }
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
-	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
-	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
+	sljit_s32 options, sljit_s32 arg_types,
+	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
 {
 	sljit_s32 args_size;
 
 	CHECK_ERROR();
-	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
-	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));
+	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);
+
+	scratches = ENTER_GET_REGS(scratches);
 
 	arg_types >>= SLJIT_ARG_SHIFT;
 	args_size = 0;
@@ -1249,6 +1250,68 @@ static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler,
 /*  Other operations                                                     */
 /* --------------------------------------------------------------------- */
 
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
+	sljit_s32 dst_reg,
+	sljit_s32 src1, sljit_sw src1w,
+	sljit_s32 src2_reg)
+{
+	sljit_s32 dst = dst_reg;
+	sljit_sw dstw = 0;
+	sljit_sw src2w = 0;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
+
+	ADJUST_LOCAL_OFFSET(src1, src1w);
+
+	CHECK_EXTRA_REGS(dst, dstw, (void)0);
+	CHECK_EXTRA_REGS(src1, src1w, (void)0);
+	CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);
+
+	type &= ~SLJIT_32;
+
+	if (dst & SLJIT_MEM) {
+		if (src1 == SLJIT_IMM || (!(src1 & SLJIT_MEM) && (src2_reg & SLJIT_MEM))) {
+			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+			src1 = src2_reg;
+			src1w = src2w;
+			type ^= 0x1;
+		} else
+			EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);
+
+		dst_reg = TMP_REG1;
+	} else {
+		if (dst_reg != src2_reg) {
+			if (dst_reg == src1) {
+				src1 = src2_reg;
+				src1w = src2w;
+				type ^= 0x1;
+			} else if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
+				EMIT_MOV(compiler, dst_reg, 0, src1, src1w);
+				src1 = src2_reg;
+				src1w = src2w;
+				type ^= 0x1;
+			} else
+				EMIT_MOV(compiler, dst_reg, 0, src2_reg, src2w);
+		}
+	}
+
+	if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && (src1 != SLJIT_IMM || dst_reg != TMP_REG1)) {
+		if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
+			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
+			src1 = TMP_REG1;
+			src1w = 0;
+		}
+
+		FAIL_IF(emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w));
+	} else
+		FAIL_IF(emit_cmov_generic(compiler, type, dst_reg, src1, src1w));
+
+	if (dst & SLJIT_MEM)
+		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
+	return SLJIT_SUCCESS;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
 	sljit_s32 reg,
 	sljit_s32 mem, sljit_sw memw)
@@ -1449,10 +1512,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *comp
 
 	if (u.imm == 0) {
 		inst[2] = PXOR_x_xm;
-		inst[3] = U8(freg | (freg << 3) | MOD_REG);
+		inst[3] = U8(freg_map[freg] | (freg_map[freg] << 3) | MOD_REG);
 	} else {
 		inst[2] = MOVD_x_rm;
-		inst[3] = U8(reg_map[TMP_REG1] | (freg << 3) | MOD_REG);
+		inst[3] = U8(reg_map[TMP_REG1] | (freg_map[freg] << 3) | MOD_REG);
 	}
 
 	return SLJIT_SUCCESS;
@@ -1462,7 +1525,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *comp
 	sljit_s32 freg, sljit_f64 value)
 {
 	sljit_u8 *inst;
-	sljit_s32 tmp_freg = freg;
 	union {
 		sljit_s32 imm[2];
 		sljit_f64 value;
@@ -1478,8 +1540,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *comp
 			return emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0);
 
 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);
-	} else
+	} else {
+		SLJIT_ASSERT(cpu_feature_list != 0);
+
+		if (!(cpu_feature_list & CPU_FEATURE_SSE41) && u.imm[1] != 0 && u.imm[0] != u.imm[1]) {
+			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, u.imm[0]);
+			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_IMM, u.imm[1]);
+
+			return emit_groupf(compiler, MOVLPD_x_m | EX86_SSE2, freg, SLJIT_MEM1(SLJIT_SP), 0);
+		}
+
 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[0]);
+	}
 
 	FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, TMP_REG1, 0));
 
@@ -1493,23 +1565,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *comp
 
 		inst[0] = GROUP_0F;
 		inst[1] = SHUFPS_x_xm;
-		inst[2] = U8(MOD_REG | (freg << 3) | freg);
+		inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[freg]);
 		inst[3] = 0x51;
 		return SLJIT_SUCCESS;
 	}
 
 	if (u.imm[0] != u.imm[1]) {
-		SLJIT_ASSERT(u.imm[1] != 0 && cpu_feature_list != 0);
-
+		SLJIT_ASSERT(cpu_feature_list & CPU_FEATURE_SSE41);
 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);
 
-		if (cpu_feature_list & CPU_FEATURE_SSE41) {
-			FAIL_IF(emit_groupf_ext(compiler, PINSRD_x_rm_i8 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, TMP_REG1, 0));
-			return emit_byte(compiler, 1);
-		}
-
-		FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, TMP_REG1, 0));
-		tmp_freg = TMP_FREG;
+		FAIL_IF(emit_groupf_ext(compiler, PINSRD_x_rm_i8 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, TMP_REG1, 0));
+		return emit_byte(compiler, 1);
 	}
 
 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
@@ -1518,7 +1584,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *comp
 
 	inst[0] = GROUP_0F;
 	inst[1] = UNPCKLPS_x_xm;
-	inst[2] = U8(MOD_REG | (freg << 3) | tmp_freg);
+	inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[freg]);
 	return SLJIT_SUCCESS;
 }
 
@@ -1581,7 +1647,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi
 		inst[0] = GROUP_66;
 		inst[1] = GROUP_0F;
 		inst[2] = PSHUFD_x_xm;
-		inst[3] = U8(MOD_REG | (TMP_FREG << 3) | freg);
+		inst[3] = U8(MOD_REG | (TMP_FREG << 3) | freg_map[freg]);
 		inst[4] = 1;
 	} else if (reg != 0)
 		FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw));
@@ -1597,7 +1663,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compi
 
 		inst[0] = GROUP_0F;
 		inst[1] = UNPCKLPS_x_xm;
-		inst[2] = U8(MOD_REG | (freg << 3) | (reg == 0 ? freg : TMP_FREG));
+		inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[reg == 0 ? freg : TMP_FREG]);
 	} else
 		FAIL_IF(emit_groupf(compiler, MOVD_rm_x | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw));
 

+ 114 - 53
thirdparty/pcre2/src/sljit/sljitNativeX86_64.c → thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeX86_64.c

@@ -358,26 +358,28 @@ static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw
 /*  Enter / return                                                       */
 /* --------------------------------------------------------------------- */
 
-static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr)
+static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr)
 {
 	sljit_uw type = jump->flags >> TYPE_SHIFT;
 
-	int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);
+	int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && (jump->flags & JUMP_ADDR) && (jump->u.target <= 0xffffffff);
 
 	/* The relative jump below is specialized for this case. */
-	SLJIT_ASSERT(reg_map[TMP_REG2] >= 8);
+	SLJIT_ASSERT(reg_map[TMP_REG2] >= 8 && TMP_REG2 != SLJIT_TMP_DEST_REG);
 
 	if (type < SLJIT_JUMP) {
 		/* Invert type. */
-		*code_ptr++ = U8(get_jump_code(type ^ 0x1) - 0x10);
-		*code_ptr++ = short_addr ? (6 + 3) : (10 + 3);
+		code_ptr[0] = U8(get_jump_code(type ^ 0x1) - 0x10);
+		code_ptr[1] = short_addr ? (6 + 3) : (10 + 3);
+		code_ptr += 2;
 	}
 
-	*code_ptr++ = short_addr ? REX_B : (REX_W | REX_B);
-	*code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
+	code_ptr[0] = short_addr ? REX_B : (REX_W | REX_B);
+	code_ptr[1] = MOV_r_i32 | reg_lmap[TMP_REG2];
+	code_ptr += 2;
 	jump->addr = (sljit_uw)code_ptr;
 
-	if (jump->flags & JUMP_LABEL)
+	if (!(jump->flags & JUMP_ADDR))
 		jump->flags |= PATCH_MD;
 	else if (short_addr)
 		sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
@@ -386,60 +388,62 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_
 
 	code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);
 
-	*code_ptr++ = REX_B;
-	*code_ptr++ = GROUP_FF;
-	*code_ptr++ = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]);
+	code_ptr[0] = REX_B;
+	code_ptr[1] = GROUP_FF;
+	code_ptr[2] = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]);
 
-	return code_ptr;
+	return code_ptr + 3;
 }
 
-static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label)
+static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
 {
-	if (max_label > HALFWORD_MAX) {
-		put_label->addr -= put_label->flags;
-		put_label->flags = PATCH_MD;
-		return code_ptr;
-	}
+	sljit_uw addr;
+	sljit_sw diff;
+	SLJIT_UNUSED_ARG(executable_offset);
+
+	SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) <= 10);
+	if (jump->flags & JUMP_ADDR)
+		addr = jump->u.target;
+	else
+		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + jump->u.label->size;
 
-	if (put_label->flags == 0) {
-		/* Destination is register. */
-		code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw);
+	if (addr > 0xffffffffl) {
+		diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
 
-		SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
-		SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);
+		if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN) {
+			SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 7);
+			code_ptr -= SSIZE_OF(s32) - 1;
 
-		if ((code_ptr[0] & 0x07) != 0) {
-			code_ptr[0] = U8(code_ptr[0] & ~0x08);
-			code_ptr += 2 + sizeof(sljit_s32);
-		}
-		else {
-			code_ptr[0] = code_ptr[1];
-			code_ptr += 1 + sizeof(sljit_s32);
+			SLJIT_ASSERT((code_ptr[-3 - SSIZE_OF(s32)] & 0xf8) == REX_W);
+			SLJIT_ASSERT((code_ptr[-2 - SSIZE_OF(s32)] & 0xf8) == MOV_r_i32);
+
+			code_ptr[-3 - SSIZE_OF(s32)] = U8(REX_W | ((code_ptr[-3 - SSIZE_OF(s32)] & 0x1) << 2));
+			code_ptr[-1 - SSIZE_OF(s32)] = U8(((code_ptr[-2 - SSIZE_OF(s32)] & 0x7) << 3) | 0x5);
+			code_ptr[-2 - SSIZE_OF(s32)] = LEA_r_m;
+
+			jump->flags |= PATCH_MW;
+			return code_ptr;
 		}
 
-		put_label->addr = (sljit_uw)code_ptr;
+		jump->flags |= PATCH_MD;
 		return code_ptr;
 	}
 
-	code_ptr -= put_label->flags + (2 + sizeof(sljit_uw));
-	SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags);
+	code_ptr -= 2 + sizeof(sljit_uw);
 
 	SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
+	SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);
 
-	if ((code_ptr[1] & 0xf8) == MOV_r_i32) {
-		code_ptr += 2 + sizeof(sljit_uw);
-		SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
+	if ((code_ptr[0] & 0x07) != 0) {
+		SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 6);
+		code_ptr[0] = U8(code_ptr[0] & ~0x08);
+		code_ptr += 2 + sizeof(sljit_s32);
+	} else {
+		SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 5);
+		code_ptr[0] = code_ptr[1];
+		code_ptr += 1 + sizeof(sljit_s32);
 	}
 
-	SLJIT_ASSERT(code_ptr[1] == MOV_rm_r);
-
-	code_ptr[0] = U8(code_ptr[0] & ~0x4);
-	code_ptr[1] = MOV_rm_i32;
-	code_ptr[2] = U8(code_ptr[2] & ~(0x7 << 3));
-
-	code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32));
-	put_label->addr = (sljit_uw)code_ptr;
-	put_label->flags = 0;
 	return code_ptr;
 }
 
@@ -450,14 +454,16 @@ typedef struct {
 #endif /* _WIN64 */
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
-	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
-	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
+	sljit_s32 options, sljit_s32 arg_types,
+	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
 {
 	sljit_uw size;
 	sljit_s32 word_arg_count = 0;
 	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
 	sljit_s32 saved_regs_size, tmp, i;
 #ifdef _WIN64
+	sljit_s32 fscratches;
+	sljit_s32 fsaveds;
 	sljit_s32 saved_float_regs_size;
 	sljit_s32 saved_float_regs_offset = 0;
 	sljit_s32 float_arg_count = 0;
@@ -465,8 +471,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	sljit_u8 *inst;
 
 	CHECK_ERROR();
-	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
-	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));
+	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);
+
+	scratches = ENTER_GET_REGS(scratches);
+#ifdef _WIN64
+	saveds = ENTER_GET_REGS(saveds);
+	fscratches = compiler->fscratches;
+	fsaveds = compiler->fsaveds;
+#endif /* _WIN64 */
 
 	if (options & SLJIT_ENTER_REG_ARG)
 		arg_types = 0;
@@ -626,19 +639,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 }
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
-	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
-	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
+	sljit_s32 options, sljit_s32 arg_types,
+	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
 {
 	sljit_s32 saved_regs_size;
 #ifdef _WIN64
+	sljit_s32 fscratches;
+	sljit_s32 fsaveds;
 	sljit_s32 saved_float_regs_size;
 #endif /* _WIN64 */
 
 	CHECK_ERROR();
-	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
-	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));
+	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);
+
+	scratches = ENTER_GET_REGS(scratches);
 
 #ifdef _WIN64
+	saveds = ENTER_GET_REGS(saveds);
+	fscratches = compiler->fscratches;
+	fsaveds = compiler->fsaveds;
+
 	local_size += SLJIT_LOCALS_OFFSET;
 	saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);
 
@@ -1003,6 +1024,46 @@ static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler,
 /*  Other operations                                                     */
 /* --------------------------------------------------------------------- */
 
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
+	sljit_s32 dst_reg,
+	sljit_s32 src1, sljit_sw src1w,
+	sljit_s32 src2_reg)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
+
+	ADJUST_LOCAL_OFFSET(src1, src1w);
+
+	compiler->mode32 = type & SLJIT_32;
+	type &= ~SLJIT_32;
+
+	if (dst_reg != src2_reg) {
+		if (dst_reg == src1) {
+			src1 = src2_reg;
+			src1w = 0;
+			type ^= 0x1;
+		} else if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
+			EMIT_MOV(compiler, dst_reg, 0, src1, src1w);
+			src1 = src2_reg;
+			src1w = 0;
+			type ^= 0x1;
+		} else
+			EMIT_MOV(compiler, dst_reg, 0, src2_reg, 0);
+	}
+
+	if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) {
+		if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
+			EMIT_MOV(compiler, TMP_REG2, 0, src1, src1w);
+			src1 = TMP_REG2;
+			src1w = 0;
+		}
+
+		return emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w);
+	}
+
+	return emit_cmov_generic(compiler, type, dst_reg, src1, src1w);
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
 	sljit_s32 reg,
 	sljit_s32 mem, sljit_sw memw)

File diff suppressed because it is too large
+ 443 - 297
thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeX86_common.c


+ 516 - 0
thirdparty/pcre2/deps/sljit/sljit_src/sljitSerialize.c

@@ -0,0 +1,516 @@
+/*
+ *    Stack-less Just-In-Time compiler
+ *
+ *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright notice, this list of
+ *      conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *      of conditions and the following disclaimer in the documentation and/or other materials
+ *      provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
+ * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_label(struct sljit_jump *jump)
+{
+	return !(jump->flags & JUMP_ADDR) && (jump->u.label != NULL);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_has_target(struct sljit_jump *jump)
+{
+	return (jump->flags & JUMP_ADDR) != 0;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_jump_is_mov_addr(struct sljit_jump *jump)
+{
+	return (jump->flags & JUMP_MOV_ADDR) != 0;
+}
+
+#define SLJIT_SERIALIZE_DEBUG ((sljit_u16)0x1)
+
+struct sljit_serialized_compiler {
+	sljit_u32 signature;
+	sljit_u16 version;
+	sljit_u16 cpu_type;
+
+	sljit_uw buf_segment_count;
+	sljit_uw label_count;
+	sljit_uw jump_count;
+	sljit_uw const_count;
+
+	sljit_s32 options;
+	sljit_s32 scratches;
+	sljit_s32 saveds;
+	sljit_s32 fscratches;
+	sljit_s32 fsaveds;
+	sljit_s32 local_size;
+	sljit_uw size;
+
+#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE)
+	sljit_s32 status_flags_state;
+#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+	sljit_s32 args_size;
+#endif /* SLJIT_CONFIG_X86_32 */
+
+#if ((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)) \
+		|| (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+	sljit_uw args_size;
+#endif /* (SLJIT_CONFIG_ARM_32 && __SOFTFP__) || SLJIT_CONFIG_MIPS_32 */
+
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
+	sljit_uw cpool_diff;
+	sljit_uw cpool_fill;
+	sljit_uw patches;
+#endif /* SLJIT_CONFIG_ARM_V6 */
+
+#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
+	sljit_s32 delay_slot;
+#endif /* SLJIT_CONFIG_MIPS */
+
+};
+
+struct sljit_serialized_debug_info {
+	sljit_sw last_flags;
+	sljit_s32 last_return;
+	sljit_s32 logical_local_size;
+};
+
+struct sljit_serialized_label {
+	sljit_uw size;
+};
+
+struct sljit_serialized_jump {
+	sljit_uw addr;
+	sljit_uw flags;
+	sljit_uw value;
+};
+
+struct sljit_serialized_const {
+	sljit_uw addr;
+};
+
+#define SLJIT_SERIALIZE_ALIGN(v) (((v) + sizeof(sljit_uw) - 1) & ~(sljit_uw)(sizeof(sljit_uw) - 1))
+#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#define SLJIT_SERIALIZE_SIGNATURE 0x534c4a54
+#else /* !SLJIT_LITTLE_ENDIAN */
+#define SLJIT_SERIALIZE_SIGNATURE 0x544a4c53
+#endif /* SLJIT_LITTLE_ENDIAN */
+#define SLJIT_SERIALIZE_VERSION 1
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_uw* sljit_serialize_compiler(struct sljit_compiler *compiler,
+	sljit_s32 options, sljit_uw *size)
+{
+	sljit_uw serialized_size = sizeof(struct sljit_serialized_compiler);
+	struct sljit_memory_fragment *buf;
+	struct sljit_label *label;
+	struct sljit_jump *jump;
+	struct sljit_const *const_;
+	struct sljit_serialized_compiler *serialized_compiler;
+	struct sljit_serialized_label *serialized_label;
+	struct sljit_serialized_jump *serialized_jump;
+	struct sljit_serialized_const *serialized_const;
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
+		|| (defined SLJIT_DEBUG && SLJIT_DEBUG)
+	struct sljit_serialized_debug_info *serialized_debug_info;
+#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */
+	sljit_uw counter, used_size;
+	sljit_u8 *result;
+	sljit_u8 *ptr;
+	SLJIT_UNUSED_ARG(options);
+
+	if (size != NULL)
+		*size = 0;
+
+	PTR_FAIL_IF(compiler->error);
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
+		|| (defined SLJIT_DEBUG && SLJIT_DEBUG)
+	if (!(options & SLJIT_SERIALIZE_IGNORE_DEBUG))
+		serialized_size += sizeof(struct sljit_serialized_debug_info);
+#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */
+
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
+	serialized_size += SLJIT_SERIALIZE_ALIGN(compiler->cpool_fill * (sizeof(sljit_uw) + 1));
+#endif /* SLJIT_CONFIG_ARM_V6 */
+
+	/* Compute the size of the data. */
+	buf = compiler->buf;
+	while (buf != NULL) {
+		serialized_size += sizeof(sljit_uw) + SLJIT_SERIALIZE_ALIGN(buf->used_size);
+		buf = buf->next;
+	}
+
+	serialized_size += compiler->label_count * sizeof(struct sljit_serialized_label);
+
+	jump = compiler->jumps;
+	while (jump != NULL) {
+		serialized_size += sizeof(struct sljit_serialized_jump);
+		jump = jump->next;
+	}
+
+	const_ = compiler->consts;
+	while (const_ != NULL) {
+		serialized_size += sizeof(struct sljit_serialized_const);
+		const_ = const_->next;
+	}
+
+	result = (sljit_u8*)SLJIT_MALLOC(serialized_size, compiler->allocator_data);
+	PTR_FAIL_IF_NULL(result);
+
+	if (size != NULL)
+		*size = serialized_size;
+
+	ptr = result;
+	serialized_compiler = (struct sljit_serialized_compiler*)ptr;
+	ptr += sizeof(struct sljit_serialized_compiler);
+
+	serialized_compiler->signature = SLJIT_SERIALIZE_SIGNATURE;
+	serialized_compiler->version = SLJIT_SERIALIZE_VERSION;
+	serialized_compiler->cpu_type = 0;
+	serialized_compiler->label_count = compiler->label_count;
+	serialized_compiler->options = compiler->options;
+	serialized_compiler->scratches = compiler->scratches;
+	serialized_compiler->saveds = compiler->saveds;
+	serialized_compiler->fscratches = compiler->fscratches;
+	serialized_compiler->fsaveds = compiler->fsaveds;
+	serialized_compiler->local_size = compiler->local_size;
+	serialized_compiler->size = compiler->size;
+
+#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE)
+	serialized_compiler->status_flags_state = compiler->status_flags_state;
+#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
+		|| ((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)) \
+		|| (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+	serialized_compiler->args_size = compiler->args_size;
+#endif /* SLJIT_CONFIG_X86_32 || (SLJIT_CONFIG_ARM_32 && __SOFTFP__) || SLJIT_CONFIG_MIPS_32 */
+
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
+	serialized_compiler->cpool_diff = compiler->cpool_diff;
+	serialized_compiler->cpool_fill = compiler->cpool_fill;
+	serialized_compiler->patches = compiler->patches;
+
+	SLJIT_MEMCPY(ptr, compiler->cpool, compiler->cpool_fill * sizeof(sljit_uw));
+	SLJIT_MEMCPY(ptr + compiler->cpool_fill * sizeof(sljit_uw), compiler->cpool_unique, compiler->cpool_fill);
+	ptr += SLJIT_SERIALIZE_ALIGN(compiler->cpool_fill * (sizeof(sljit_uw) + 1));
+#endif /* SLJIT_CONFIG_ARM_V6 */
+
+#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
+	serialized_compiler->delay_slot = compiler->delay_slot;
+#endif /* SLJIT_CONFIG_MIPS */
+
+	buf = compiler->buf;
+	counter = 0;
+	while (buf != NULL) {
+		used_size = buf->used_size;
+		*(sljit_uw*)ptr = used_size;
+		ptr += sizeof(sljit_uw);
+		SLJIT_MEMCPY(ptr, buf->memory, used_size);
+		ptr += SLJIT_SERIALIZE_ALIGN(used_size);
+		buf = buf->next;
+		counter++;
+	}
+	serialized_compiler->buf_segment_count = counter;
+
+	label = compiler->labels;
+	while (label != NULL) {
+		serialized_label = (struct sljit_serialized_label*)ptr;
+		serialized_label->size = label->size;
+		ptr += sizeof(struct sljit_serialized_label);
+		label = label->next;
+	}
+
+	jump = compiler->jumps;
+	counter = 0;
+	while (jump != NULL) {
+		serialized_jump = (struct sljit_serialized_jump*)ptr;
+		serialized_jump->addr = jump->addr;
+		serialized_jump->flags = jump->flags;
+
+		if (jump->flags & JUMP_ADDR)
+			serialized_jump->value = jump->u.target;
+		else if (jump->u.label != NULL)
+			serialized_jump->value = jump->u.label->u.index;
+		else
+			serialized_jump->value = SLJIT_MAX_ADDRESS;
+
+		ptr += sizeof(struct sljit_serialized_jump);
+		jump = jump->next;
+		counter++;
+	}
+	serialized_compiler->jump_count = counter;
+
+	const_ = compiler->consts;
+	counter = 0;
+	while (const_ != NULL) {
+		serialized_const = (struct sljit_serialized_const*)ptr;
+		serialized_const->addr = const_->addr;
+		ptr += sizeof(struct sljit_serialized_const);
+		const_ = const_->next;
+		counter++;
+	}
+	serialized_compiler->const_count = counter;
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
+		|| (defined SLJIT_DEBUG && SLJIT_DEBUG)
+	if (!(options & SLJIT_SERIALIZE_IGNORE_DEBUG)) {
+		serialized_debug_info = (struct sljit_serialized_debug_info*)ptr;
+		serialized_debug_info->last_flags = compiler->last_flags;
+		serialized_debug_info->last_return = compiler->last_return;
+		serialized_debug_info->logical_local_size = compiler->logical_local_size;
+		serialized_compiler->cpu_type |= SLJIT_SERIALIZE_DEBUG;
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
+		ptr += sizeof(struct sljit_serialized_debug_info);
+#endif /* SLJIT_DEBUG */
+	}
+#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */
+
+	SLJIT_ASSERT((sljit_uw)(ptr - result) == serialized_size);
+	return (sljit_uw*)result;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler *sljit_deserialize_compiler(sljit_uw* buffer, sljit_uw size,
+	sljit_s32 options, void *allocator_data)
+{
+	struct sljit_compiler *compiler;
+	struct sljit_serialized_compiler *serialized_compiler;
+	struct sljit_serialized_label *serialized_label;
+	struct sljit_serialized_jump *serialized_jump;
+	struct sljit_serialized_const *serialized_const;
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
+		|| (defined SLJIT_DEBUG && SLJIT_DEBUG)
+	struct sljit_serialized_debug_info *serialized_debug_info;
+#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */
+	struct sljit_memory_fragment *buf;
+	struct sljit_memory_fragment *last_buf;
+	struct sljit_label *label;
+	struct sljit_label *last_label;
+	struct sljit_label **label_list = NULL;
+	struct sljit_jump *jump;
+	struct sljit_jump *last_jump;
+	struct sljit_const *const_;
+	struct sljit_const *last_const;
+	sljit_u8 *ptr = (sljit_u8*)buffer;
+	sljit_u8 *end = ptr + size;
+	sljit_uw i, used_size, aligned_size, label_count;
+	SLJIT_UNUSED_ARG(options);
+
+	if (size < sizeof(struct sljit_serialized_compiler) || (size & (sizeof(sljit_uw) - 1)) != 0)
+		return NULL;
+
+	serialized_compiler = (struct sljit_serialized_compiler*)ptr;
+
+	if (serialized_compiler->signature != SLJIT_SERIALIZE_SIGNATURE || serialized_compiler->version != SLJIT_SERIALIZE_VERSION)
+		return NULL;
+
+	compiler = sljit_create_compiler(allocator_data);
+	PTR_FAIL_IF(compiler == NULL);
+
+	compiler->label_count = serialized_compiler->label_count;
+	compiler->options = serialized_compiler->options;
+	compiler->scratches = serialized_compiler->scratches;
+	compiler->saveds = serialized_compiler->saveds;
+	compiler->fscratches = serialized_compiler->fscratches;
+	compiler->fsaveds = serialized_compiler->fsaveds;
+	compiler->local_size = serialized_compiler->local_size;
+	compiler->size = serialized_compiler->size;
+
+#if (defined SLJIT_HAS_STATUS_FLAGS_STATE && SLJIT_HAS_STATUS_FLAGS_STATE)
+	compiler->status_flags_state = serialized_compiler->status_flags_state;
+#endif /* SLJIT_HAS_STATUS_FLAGS_STATE */
+
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \
+		|| ((defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)) \
+		|| (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+	compiler->args_size = serialized_compiler->args_size;
+#endif /* SLJIT_CONFIG_X86_32 || (SLJIT_CONFIG_ARM_32 && __SOFTFP__) || SLJIT_CONFIG_MIPS_32 */
+
+#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
+	used_size = serialized_compiler->cpool_fill;
+	aligned_size = SLJIT_SERIALIZE_ALIGN(used_size * (sizeof(sljit_uw) + 1));
+	compiler->cpool_diff = serialized_compiler->cpool_diff;
+	compiler->cpool_fill = used_size;
+	compiler->patches = serialized_compiler->patches;
+
+	if ((sljit_uw)(end - ptr) < aligned_size)
+		goto error;
+
+	SLJIT_MEMCPY(compiler->cpool, ptr, used_size * sizeof(sljit_uw));
+	SLJIT_MEMCPY(compiler->cpool_unique, ptr + used_size * sizeof(sljit_uw), used_size);
+	ptr += aligned_size;
+#endif /* SLJIT_CONFIG_ARM_V6 */
+
+#if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
+	compiler->delay_slot = serialized_compiler->delay_slot;
+#endif /* SLJIT_CONFIG_MIPS */
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
+		|| (defined SLJIT_DEBUG && SLJIT_DEBUG)
+	if (!(serialized_compiler->cpu_type & SLJIT_SERIALIZE_DEBUG))
+		goto error;
+#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */
+
+	ptr += sizeof(struct sljit_serialized_compiler);
+	i = serialized_compiler->buf_segment_count;
+	last_buf = NULL;
+	while (i > 0) {
+		if ((sljit_uw)(end - ptr) < sizeof(sljit_uw))
+			goto error;
+
+		used_size = *(sljit_uw*)ptr;
+		aligned_size = SLJIT_SERIALIZE_ALIGN(used_size);
+		ptr += sizeof(sljit_uw);
+
+		if ((sljit_uw)(end - ptr) < aligned_size)
+			goto error;
+
+		if (last_buf == NULL) {
+			SLJIT_ASSERT(compiler->buf != NULL && compiler->buf->next == NULL);
+			buf = compiler->buf;
+		} else {
+			buf = (struct sljit_memory_fragment*)SLJIT_MALLOC(BUF_SIZE, allocator_data);
+			if (!buf)
+				goto error;
+			buf->next = NULL;
+		}
+
+		buf->used_size = used_size;
+		SLJIT_MEMCPY(buf->memory, ptr, used_size);
+
+		if (last_buf != NULL)
+			last_buf->next = buf;
+		last_buf = buf;
+
+		ptr += aligned_size;
+		i--;
+	}
+
+	last_label = NULL;
+	label_count = serialized_compiler->label_count;
+	if ((sljit_uw)(end - ptr) < label_count * sizeof(struct sljit_serialized_label))
+		goto error;
+
+	label_list = (struct sljit_label **)SLJIT_MALLOC(label_count * sizeof(struct sljit_label*), allocator_data);
+	if (label_list == NULL)
+		goto error;
+
+	for (i = 0; i < label_count; i++) {
+		label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
+		if (label == NULL)
+			goto error;
+
+		serialized_label = (struct sljit_serialized_label*)ptr;
+		label->next = NULL;
+		label->u.index = i;
+		label->size = serialized_label->size;
+
+		if (last_label != NULL)
+			last_label->next = label;
+		else
+			compiler->labels = label;
+		last_label = label;
+
+		label_list[i] = label;
+		ptr += sizeof(struct sljit_serialized_label);
+	}
+	compiler->last_label = last_label;
+
+	last_jump = NULL;
+	i = serialized_compiler->jump_count;
+	if ((sljit_uw)(end - ptr) < i * sizeof(struct sljit_serialized_jump))
+		goto error;
+
+	while (i > 0) {
+		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+		if (jump == NULL)
+			goto error;
+
+		serialized_jump = (struct sljit_serialized_jump*)ptr;
+		jump->next = NULL;
+		jump->addr = serialized_jump->addr;
+		jump->flags = serialized_jump->flags;
+
+		if (!(serialized_jump->flags & JUMP_ADDR)) {
+			if (serialized_jump->value != SLJIT_MAX_ADDRESS) {
+				if (serialized_jump->value >= label_count)
+					goto error;
+				jump->u.label = label_list[serialized_jump->value];
+			} else
+				jump->u.label = NULL;
+		} else
+			jump->u.target = serialized_jump->value;
+
+		if (last_jump != NULL)
+			last_jump->next = jump;
+		else
+			compiler->jumps = jump;
+		last_jump = jump;
+
+		ptr += sizeof(struct sljit_serialized_jump);
+		i--;
+	}
+	compiler->last_jump = last_jump;
+
+	SLJIT_FREE(label_list, allocator_data);
+	label_list = NULL;
+
+	last_const = NULL;
+	i = serialized_compiler->const_count;
+	if ((sljit_uw)(end - ptr) < i * sizeof(struct sljit_serialized_const))
+		goto error;
+
+	while (i > 0) {
+		const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
+		if (const_ == NULL)
+			goto error;
+
+		serialized_const = (struct sljit_serialized_const*)ptr;
+		const_->next = NULL;
+		const_->addr = serialized_const->addr;
+
+		if (last_const != NULL)
+			last_const->next = const_;
+		else
+			compiler->consts = const_;
+		last_const = const_;
+
+		ptr += sizeof(struct sljit_serialized_const);
+		i--;
+	}
+	compiler->last_const = last_const;
+
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \
+		|| (defined SLJIT_DEBUG && SLJIT_DEBUG)
+	if ((sljit_uw)(end - ptr) < sizeof(struct sljit_serialized_debug_info))
+		goto error;
+
+	serialized_debug_info = (struct sljit_serialized_debug_info*)ptr;
+	compiler->last_flags = (sljit_s32)serialized_debug_info->last_flags;
+	compiler->last_return = serialized_debug_info->last_return;
+	compiler->logical_local_size = serialized_debug_info->logical_local_size;
+#endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_DEBUG */
+
+	return compiler;
+
+error:
+	sljit_free_compiler(compiler);
+	if (label_list != NULL)
+		SLJIT_FREE(label_list, allocator_data);
+	return NULL;
+}

+ 0 - 0
thirdparty/pcre2/src/sljit/sljitUtils.c → thirdparty/pcre2/deps/sljit/sljit_src/sljitUtils.c


+ 27 - 27
thirdparty/pcre2/src/config.h

@@ -52,15 +52,24 @@ sure both macros are undefined; an emulation function will then be used. */
    LF does in an ASCII/Unicode environment. */
 /* #undef EBCDIC_NL25 */
 
+/* Define to 1 if you have the <assert.h> header file. */
+/* #undef HAVE_ASSERT_H */
+
 /* Define this if your compiler supports __attribute__((uninitialized)) */
 /* #undef HAVE_ATTRIBUTE_UNINITIALIZED */
 
-/* Define to 1 if you have the 'bcopy' function. */
+/* Define to 1 if you have the `bcopy' function. */
 /* #undef HAVE_BCOPY */
 
+/* Define this if your compiler provides __assume() */
+/* #undef HAVE_BUILTIN_ASSUME */
+
 /* Define this if your compiler provides __builtin_mul_overflow() */
 /* #undef HAVE_BUILTIN_MUL_OVERFLOW */
 
+/* Define this if your compiler provides __builtin_unreachable() */
+/* #undef HAVE_BUILTIN_UNREACHABLE */
+
 /* Define to 1 if you have the <bzlib.h> header file. */
 /* #undef HAVE_BZLIB_H */
 
@@ -82,16 +91,16 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to 1 if you have the <limits.h> header file. */
 /* #undef HAVE_LIMITS_H */
 
-/* Define to 1 if you have the 'memfd_create' function. */
+/* Define to 1 if you have the `memfd_create' function. */
 /* #undef HAVE_MEMFD_CREATE */
 
-/* Define to 1 if you have the 'memmove' function. */
+/* Define to 1 if you have the `memmove' function. */
 /* #undef HAVE_MEMMOVE */
 
 /* Define to 1 if you have the <minix/config.h> header file. */
 /* #undef HAVE_MINIX_CONFIG_H */
 
-/* Define to 1 if you have the 'mkostemp' function. */
+/* Define to 1 if you have the `mkostemp' function. */
 /* #undef HAVE_MKOSTEMP */
 
 /* Define if you have POSIX threads libraries and header files. */
@@ -112,7 +121,7 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to 1 if you have the `realpath' function. */
 /* #undef HAVE_REALPATH */
 
-/* Define to 1 if you have the 'secure_getenv' function. */
+/* Define to 1 if you have the `secure_getenv' function. */
 /* #undef HAVE_SECURE_GETENV */
 
 /* Define to 1 if you have the <stdint.h> header file. */
@@ -124,7 +133,7 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to 1 if you have the <stdlib.h> header file. */
 /* #undef HAVE_STDLIB_H */
 
-/* Define to 1 if you have the 'strerror' function. */
+/* Define to 1 if you have the `strerror' function. */
 /* #undef HAVE_STRERROR */
 
 /* Define to 1 if you have the <strings.h> header file. */
@@ -145,7 +154,8 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to 1 if you have the <unistd.h> header file. */
 /* #undef HAVE_UNISTD_H */
 
-/* Define to 1 if the compiler supports simple visibility declarations. */
+/* Define to 1 if the compiler supports GCC compatible visibility
+   declarations. */
 /* #undef HAVE_VISIBILITY */
 
 /* Define to 1 if you have the <wchar.h> header file. */
@@ -215,7 +225,7 @@ sure both macros are undefined; an emulation function will then be used. */
    Care must be taken if it is increased, because it guards against integer
    overflow caused by enormously large patterns. */
 #ifndef MAX_NAME_SIZE
-#define MAX_NAME_SIZE 32
+#define MAX_NAME_SIZE 128
 #endif
 
 /* The value of MAX_VARLOOKBEHIND specifies the default maximum length, in
@@ -245,7 +255,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_NAME "PCRE2"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 10.43"
+#define PACKAGE_STRING "PCRE2 10.45"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "pcre2"
@@ -254,7 +264,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_URL ""
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "10.43"
+#define PACKAGE_VERSION "10.45"
 
 /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
    parentheses (of any kind) in a pattern. This limits the amount of system
@@ -311,7 +321,7 @@ sure both macros are undefined; an emulation function will then be used. */
    unless SUPPORT_JIT is also defined. */
 /* #undef SLJIT_PROT_EXECUTABLE_ALLOCATOR */
 
-/* Define to 1 if all of the C89 standard headers exist (not just the ones
+/* Define to 1 if all of the C90 standard headers exist (not just the ones
    required in a freestanding environment). This macro is provided for
    backward compatibility; new code need not use it. */
 /* #undef STDC_HEADERS */
@@ -366,7 +376,7 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to any value for valgrind support to find invalid memory reads. */
 /* #undef SUPPORT_VALGRIND */
 
-/* Enable extensions on AIX, Interix, z/OS.  */
+/* Enable extensions on AIX 3, Interix.  */
 #ifndef _ALL_SOURCE
 # define _ALL_SOURCE 1
 #endif
@@ -427,15 +437,11 @@ sure both macros are undefined; an emulation function will then be used. */
 #ifndef __STDC_WANT_IEC_60559_DFP_EXT__
 # define __STDC_WANT_IEC_60559_DFP_EXT__ 1
 #endif
-/* Enable extensions specified by C23 Annex F.  */
-#ifndef __STDC_WANT_IEC_60559_EXT__
-# define __STDC_WANT_IEC_60559_EXT__ 1
-#endif
 /* Enable extensions specified by ISO/IEC TS 18661-4:2015.  */
 #ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__
 # define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1
 #endif
-/* Enable extensions specified by C23 Annex H and ISO/IEC TS 18661-3:2015.  */
+/* Enable extensions specified by ISO/IEC TS 18661-3:2015.  */
 #ifndef __STDC_WANT_IEC_60559_TYPES_EXT__
 # define __STDC_WANT_IEC_60559_TYPES_EXT__ 1
 #endif
@@ -458,26 +464,20 @@ sure both macros are undefined; an emulation function will then be used. */
 #endif
 
 /* Version number of package */
-#define VERSION "10.43"
+#define VERSION "10.45"
 
 /* Number of bits in a file offset, on hosts where this is settable. */
 /* #undef _FILE_OFFSET_BITS */
 
-/* Define to 1 on platforms where this makes off_t a 64-bit type. */
+/* Define for large files, on AIX-style hosts. */
 /* #undef _LARGE_FILES */
 
-/* Number of bits in time_t, on hosts where this is settable. */
-/* #undef _TIME_BITS */
-
-/* Define to 1 on platforms where this makes time_t a 64-bit type. */
-/* #undef __MINGW_USE_VC2005_COMPAT */
-
-/* Define to empty if 'const' does not conform to ANSI C. */
+/* Define to empty if `const' does not conform to ANSI C. */
 /* #undef const */
 
 /* Define to the type of a signed integer type of width exactly 64 bits if
    such a type exists and the standard includes do not define it. */
 /* #undef int64_t */
 
-/* Define as 'unsigned int' if <stddef.h> doesn't define. */
+/* Define to `unsigned int' if <sys/types.h> does not define. */
 /* #undef size_t */

+ 67 - 5
thirdparty/pcre2/src/pcre2.h

@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */
 
 #define PCRE2_MAJOR           10
-#define PCRE2_MINOR           43
+#define PCRE2_MINOR           45
 #define PCRE2_PRERELEASE      
-#define PCRE2_DATE            2024-02-16
+#define PCRE2_DATE            2025-02-05
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE2, the appropriate
@@ -143,6 +143,7 @@ D   is inspected during pcre2_dfa_match() execution
 #define PCRE2_EXTENDED_MORE       0x01000000u  /* C       */
 #define PCRE2_LITERAL             0x02000000u  /* C       */
 #define PCRE2_MATCH_INVALID_UTF   0x04000000u  /*   J M D */
+#define PCRE2_ALT_EXTENDED_CLASS  0x08000000u  /* C       */
 
 /* An additional compile options word is available in the compile context. */
 
@@ -159,6 +160,10 @@ D   is inspected during pcre2_dfa_match() execution
 #define PCRE2_EXTRA_ASCII_BSW                0x00000400u  /* C */
 #define PCRE2_EXTRA_ASCII_POSIX              0x00000800u  /* C */
 #define PCRE2_EXTRA_ASCII_DIGIT              0x00001000u  /* C */
+#define PCRE2_EXTRA_PYTHON_OCTAL             0x00002000u  /* C */
+#define PCRE2_EXTRA_NO_BS0                   0x00004000u  /* C */
+#define PCRE2_EXTRA_NEVER_CALLOUT            0x00008000u  /* C */
+#define PCRE2_EXTRA_TURKISH_CASING           0x00010000u  /* C */
 
 /* These are for pcre2_jit_compile(). */
 
@@ -166,6 +171,7 @@ D   is inspected during pcre2_dfa_match() execution
 #define PCRE2_JIT_PARTIAL_SOFT    0x00000002u
 #define PCRE2_JIT_PARTIAL_HARD    0x00000004u
 #define PCRE2_JIT_INVALID_UTF     0x00000100u
+#define PCRE2_JIT_TEST_ALLOC      0x00000200u
 
 /* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
 pcre2_substitute(). Some are allowed only for one of the functions, and in
@@ -318,9 +324,25 @@ pcre2_pattern_convert(). */
 #define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN        195
 #define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE       196
 #define PCRE2_ERROR_TOO_MANY_CAPTURES              197
-#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED  198
+#define PCRE2_ERROR_MISSING_OCTAL_DIGIT            198
 #define PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND      199
-
+#define PCRE2_ERROR_MAX_VAR_LOOKBEHIND_EXCEEDED    200
+#define PCRE2_ERROR_PATTERN_COMPILED_SIZE_TOO_BIG  201
+#define PCRE2_ERROR_OVERSIZE_PYTHON_OCTAL          202
+#define PCRE2_ERROR_CALLOUT_CALLER_DISABLED        203
+#define PCRE2_ERROR_EXTRA_CASING_REQUIRES_UNICODE  204
+#define PCRE2_ERROR_TURKISH_CASING_REQUIRES_UTF    205
+#define PCRE2_ERROR_EXTRA_CASING_INCOMPATIBLE      206
+#define PCRE2_ERROR_ECLASS_NEST_TOO_DEEP           207
+#define PCRE2_ERROR_ECLASS_INVALID_OPERATOR        208
+#define PCRE2_ERROR_ECLASS_UNEXPECTED_OPERATOR     209
+#define PCRE2_ERROR_ECLASS_EXPECTED_OPERAND        210
+#define PCRE2_ERROR_ECLASS_MIXED_OPERATORS         211
+#define PCRE2_ERROR_ECLASS_HINT_SQUARE_BRACKET     212
+#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_EXPR    213
+#define PCRE2_ERROR_PERL_ECLASS_EMPTY_EXPR         214
+#define PCRE2_ERROR_PERL_ECLASS_MISSING_CLOSE      215
+#define PCRE2_ERROR_PERL_ECLASS_UNEXPECTED_CHAR    216
 
 /* "Expected" matching error codes: no match and partial match. */
 
@@ -407,6 +429,9 @@ released, the numbers must not be changed. */
 #define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
 #define PCRE2_ERROR_DFA_UINVALID_UTF  (-66)
 #define PCRE2_ERROR_INVALIDOFFSET     (-67)
+#define PCRE2_ERROR_JIT_UNSUPPORTED   (-68)
+#define PCRE2_ERROR_REPLACECASE       (-69)
+#define PCRE2_ERROR_TOOLARGEREPLACE   (-70)
 
 
 /* Request types for pcre2_pattern_info() */
@@ -460,6 +485,30 @@ released, the numbers must not be changed. */
 #define PCRE2_CONFIG_COMPILED_WIDTHS        14
 #define PCRE2_CONFIG_TABLES_LENGTH          15
 
+/* Optimization directives for pcre2_set_optimize().
+For binary compatibility, only add to this list; do not renumber. */
+
+#define PCRE2_OPTIMIZATION_NONE    0
+#define PCRE2_OPTIMIZATION_FULL    1
+
+#define PCRE2_AUTO_POSSESS         64
+#define PCRE2_AUTO_POSSESS_OFF     65
+#define PCRE2_DOTSTAR_ANCHOR       66
+#define PCRE2_DOTSTAR_ANCHOR_OFF   67
+#define PCRE2_START_OPTIMIZE       68
+#define PCRE2_START_OPTIMIZE_OFF   69
+
+/* Types used in pcre2_set_substitute_case_callout().
+
+PCRE2_SUBSTITUTE_CASE_LOWER and PCRE2_SUBSTITUTE_CASE_UPPER are passed to the
+callout to indicate that the case of the entire callout input should be
+case-transformed. PCRE2_SUBSTITUTE_CASE_TITLE_FIRST is passed to indicate that
+only the first character or glyph should be transformed to Unicode titlecase,
+and the rest to lowercase. */
+
+#define PCRE2_SUBSTITUTE_CASE_LOWER        1
+#define PCRE2_SUBSTITUTE_CASE_UPPER        2
+#define PCRE2_SUBSTITUTE_CASE_TITLE_FIRST  3
 
 /* Types for code units in patterns and subject strings. */
 
@@ -603,6 +652,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_max_pattern_compiled_length(pcre2_compile_context *, PCRE2_SIZE); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_set_max_varlookbehind(pcre2_compile_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@@ -611,7 +662,9 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_set_parens_nest_limit(pcre2_compile_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_set_compile_recursion_guard(pcre2_compile_context *, \
-    int (*)(uint32_t, void *), void *);
+    int (*)(uint32_t, void *), void *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_optimize(pcre2_compile_context *, uint32_t);
 
 #define PCRE2_MATCH_CONTEXT_FUNCTIONS \
 PCRE2_EXP_DECL pcre2_match_context *PCRE2_CALL_CONVENTION \
@@ -626,6 +679,11 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_set_substitute_callout(pcre2_match_context *, \
     int (*)(pcre2_substitute_callout_block *, void *), void *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_substitute_case_callout(pcre2_match_context *, \
+    PCRE2_SIZE (*)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, PCRE2_SIZE, int, \
+                   void *), \
+    void *); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@@ -738,6 +796,7 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_substring_list_get(pcre2_match_data *, PCRE2_UCHAR ***, PCRE2_SIZE **);
 
+
 /* Functions for serializing / deserializing compiled patterns. */
 
 #define PCRE2_SERIALIZE_FUNCTIONS \
@@ -901,10 +960,13 @@ pcre2_compile are called by application code. */
 #define pcre2_set_match_limit                 PCRE2_SUFFIX(pcre2_set_match_limit_)
 #define pcre2_set_max_varlookbehind           PCRE2_SUFFIX(pcre2_set_max_varlookbehind_)
 #define pcre2_set_max_pattern_length          PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
+#define pcre2_set_max_pattern_compiled_length PCRE2_SUFFIX(pcre2_set_max_pattern_compiled_length_)
 #define pcre2_set_newline                     PCRE2_SUFFIX(pcre2_set_newline_)
 #define pcre2_set_parens_nest_limit           PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
 #define pcre2_set_offset_limit                PCRE2_SUFFIX(pcre2_set_offset_limit_)
+#define pcre2_set_optimize                    PCRE2_SUFFIX(pcre2_set_optimize_)
 #define pcre2_set_substitute_callout          PCRE2_SUFFIX(pcre2_set_substitute_callout_)
+#define pcre2_set_substitute_case_callout     PCRE2_SUFFIX(pcre2_set_substitute_case_callout_)
 #define pcre2_substitute                      PCRE2_SUFFIX(pcre2_substitute_)
 #define pcre2_substring_copy_byname           PCRE2_SUFFIX(pcre2_substring_copy_byname_)
 #define pcre2_substring_copy_bynumber         PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)

+ 92 - 51
thirdparty/pcre2/src/pcre2_auto_possess.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2022 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -49,6 +49,10 @@ repeats into possessive repeats where possible. */
 
 #include "pcre2_internal.h"
 
+/* This macro represents the maximum size of list[], which is used to keep
+track of UCD info in several places. It should be kept in sync with the
+value used by GenerateUcd.py. */
+#define MAX_LIST 8
 
 /*************************************************
 *        Tables for auto-possessification        *
@@ -64,7 +68,7 @@ The Unicode property types (\P and \p) have to be present to fill out the table
 because of what their opcode values are, but the table values should always be
 zero because property types are handled separately in the code. The last four
 columns apply to items that cannot be repeated, so there is no need to have
-rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is
+rows for them. Note that OP_DIGIT etc. are generated only when PCRE2_UCP is
 *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
 
 #define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
@@ -123,21 +127,21 @@ opcode is used to select the column. The values are as follows:
 */
 
 static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
-/* ANY LAMP GC  PC  SC  SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
-  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_ANY */
-  { 0,  3,  0,  0,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_LAMP */
-  { 0,  0,  2,  4,  0,   0,    9,   10,     10,  11,    0,   0,    0,    0 },  /* PT_GC */
-  { 0,  0,  5,  2,  0,   0,   15,   16,     16,  17,    0,   0,    0,    0 },  /* PT_PC */
-  { 0,  0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SC */
-  { 0,  0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SCX */
-  { 0,  3,  6, 12,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_ALNUM */
-  { 0,  1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_SPACE */
-  { 0,  1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_PXSPACE */
-  { 0,  0,  8, 14,  0,   0,    0,    1,      1,   3,    0,   0,    0,    0 },  /* PT_WORD */
-  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_CLIST */
-  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   3,    0,    0 },  /* PT_UCNC */
-  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_BIDICL */
-  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 }   /* PT_BOOL */
+/* LAMP GC  PC  SC  SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
+  { 3,  0,  0,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_LAMP */
+  { 0,  2,  4,  0,   0,    9,   10,     10,  11,    0,   0,    0,    0 },  /* PT_GC */
+  { 0,  5,  2,  0,   0,   15,   16,     16,  17,    0,   0,    0,    0 },  /* PT_PC */
+  { 0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SC */
+  { 0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SCX */
+  { 3,  6, 12,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_ALNUM */
+  { 1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_SPACE */
+  { 1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_PXSPACE */
+  { 0,  8, 14,  0,   0,    0,    1,      1,   3,    0,   0,    0,    0 },  /* PT_WORD */
+  { 0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_CLIST */
+  { 0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   3,    0,    0 },  /* PT_UCNC */
+  { 0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_BIDICL */
+  { 0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 }   /* PT_BOOL */
+  /* PT_ANY does not need a record. */
 };
 
 /* This table is used to check whether auto-possessification is possible
@@ -199,7 +203,7 @@ static BOOL
 check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
   BOOL negated)
 {
-BOOL ok;
+BOOL ok, rc;
 const uint32_t *p;
 const ucd_record *prop = GET_UCD(c);
 
@@ -240,12 +244,13 @@ switch(ptype)
     {
     HSPACE_CASES:
     VSPACE_CASES:
-    return negated;
+    rc = negated;
+    break;
 
     default:
-    return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
+    rc = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
     }
-  break;  /* Control never reaches here */
+  return rc;
 
   case PT_WORD:
   return (PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
@@ -259,7 +264,8 @@ switch(ptype)
     if (c < *p) return !negated;
     if (c == *p++) return negated;
     }
-  break;  /* Control never reaches here */
+  PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
+  break;
 
   /* Haven't yet thought these through. */
 
@@ -328,6 +334,7 @@ get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
 PCRE2_UCHAR c = *code;
 PCRE2_UCHAR base;
 PCRE2_SPTR end;
+PCRE2_SPTR class_end;
 uint32_t chr;
 
 #ifdef SUPPORT_UNICODE
@@ -450,10 +457,12 @@ switch(c)
   code += 2;
 
   do {
-     if (clist_dest >= list + 8)
+     if (clist_dest >= list + MAX_LIST)
        {
-       /* Early return if there is not enough space. This should never
-       happen, since all clists are shorter than 5 character now. */
+       /* Early return if there is not enough space. GenerateUcd.py
+       generated a list with more than 5 characters and something
+       must be done about that going forward. */
+       PCRE2_DEBUG_UNREACHABLE();   /* Remove if it ever triggers */
        list[2] = code[0];
        list[3] = code[1];
        return code;
@@ -473,11 +482,13 @@ switch(c)
   case OP_CLASS:
 #ifdef SUPPORT_WIDE_CHARS
   case OP_XCLASS:
-  if (c == OP_XCLASS)
+  case OP_ECLASS:
+  if (c == OP_XCLASS || c == OP_ECLASS)
     end = code + GET(code, 0) - 1;
   else
 #endif
     end = code + 32 / sizeof(PCRE2_UCHAR);
+  class_end = end;
 
   switch(*end)
     {
@@ -505,6 +516,7 @@ switch(c)
     break;
     }
   list[2] = (uint32_t)(end - code);
+  list[3] = (uint32_t)(end - class_end);
   return end;
   }
 
@@ -537,7 +549,7 @@ compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
   const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit)
 {
 PCRE2_UCHAR c;
-uint32_t list[8];
+uint32_t list[MAX_LIST];
 const uint32_t *chr_ptr;
 const uint32_t *ochr_ptr;
 const uint32_t *list_ptr;
@@ -581,7 +593,7 @@ for(;;)
     continue;
     }
 
-  /* At the end of a branch, skip to the end of the group. */
+  /* At the end of a branch, skip to the end of the group and process it. */
 
   if (c == OP_ALT)
     {
@@ -638,19 +650,29 @@ for(;;)
         return FALSE;
       break;
 
-      /* Atomic sub-patterns and assertions can always auto-possessify their
-      last iterator except for variable length lookbehinds. However, if the
-      group was entered as a result of checking a previous iterator, this is
-      not possible. */
+      /* Atomic sub-patterns and forward assertions can always auto-possessify
+      their last iterator. However, if the group was entered as a result of
+      checking a previous iterator, this is not possible. */
 
       case OP_ASSERT:
       case OP_ASSERT_NOT:
       case OP_ONCE:
       return !entered_a_group;
 
+      /* Fixed-length lookbehinds can be treated the same way, but variable
+      length lookbehinds must not auto-possessify their last iterator. Note
+      that in order to identify a variable length lookbehind we must check
+      through all branches, because some may be of fixed length. */
+
       case OP_ASSERTBACK:
       case OP_ASSERTBACK_NOT:
-      return (bracode[1+LINK_SIZE] == OP_VREVERSE)? FALSE : !entered_a_group;
+      do
+        {
+        if (bracode[1+LINK_SIZE] == OP_VREVERSE) return FALSE;  /* Variable */
+        bracode += GET(bracode, 1);
+        }
+      while (*bracode == OP_ALT);
+      return !entered_a_group;  /* Not variable length */
 
       /* Non-atomic assertions - don't possessify last iterator. This needs
       more thought. */
@@ -748,12 +770,12 @@ for(;;)
     if (base_list[0] == OP_CLASS)
 #endif
       {
-      set1 = (uint8_t *)(base_end - base_list[2]);
+      set1 = (const uint8_t *)(base_end - base_list[2]);
       list_ptr = list;
       }
     else
       {
-      set1 = (uint8_t *)(code - list[2]);
+      set1 = (const uint8_t *)(code - list[2]);
       list_ptr = base_list;
       }
 
@@ -762,13 +784,14 @@ for(;;)
       {
       case OP_CLASS:
       case OP_NCLASS:
-      set2 = (uint8_t *)
+      set2 = (const uint8_t *)
         ((list_ptr == list ? code : base_end) - list_ptr[2]);
       break;
 
 #ifdef SUPPORT_WIDE_CHARS
       case OP_XCLASS:
-      xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE;
+      xclass_flags = (list_ptr == list ? code : base_end) -
+        list_ptr[2] + LINK_SIZE;
       if ((*xclass_flags & XCL_HASPROP) != 0) return FALSE;
       if ((*xclass_flags & XCL_MAP) == 0)
         {
@@ -777,7 +800,7 @@ for(;;)
         /* Might be an empty repeat. */
         continue;
         }
-      set2 = (uint8_t *)(xclass_flags + 1);
+      set2 = (const uint8_t *)(xclass_flags + 1);
       break;
 #endif
 
@@ -785,21 +808,21 @@ for(;;)
       invert_bits = TRUE;
       /* Fall through */
       case OP_DIGIT:
-      set2 = (uint8_t *)(cb->cbits + cbit_digit);
+      set2 = (const uint8_t *)(cb->cbits + cbit_digit);
       break;
 
       case OP_NOT_WHITESPACE:
       invert_bits = TRUE;
       /* Fall through */
       case OP_WHITESPACE:
-      set2 = (uint8_t *)(cb->cbits + cbit_space);
+      set2 = (const uint8_t *)(cb->cbits + cbit_space);
       break;
 
       case OP_NOT_WORDCHAR:
       invert_bits = TRUE;
       /* Fall through */
       case OP_WORDCHAR:
-      set2 = (uint8_t *)(cb->cbits + cbit_word);
+      set2 = (const uint8_t *)(cb->cbits + cbit_word);
       break;
 
       default:
@@ -1084,7 +1107,7 @@ for(;;)
 
       case OP_CLASS:
       if (chr > 255) break;
-      class_bitset = (uint8_t *)
+      class_bitset = (const uint8_t *)
         ((list_ptr == list ? code : base_end) - list_ptr[2]);
       if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
       break;
@@ -1092,9 +1115,18 @@ for(;;)
 #ifdef SUPPORT_WIDE_CHARS
       case OP_XCLASS:
       if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
-          list_ptr[2] + LINK_SIZE, utf)) return FALSE;
+          list_ptr[2] + LINK_SIZE, (const uint8_t*)cb->start_code, utf))
+        return FALSE;
       break;
-#endif
+
+      case OP_ECLASS:
+      if (PRIV(eclass)(chr,
+          (list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE,
+          (list_ptr == list ? code : base_end) - list_ptr[3],
+          (const uint8_t*)cb->start_code, utf))
+        return FALSE;
+      break;
+#endif /* SUPPORT_WIDE_CHARS */
 
       default:
       return FALSE;
@@ -1109,8 +1141,8 @@ for(;;)
   if (list[1] == 0) return TRUE;
   }
 
-/* Control never reaches here. There used to be a fail-save return FALSE; here,
-but some compilers complain about an unreachable statement. */
+PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
+return FALSE;              /* Avoid compiler warnings */
 }
 
 
@@ -1140,7 +1172,7 @@ PRIV(auto_possessify)(PCRE2_UCHAR *code, const compile_block *cb)
 PCRE2_UCHAR c;
 PCRE2_SPTR end;
 PCRE2_UCHAR *repeat_opcode;
-uint32_t list[8];
+uint32_t list[MAX_LIST];
 int rec_limit = 1000;  /* Was 10,000 but clang+ASAN uses a lot of stack. */
 BOOL utf = (cb->external_options & PCRE2_UTF) != 0;
 BOOL ucp = (cb->external_options & PCRE2_UCP) != 0;
@@ -1149,7 +1181,11 @@ for (;;)
   {
   c = *code;
 
-  if (c >= OP_TABLE_LENGTH) return -1;   /* Something gone wrong */
+  if (c >= OP_TABLE_LENGTH)
+    {
+    PCRE2_DEBUG_UNREACHABLE();
+    return -1;   /* Something gone wrong */
+    }
 
   if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
     {
@@ -1198,10 +1234,14 @@ for (;;)
       }
     c = *code;
     }
-  else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
+  else if (c == OP_CLASS || c == OP_NCLASS
+#ifdef SUPPORT_WIDE_CHARS
+           || c == OP_XCLASS || c == OP_ECLASS
+#endif
+           )
     {
 #ifdef SUPPORT_WIDE_CHARS
-    if (c == OP_XCLASS)
+    if (c == OP_XCLASS || c == OP_ECLASS)
       repeat_opcode = code + GET(code, 1);
     else
 #endif
@@ -1211,7 +1251,7 @@ for (;;)
     if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
       {
       /* The return from get_chr_property_list() will never be NULL when
-      *code (aka c) is one of the three class opcodes. However, gcc with
+      *code (aka c) is one of the four class opcodes. However, gcc with
       -fanalyzer notes that a NULL return is possible, and grumbles. Hence we
       put in a check. */
 
@@ -1279,6 +1319,7 @@ for (;;)
 
 #ifdef SUPPORT_WIDE_CHARS
     case OP_XCLASS:
+    case OP_ECLASS:
     code += GET(code, 1);
     break;
 #endif

+ 2 - 4
thirdparty/pcre2/src/pcre2_chkdint.c

@@ -74,9 +74,7 @@ if (__builtin_mul_overflow(a, b, &m)) return TRUE;
 #else
 INT64_OR_DOUBLE m;
 
-#ifdef PCRE2_DEBUG
-if (a < 0 || b < 0) abort();
-#endif
+PCRE2_ASSERT(a >= 0 && b >= 0);
 
 m = (INT64_OR_DOUBLE)a * (INT64_OR_DOUBLE)b;
 
@@ -93,4 +91,4 @@ if (m > PCRE2_SIZE_MAX) return TRUE;
 return FALSE;
 }
 
-/* End of pcre_chkdint.c */
+/* End of pcre2_chkdint.c */

File diff suppressed because it is too large
+ 338 - 275
thirdparty/pcre2/src/pcre2_compile.c


+ 280 - 0
thirdparty/pcre2/src/pcre2_compile.h

@@ -0,0 +1,280 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE2 is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifndef PCRE2_COMPILE_H_IDEMPOTENT_GUARD
+#define PCRE2_COMPILE_H_IDEMPOTENT_GUARD
+
+#include "pcre2_internal.h"
+
+/* Compile time error code numbers. They are given names so that they can more
+easily be tracked. When a new number is added, the tables called eint1 and
+eint2 in pcre2posix.c may need to be updated, and a new error text must be
+added to compile_error_texts in pcre2_error.c. Also, the error codes in
+pcre2.h.in must be updated - their values are exactly 100 greater than these
+values. */
+
+enum { ERR0 = COMPILE_ERROR_BASE,
+       ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,  ERR10,
+       ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
+       ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
+       ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
+       ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50,
+       ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
+       ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
+       ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
+       ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
+       ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99, ERR100,
+       ERR101,ERR102,ERR103,ERR104,ERR105,ERR106,ERR107,ERR108,ERR109,ERR110,
+       ERR111,ERR112,ERR113,ERR114,ERR115,ERR116 };
+
+/* Code values for parsed patterns, which are stored in a vector of 32-bit
+unsigned ints. Values less than META_END are literal data values. The coding
+for identifying the item is in the top 16-bits, leaving 16 bits for the
+additional data that some of them need. The META_CODE, META_DATA, and META_DIFF
+macros are used to manipulate parsed pattern elements.
+
+NOTE: When these definitions are changed, the table of extra lengths for each
+code (meta_extra_lengths) must be updated to remain in step. */
+
+#define META_END              0x80000000u  /* End of pattern */
+
+#define META_ALT              0x80010000u  /* alternation */
+#define META_ATOMIC           0x80020000u  /* atomic group */
+#define META_BACKREF          0x80030000u  /* Back ref */
+#define META_BACKREF_BYNAME   0x80040000u  /* \k'name' */
+#define META_BIGVALUE         0x80050000u  /* Next is a literal > META_END */
+#define META_CALLOUT_NUMBER   0x80060000u  /* (?C with numerical argument */
+#define META_CALLOUT_STRING   0x80070000u  /* (?C with string argument */
+#define META_CAPTURE          0x80080000u  /* Capturing parenthesis */
+#define META_CIRCUMFLEX       0x80090000u  /* ^ metacharacter */
+#define META_CLASS            0x800a0000u  /* start non-empty class */
+#define META_CLASS_EMPTY      0x800b0000u  /* empty class */
+#define META_CLASS_EMPTY_NOT  0x800c0000u  /* negative empty class */
+#define META_CLASS_END        0x800d0000u  /* end of non-empty class */
+#define META_CLASS_NOT        0x800e0000u  /* start non-empty negative class */
+#define META_COND_ASSERT      0x800f0000u  /* (?(?assertion)... */
+#define META_COND_DEFINE      0x80100000u  /* (?(DEFINE)... */
+#define META_COND_NAME        0x80110000u  /* (?(<name>)... */
+#define META_COND_NUMBER      0x80120000u  /* (?(digits)... */
+#define META_COND_RNAME       0x80130000u  /* (?(R&name)... */
+#define META_COND_RNUMBER     0x80140000u  /* (?(Rdigits)... */
+#define META_COND_VERSION     0x80150000u  /* (?(VERSION<op>x.y)... */
+#define META_OFFSET           0x80160000u  /* Setting offset for various
+                                              META codes (e.g. META_SCS_NAME) */
+#define META_SCS              0x80170000u  /* (*scan_substring:... */
+#define META_SCS_NAME         0x80180000u  /* Next <name> of scan_substring */
+#define META_SCS_NUMBER       0x80190000u  /* Next digits of scan_substring */
+#define META_DOLLAR           0x801a0000u  /* $ metacharacter */
+#define META_DOT              0x801b0000u  /* . metacharacter */
+#define META_ESCAPE           0x801c0000u  /* \d and friends */
+#define META_KET              0x801d0000u  /* closing parenthesis */
+#define META_NOCAPTURE        0x801e0000u  /* no capture parens */
+#define META_OPTIONS          0x801f0000u  /* (?i) and friends */
+#define META_POSIX            0x80200000u  /* POSIX class item */
+#define META_POSIX_NEG        0x80210000u  /* negative POSIX class item */
+#define META_RANGE_ESCAPED    0x80220000u  /* range with at least one escape */
+#define META_RANGE_LITERAL    0x80230000u  /* range defined literally */
+#define META_RECURSE          0x80240000u  /* Recursion */
+#define META_RECURSE_BYNAME   0x80250000u  /* (?&name) */
+#define META_SCRIPT_RUN       0x80260000u  /* (*script_run:...) */
+
+/* These must be kept together to make it easy to check that an assertion
+is present where expected in a conditional group. */
+
+#define META_LOOKAHEAD        0x80270000u  /* (?= */
+#define META_LOOKAHEADNOT     0x80280000u  /* (?! */
+#define META_LOOKBEHIND       0x80290000u  /* (?<= */
+#define META_LOOKBEHINDNOT    0x802a0000u  /* (?<! */
+
+/* These cannot be conditions */
+
+#define META_LOOKAHEAD_NA     0x802b0000u  /* (*napla: */
+#define META_LOOKBEHIND_NA    0x802c0000u  /* (*naplb: */
+
+/* These must be kept in this order, with consecutive values, and the _ARG
+versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
+versions. */
+
+#define META_MARK             0x802d0000u  /* (*MARK) */
+#define META_ACCEPT           0x802e0000u  /* (*ACCEPT) */
+#define META_FAIL             0x802f0000u  /* (*FAIL) */
+#define META_COMMIT           0x80300000u  /* These               */
+#define META_COMMIT_ARG       0x80310000u  /*   pairs             */
+#define META_PRUNE            0x80320000u  /*     must            */
+#define META_PRUNE_ARG        0x80330000u  /*       be            */
+#define META_SKIP             0x80340000u  /*         kept        */
+#define META_SKIP_ARG         0x80350000u  /*           in        */
+#define META_THEN             0x80360000u  /*             this    */
+#define META_THEN_ARG         0x80370000u  /*               order */
+
+/* These must be kept in groups of adjacent 3 values, and all together. */
+
+#define META_ASTERISK         0x80380000u  /* *  */
+#define META_ASTERISK_PLUS    0x80390000u  /* *+ */
+#define META_ASTERISK_QUERY   0x803a0000u  /* *? */
+#define META_PLUS             0x803b0000u  /* +  */
+#define META_PLUS_PLUS        0x803c0000u  /* ++ */
+#define META_PLUS_QUERY       0x803d0000u  /* +? */
+#define META_QUERY            0x803e0000u  /* ?  */
+#define META_QUERY_PLUS       0x803f0000u  /* ?+ */
+#define META_QUERY_QUERY      0x80400000u  /* ?? */
+#define META_MINMAX           0x80410000u  /* {n,m}  repeat */
+#define META_MINMAX_PLUS      0x80420000u  /* {n,m}+ repeat */
+#define META_MINMAX_QUERY     0x80430000u  /* {n,m}? repeat */
+
+/* These meta codes must be kept in a group, with the OR/SUB/XOR in
+this order, and AND/NOT at the start/end. */
+
+#define META_ECLASS_AND       0x80440000u  /* && (or &) in a class */
+#define META_ECLASS_OR        0x80450000u  /* || (or |, +) in a class */
+#define META_ECLASS_SUB       0x80460000u  /* -- (or -) in a class */
+#define META_ECLASS_XOR       0x80470000u  /* ~~ (or ^) in a class */
+#define META_ECLASS_NOT       0x80480000u  /* ! in a class */
+
+/* Convenience aliases. */
+
+#define META_FIRST_QUANTIFIER META_ASTERISK
+#define META_LAST_QUANTIFIER  META_MINMAX_QUERY
+
+/* This is a special "meta code" that is used only to distinguish (*asr: from
+(*sr: in the table of alphabetic assertions. It is never stored in the parsed
+pattern because (*asr: is turned into (*sr:(*atomic: at that stage. There is
+therefore no need for it to have a length entry, so use a high value. */
+
+#define META_ATOMIC_SCRIPT_RUN 0x8fff0000u
+
+/* Macros for manipulating elements of the parsed pattern vector. */
+
+#define META_CODE(x)   (x & 0xffff0000u)
+#define META_DATA(x)   (x & 0x0000ffffu)
+#define META_DIFF(x,y) ((x-y)>>16)
+
+/* Extended class management flags. */
+
+#define CLASS_IS_ECLASS 0x1
+
+/* Macro for the highest character value. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#define MAX_UCHAR_VALUE 0xffu
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+#define MAX_UCHAR_VALUE 0xffffu
+#else
+#define MAX_UCHAR_VALUE 0xffffffffu
+#endif
+
+#define GET_MAX_CHAR_VALUE(utf) \
+  ((utf) ? MAX_UTF_CODE_POINT : MAX_UCHAR_VALUE)
+
+/* Macro for setting individual bits in class bitmaps. */
+
+#define SETBIT(a,b) a[(b) >> 3] |= (uint8_t)(1u << ((b) & 0x7))
+
+/* Macro for 8 bit specific checks. */
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#define SELECT_VALUE8(value8, value) (value8)
+#else
+#define SELECT_VALUE8(value8, value) (value)
+#endif
+
+/* Macro for aligning data. */
+#define CLIST_ALIGN_TO(base, align) \
+  ((base + ((size_t)(align) - 1)) & ~((size_t)(align) - 1))
+
+/* Structure for holding information about an OP_ECLASS internal operand.
+An "operand" here could be just a single OP_[X]CLASS, or it could be some
+complex expression; but it's some sequence of ECL_* codes which pushes one
+value to the stack. */
+typedef struct {
+  /* The position of the operand - or NULL if (lengthptr != NULL). */
+  PCRE2_UCHAR *code_start;
+  PCRE2_SIZE length;
+  /* The operand's type if it is a single code (ECL_XCLASS, ECL_ANY, ECL_NONE);
+  otherwise zero if the operand is not atomic. */
+  uint8_t op_single_type;
+  /* Regardless of whether it's a single code or not, we fully constant-fold
+  the bitmap for code points < 256. */
+  class_bits_storage bits;
+} eclass_op_info;
+
+/* Macros for the definitions below, to prevent name collisions. */
+
+#define _pcre2_posix_class_maps          PCRE2_SUFFIX(_pcre2_posix_class_maps)
+#define _pcre2_update_classbits          PCRE2_SUFFIX(_pcre2_update_classbits_)
+#define _pcre2_compile_class_nested      PCRE2_SUFFIX(_pcre2_compile_class_nested_)
+#define _pcre2_compile_class_not_nested  PCRE2_SUFFIX(_pcre2_compile_class_not_nested_)
+
+
+/* Indices of the POSIX classes in posix_names, posix_name_lengths,
+posix_class_maps, and posix_substitutes. They must be kept in sync. */
+
+#define PC_DIGIT   7
+#define PC_GRAPH   8
+#define PC_PRINT   9
+#define PC_PUNCT  10
+#define PC_XDIGIT 13
+
+extern const int PRIV(posix_class_maps)[];
+
+
+/* Set bits in classbits according to the property type */
+
+void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
+  uint8_t *classbits);
+
+/* Compile the META codes from start_ptr...end_ptr, writing a single
+OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY into pcode. */
+
+uint32_t *PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
+  uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
+  int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr);
+
+/* Compile the META codes in pptr into opcodes written to pcode. The pptr must
+start at a META_CLASS or META_CLASS_NOT.
+
+The pptr will be left pointing at the matching META_CLASS_END. */
+
+BOOL PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
+  uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
+  compile_block *cb, PCRE2_SIZE *lengthptr);
+
+#endif  /* PCRE2_COMPILE_H_IDEMPOTENT_GUARD */
+
+/* End of pcre2_compile.h */

+ 2737 - 0
thirdparty/pcre2/src/pcre2_compile_class.c

@@ -0,0 +1,2737 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_compile.h"
+
+typedef struct {
+  /* Option bits for eclass: the main option word and the extra option word. */
+  uint32_t options;
+  uint32_t xoptions;
+  /* Rarely used members: the location for an error code and the compile
+  block (presumably the same ones the class compiler receives - confirm at
+  the point of use). */
+  int *errorcodeptr;
+  compile_block *cb;
+  /* Bitmap is needed. */
+  BOOL needs_bitmap;
+} eclass_context;
+
+/* Checks the allowed tokens at the end of a class structure in debug mode.
+When a new token is not processed by all loops, and the token is equal to
+a) one of the cases here:
+   the compiler will complain about a duplicated case value.
+b) none of the cases here:
+   the loop without the handler will stop with an assertion failure.
+
+Without PCRE2_DEBUG the macro expands to a plain "default:" label. */
+
+#ifdef PCRE2_DEBUG
+#define CLASS_END_CASES(meta) \
+  default: \
+  PCRE2_ASSERT((meta) <= META_END); \
+  /* Fall through */ \
+  case META_CLASS: \
+  case META_CLASS_NOT: \
+  case META_CLASS_EMPTY: \
+  case META_CLASS_EMPTY_NOT: \
+  case META_CLASS_END: \
+  case META_ECLASS_AND: \
+  case META_ECLASS_OR: \
+  case META_ECLASS_SUB: \
+  case META_ECLASS_XOR: \
+  case META_ECLASS_NOT:
+#else
+#define CLASS_END_CASES(meta) \
+  default:
+#endif
+
+#ifdef SUPPORT_WIDE_CHARS
+
+/* Heapsort algorithm: sift-down step. The buffer holds two-value items in
+consecutive slots, so every index used here is even and the children of the
+item at slot i are the items at slots 2*i + 2 and 2*i + 4. Items are ordered
+by their first value only. The size argument is the number of uint32_t slots
+that belong to the heap. */
+
+static void do_heapify(uint32_t *buffer, size_t size, size_t i)
+{
+for (;;)
+  {
+  size_t left_child = 2 * i + 2;
+  size_t right_child = left_child + 2;
+  size_t largest = i;
+  uint32_t saved_first, saved_second;
+
+  if (left_child < size && buffer[left_child] > buffer[largest])
+    largest = left_child;
+  if (right_child < size && buffer[right_child] > buffer[largest])
+    largest = right_child;
+
+  /* The item at i is not smaller than its children: nothing left to do. */
+  if (largest == i) break;
+
+  /* Exchange the two items and carry on from the child's slot. */
+  saved_first = buffer[i];
+  saved_second = buffer[i + 1];
+  buffer[i] = buffer[largest];
+  buffer[i + 1] = buffer[largest + 1];
+  buffer[largest] = saved_first;
+  buffer[largest + 1] = saved_second;
+  i = largest;
+  }
+}
+
+#ifdef SUPPORT_UNICODE
+
+#define PARSE_CLASS_UTF               0x1
+#define PARSE_CLASS_CASELESS_UTF      0x2
+#define PARSE_CLASS_RESTRICTED_UTF    0x4
+#define PARSE_CLASS_TURKISH_UTF       0x8
+
+/* Binary search in the table of nocase ranges, which is read as pairs of
+values. Returns a pointer to the first pair whose second value is greater
+than 'c', that is, the range which includes 'c' or directly follows it. For
+a 'c' above MAX_UTF_CODE_POINT the address just past the table is returned. */
+
+static const uint32_t*
+get_nocase_range(uint32_t c)
+{
+const uint32_t *table = PRIV(ucd_nocase_ranges);
+uint32_t lo = 0;
+uint32_t hi = PRIV(ucd_nocase_ranges_size);
+
+if (c > MAX_UTF_CODE_POINT) return table + hi;
+
+for (;;)
+  {
+  /* Force the probe onto an odd slot: the range end of the middle element. */
+  uint32_t probe = ((lo + hi) / 2) | 1u;
+
+  if (table[probe] <= c)
+    {
+    lo = probe + 1;
+    continue;
+    }
+
+  /* This range ends after c. It is the answer unless the previous range
+  does so as well. */
+  if (probe <= 1 || table[probe - 2] <= c)
+    return table + (probe - 1);
+
+  hi = probe - 1;
+  }
+}
+
+/* Get the list of othercase characters, which belongs to the passed range.
+Create ranges from these characters, and append them to the buffer argument.
+
+The options argument takes PARSE_CLASS_* bits. An other-case character that
+touches the current bounds of the range extends the range by one; one that
+is already inside the range is ignored; any other is appended as a
+one-character range. The (possibly extended) original range is written last.
+When buffer is NULL nothing is stored, but the same count is produced.
+
+Returns: the number of uint32_t values appended (two per range). */
+
+static size_t
+utf_caseless_extend(uint32_t start, uint32_t end, uint32_t options,
+  uint32_t *buffer)
+{
+uint32_t new_start = start;
+uint32_t new_end = end;
+uint32_t c = start;
+const uint32_t *list;
+uint32_t tmp[3];
+size_t result = 2;  /* Accounts for the final (new_start, new_end) pair. */
+const uint32_t *skip_range = get_nocase_range(c);
+uint32_t skip_start = skip_range[0];
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+PCRE2_ASSERT(options & PARSE_CLASS_UTF);
+#endif
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+if (end > MAX_UTF_CODE_POINT) end = MAX_UTF_CODE_POINT;
+#endif
+
+while (c <= end)
+  {
+  uint32_t co;
+
+  if (c > skip_start)  /* Past the start of the current nocase range. */
+    {
+    c = skip_range[1];  /* Jump to the second value of the pair. */
+    skip_range += 2;
+    skip_start = skip_range[0];
+    continue;
+    }
+
+  /* Compute caseless set. */
+
+  if ((options & (PARSE_CLASS_TURKISH_UTF|PARSE_CLASS_RESTRICTED_UTF)) ==
+        PARSE_CLASS_TURKISH_UTF &&
+      UCD_ANY_I(c))
+    {
+    co = PRIV(ucd_turkish_dotted_i_caseset) + (UCD_DOTTED_I(c)? 0 : 3);
+    }
+  else if ((co = UCD_CASESET(c)) != 0 &&
+           (options & PARSE_CLASS_RESTRICTED_UTF) != 0 &&
+           PRIV(ucd_caseless_sets)[co] < 128)
+    {
+    co = 0;  /* Ignore the caseless set if it's restricted. */
+    }
+
+  if (co != 0)
+    list = PRIV(ucd_caseless_sets) + co;
+  else
+    {
+    co = UCD_OTHERCASE(c);  /* No caseless set: build a short local list. */
+    list = tmp;
+    tmp[0] = c;
+    tmp[1] = NOTACHAR;
+
+    if (co != c)
+      {
+      tmp[1] = co;
+      tmp[2] = NOTACHAR;
+      }
+    }
+  c++;
+
+  /* Add characters. */
+  do
+    {
+#if PCRE2_CODE_UNIT_WIDTH == 16
+    if (!(options & PARSE_CLASS_UTF) && *list > 0xffff) continue;
+#endif
+
+    if (*list < new_start)
+      {
+      if (*list + 1 == new_start)
+        {
+        new_start--;  /* Directly below the range: extend it downwards. */
+        continue;
+        }
+      }
+    else if (*list > new_end)
+      {
+      if (*list - 1 == new_end)
+        {
+        new_end++;  /* Directly above the range: extend it upwards. */
+        continue;
+        }
+      }
+    else continue;  /* Already inside [new_start, new_end]. */
+
+    result += 2;  /* Isolated character: a one-character range. */
+    if (buffer != NULL)
+      {
+      buffer[0] = *list;
+      buffer[1] = *list;
+      buffer += 2;
+      }
+    }
+  while (*(++list) != NOTACHAR);
+  }
+
+  if (buffer != NULL)
+    {
+    buffer[0] = new_start;
+    buffer[1] = new_end;
+    buffer += 2;
+    (void)buffer;
+    }
+  return result;
+}
+
+#endif
+
+/* Add a character list to a buffer. The list is terminated by NOTACHAR.
+Each run of consecutive values in the list becomes one (first, last) pair.
+When buffer is NULL the pairs are only counted.
+
+Returns: the number of uint32_t values appended (two per run). */
+
+static size_t
+append_char_list(const uint32_t *p, uint32_t *buffer)
+{
+size_t slots = 0;
+
+for (; *p != NOTACHAR; slots += 2)
+  {
+  const uint32_t *last = p;
+
+  /* Walk to the last value of the run that starts at *p. */
+  while (last[1] - 1 == last[0]) last++;
+
+  PCRE2_ASSERT(*p < 0xffff);
+
+  if (buffer != NULL)
+    {
+    *buffer++ = *p;
+    *buffer++ = *last;
+    }
+
+  p = last + 1;
+  }
+
+return slots;
+}
+
+/* Return the value used as the upper end of an open-ended range. This is
+MAX_UTF_CODE_POINT in the 8-bit library; otherwise it is
+GET_MAX_CHAR_VALUE() for the PARSE_CLASS_UTF setting when Unicode is
+supported, and MAX_UCHAR_VALUE when it is not. */
+
+static uint32_t
+get_highest_char(uint32_t options)
+{
+(void)options; /* Not every configuration below reads it. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+return MAX_UTF_CODE_POINT;
+#elif defined SUPPORT_UNICODE
+return GET_MAX_CHAR_VALUE((options & PARSE_CLASS_UTF) != 0);
+#else
+return MAX_UCHAR_VALUE;
+#endif
+}
+
+/* Add a negated character list to a buffer. The list is terminated by
+NOTACHAR and its first value must not be zero. One (first, last) pair is
+produced for the gap before each run of consecutive list values, plus a final
+pair reaching up to get_highest_char(). When buffer is NULL the pairs are only
+counted.
+
+Returns: the number of uint32_t values appended (two per gap). */
+static size_t
+append_negated_char_list(const uint32_t *p, uint32_t options, uint32_t *buffer)
+{
+size_t slots = 2;  /* The final gap is always present. */
+uint32_t gap_start = 0;
+
+PCRE2_ASSERT(*p > 0);
+
+for (; *p != NOTACHAR; slots += 2)
+  {
+  const uint32_t *last = p;
+
+  /* Walk to the last value of the run that starts at *p. */
+  while (last[1] - 1 == last[0]) last++;
+
+  PCRE2_ASSERT(*p < 0xffff);
+
+  if (buffer != NULL)
+    {
+    *buffer++ = gap_start;
+    *buffer++ = *p - 1;
+    }
+
+  gap_start = *last + 1;
+  p = last + 1;
+  }
+
+if (buffer != NULL)
+  {
+  buffer[0] = gap_start;
+  buffer[1] = get_highest_char(options);
+  }
+
+return slots;
+}
+
+/* Append the range from 0x100 up to get_highest_char() to the buffer.
+
+Returns: the position after the new pair, or NULL when buffer is NULL (in
+which case nothing is stored). */
+
+static uint32_t *
+append_non_ascii_range(uint32_t options, uint32_t *buffer)
+{
+if (buffer != NULL)
+  {
+  *buffer++ = 0x100;
+  *buffer++ = get_highest_char(options);
+  }
+
+return buffer;
+}
+
+static size_t
+parse_class(uint32_t *ptr, uint32_t options, uint32_t *buffer)
+{ /* Walk the parsed items of a class, starting at ptr, and turn them into
+  (start, end) pairs. The walk stops at the first item that reaches the
+  CLASS_END_CASES label with a value of META_END or above. The options
+  argument takes PARSE_CLASS_* bits. When buffer is NULL the pairs are only
+  counted, otherwise they are also stored in buffer.
+
+  Returns: the number of uint32_t values needed for the pairs. */
+size_t total_size = 0;
+size_t size;
+uint32_t meta_arg;
+uint32_t start_char;
+
+while (TRUE)
+  {
+  switch (META_CODE(*ptr))
+    {
+    case META_ESCAPE:
+      meta_arg = META_DATA(*ptr);
+      switch (meta_arg)
+        {
+        case ESC_D:
+        case ESC_W:
+        case ESC_S:
+        buffer = append_non_ascii_range(options, buffer);  /* 0x100..highest */
+        total_size += 2;
+        break;
+
+        case ESC_h:
+        size = append_char_list(PRIV(hspace_list), buffer);
+        total_size += size;
+        if (buffer != NULL) buffer += size;
+        break;
+
+        case ESC_H:
+        size = append_negated_char_list(PRIV(hspace_list), options, buffer);
+        total_size += size;
+        if (buffer != NULL) buffer += size;
+        break;
+
+        case ESC_v:
+        size = append_char_list(PRIV(vspace_list), buffer);
+        total_size += size;
+        if (buffer != NULL) buffer += size;
+        break;
+
+        case ESC_V:
+        size = append_negated_char_list(PRIV(vspace_list), options, buffer);
+        total_size += size;
+        if (buffer != NULL) buffer += size;
+        break;
+
+        case ESC_p:
+        case ESC_P:
+        ptr++;  /* Step to the extra word that carries the property. */
+        if (meta_arg == ESC_p && (*ptr >> 16) == PT_ANY)
+          {
+          if (buffer != NULL)
+            {
+            buffer[0] = 0;
+            buffer[1] = get_highest_char(options);
+            buffer += 2;
+            }
+          total_size += 2;
+          }
+        break;  /* Any other property adds no range here. */
+        }
+      ptr++;
+      continue;
+    case META_POSIX_NEG:
+      buffer = append_non_ascii_range(options, buffer);
+      total_size += 2;
+      ptr += 2;  /* Skip the item and the word that follows it. */
+      continue;
+    case META_POSIX:
+      ptr += 2;  /* Adds no range here. */
+      continue;
+    case META_BIGVALUE:
+      /* Character literal */
+      ptr++;
+      break;
+    CLASS_END_CASES(*ptr)
+      if (*ptr >= META_END) return total_size;
+      break;
+    }
+
+    start_char = *ptr;  /* A literal: a single character or a range start. */
+
+    if (ptr[1] == META_RANGE_LITERAL || ptr[1] == META_RANGE_ESCAPED)
+      {
+      ptr += 2;  /* Move to the end character of the range. */
+      PCRE2_ASSERT(*ptr < META_END || *ptr == META_BIGVALUE);
+
+      if (*ptr == META_BIGVALUE) ptr++;
+
+#ifdef EBCDIC
+#error "Missing EBCDIC support"
+#endif
+      }
+
+#ifdef SUPPORT_UNICODE
+    if (options & PARSE_CLASS_CASELESS_UTF)
+      {
+      size = utf_caseless_extend(start_char, *ptr++, options, buffer);
+      if (buffer != NULL) buffer += size;
+      total_size += size;
+      continue;
+      }
+#endif
+
+    if (buffer != NULL)
+      {
+      buffer[0] = start_char;
+      buffer[1] = *ptr;  /* Same as start_char for a single character. */
+      buffer += 2;
+      }
+
+    ptr++;
+    total_size += 2;
+  }
+
+  return total_size;
+}
+
+/* Extra uint32_t values for storing the lengths of range lists in
+the worst case. Two uint32_t lengths and a range end for a range
+starting before 255 */
+#define CHAR_LIST_EXTRA_SIZE 3
+
+/* Starting character values for each character list. The entries are
+consumed in order by compile_optimize_class(), beginning with the first one,
+so which list comes first depends on the code unit width and on Unicode
+support. */
+
+static const uint32_t char_list_starts[] = {
+#if PCRE2_CODE_UNIT_WIDTH == 32
+  XCL_CHAR_LIST_HIGH_32_START,
+#endif
+#if PCRE2_CODE_UNIT_WIDTH == 32 || defined SUPPORT_UNICODE
+  XCL_CHAR_LIST_LOW_32_START,
+#endif
+  XCL_CHAR_LIST_HIGH_16_START,
+  /* Must be terminated by XCL_CHAR_LIST_LOW_16_START,
+  which also represents the end of the bitset. */
+  XCL_CHAR_LIST_LOW_16_START,
+};
+
+static class_ranges *
+compile_optimize_class(uint32_t *start_ptr, uint32_t options,
+  uint32_t xoptions, compile_block *cb)
+{ /* Build the class_ranges record for the class whose parsed items start at
+  start_ptr. parse_class() is run twice (first to count, then to fill), the
+  (start, end) pairs are heap-sorted by start and merged, and, when enough
+  ranges end at 0x100 or above, the upper ranges are converted into character
+  lists that are stored at the end of the same allocation.
+
+  Returns: the new record, or NULL if the allocation through
+  cb->cx->memctl fails. */
+class_ranges* cranges;
+uint32_t *ptr;
+uint32_t *buffer;
+uint32_t *dst;
+uint32_t class_options = 0;  /* PARSE_CLASS_* bits built from the options. */
+size_t range_list_size = 0, total_size, i;
+uint32_t tmp1, tmp2;
+const uint32_t *char_list_next;
+uint16_t *next_char;
+uint32_t char_list_start, char_list_end;
+uint32_t range_start, range_end;
+
+#ifdef SUPPORT_UNICODE
+if (options & PCRE2_UTF)
+  class_options |= PARSE_CLASS_UTF;
+
+if ((options & PCRE2_CASELESS) && (options & (PCRE2_UTF|PCRE2_UCP)))
+  class_options |= PARSE_CLASS_CASELESS_UTF;
+
+if (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT)
+  class_options |= PARSE_CLASS_RESTRICTED_UTF;
+
+if (xoptions & PCRE2_EXTRA_TURKISH_CASING)
+  class_options |= PARSE_CLASS_TURKISH_UTF;
+#endif
+
+/* Compute required space for the range. */
+
+range_list_size = parse_class(start_ptr, class_options, NULL);
+PCRE2_ASSERT((range_list_size & 0x1) == 0);
+
+/* Allocate buffer. The total_size also represents the end of the buffer. */
+
+total_size = range_list_size +
+   ((range_list_size >= 2) ? CHAR_LIST_EXTRA_SIZE : 0);
+
+cranges = cb->cx->memctl.malloc(
+  sizeof(class_ranges) + total_size * sizeof(uint32_t),
+  cb->cx->memctl.memory_data);
+
+if (cranges == NULL) return NULL;
+
+cranges->next = NULL;
+cranges->range_list_size = (uint16_t)range_list_size;
+cranges->char_lists_types = 0;
+cranges->char_lists_size = 0;
+cranges->char_lists_start = 0;
+
+if (range_list_size == 0) return cranges;
+
+buffer = (uint32_t*)(cranges + 1);  /* The data follows the record itself. */
+parse_class(start_ptr, class_options, buffer);  /* Second pass: fill. */
+
+/* Using <= instead of == to help static analysis. */
+if (range_list_size <= 2) return cranges;
+
+/* In-place sorting of ranges. */
+
+i = (((range_list_size >> 2) - 1) << 1);  /* Last pair that has a child. */
+while (TRUE)
+  {
+  do_heapify(buffer, range_list_size, i);
+  if (i == 0) break;
+  i -= 2;
+  }
+
+i = range_list_size - 2;  /* Slot of the last pair. */
+while (TRUE)
+  {
+  tmp1 = buffer[i];  /* Swap the largest pair (at the root) with pair i. */
+  tmp2 = buffer[i + 1];
+  buffer[i] = buffer[0];
+  buffer[i + 1] = buffer[1];
+  buffer[0] = tmp1;
+  buffer[1] = tmp2;
+
+  do_heapify(buffer, i, 0);  /* Restore the heap on the remaining pairs. */
+  if (i == 0) break;
+  i -= 2;
+  }
+
+/* Merge ranges whenever possible. */
+dst = buffer;
+ptr = buffer + 2;
+range_list_size -= 2;
+
+/* The second condition is a very rare corner case, where the end of the last
+range is the maximum character. This range cannot be extended further. */
+
+while (range_list_size > 0 && dst[1] != ~(uint32_t)0)
+  {
+  if (dst[1] + 1 < ptr[0])  /* A gap separates the ranges: keep both. */
+    {
+    dst += 2;
+    dst[0] = ptr[0];
+    dst[1] = ptr[1];
+    }
+  else if (dst[1] < ptr[1]) dst[1] = ptr[1];  /* Overlap or adjacent. */
+
+  ptr += 2;
+  range_list_size -= 2;
+  }
+
+PCRE2_ASSERT(dst[1] <= get_highest_char(class_options));
+
+/* When the number of ranges are less than six,
+they are not converted to range lists. */
+
+ptr = buffer;
+while (ptr < dst && ptr[1] < 0x100) ptr += 2;  /* Skip ranges below 0x100. */
+if (dst - ptr < (2 * (6 - 1)))
+  {
+  cranges->range_list_size = (uint16_t)(dst + 2 - buffer);
+  return cranges;
+  }
+
+/* Compute character lists structures. */
+
+char_list_next = char_list_starts;
+char_list_start = *char_list_next++;
+#if PCRE2_CODE_UNIT_WIDTH == 32
+char_list_end = XCL_CHAR_LIST_HIGH_32_END;
+#elif defined SUPPORT_UNICODE
+char_list_end = XCL_CHAR_LIST_LOW_32_END;
+#else
+char_list_end = XCL_CHAR_LIST_HIGH_16_END;
+#endif
+next_char = (uint16_t*)(buffer + total_size);  /* Lists grow downwards. */
+
+tmp1 = 0;  /* Number of items in the current list. */
+tmp2 = ((sizeof(char_list_starts) / sizeof(uint32_t)) - 1) * XCL_TYPE_BIT_LEN;
+PCRE2_ASSERT(tmp2 <= 3 * XCL_TYPE_BIT_LEN && tmp2 >= XCL_TYPE_BIT_LEN);
+range_start = dst[0];  /* Ranges are consumed from the highest downwards. */
+range_end = dst[1];
+
+while (TRUE)
+  {
+  if (range_start >= char_list_start)  /* Range starts in the current list. */
+    {
+    if (range_start == range_end || range_end < char_list_end)
+      {
+      tmp1++;
+      next_char--;
+
+      if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
+        *next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
+      else
+        *(uint32_t*)(--next_char) =
+          (range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;
+      }
+
+    if (range_start < range_end)
+      {
+      if (range_start > char_list_start)
+        {
+        tmp1++;
+        next_char--;
+
+        if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
+          *next_char = (uint16_t)(range_start << XCL_CHAR_SHIFT);
+        else
+          *(uint32_t*)(--next_char) = (range_start << XCL_CHAR_SHIFT);
+        }
+      else
+        cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
+      }
+
+    PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);
+
+    if (dst > buffer)
+      {
+      dst -= 2;  /* Move on to the next lower range. */
+      range_start = dst[0];
+      range_end = dst[1];
+      continue;
+      }
+
+    range_start = 0;  /* All ranges have been consumed. */
+    range_end = 0;
+    }
+
+  if (range_end >= char_list_start)  /* Range reaches in from below. */
+    {
+    PCRE2_ASSERT(range_start < char_list_start);
+
+    if (range_end < char_list_end)
+      {
+      tmp1++;
+      next_char--;
+
+      if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
+        *next_char = (uint16_t)((range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END);
+      else
+        *(uint32_t*)(--next_char) =
+          (range_end << XCL_CHAR_SHIFT) | XCL_CHAR_END;
+
+      PCRE2_ASSERT((uint32_t*)next_char >= dst + 2);
+      }
+
+    cranges->char_lists_types |= XCL_BEGIN_WITH_RANGE << tmp2;
+    }
+
+  if (tmp1 >= XCL_ITEM_COUNT_MASK)  /* Store the item count as its own entry. */
+    {
+    cranges->char_lists_types |= XCL_ITEM_COUNT_MASK << tmp2;
+    next_char--;
+
+    if (char_list_start < XCL_CHAR_LIST_LOW_32_START)
+      *next_char = (uint16_t)tmp1;
+    else
+      *(uint32_t*)(--next_char) = tmp1;
+    }
+  else
+    cranges->char_lists_types |= tmp1 << tmp2;
+
+  if (range_start < XCL_CHAR_LIST_LOW_16_START) break;
+
+  PCRE2_ASSERT(tmp2 >= XCL_TYPE_BIT_LEN);
+  char_list_end = char_list_start - 1;  /* Step down to the next list. */
+  char_list_start = *char_list_next++;
+  tmp1 = 0;
+  tmp2 -= XCL_TYPE_BIT_LEN;
+  }
+
+if (dst[0] < XCL_CHAR_LIST_LOW_16_START) dst += 2;  /* Stays a plain range. */
+PCRE2_ASSERT((uint16_t*)dst <= next_char);
+
+cranges->char_lists_size =
+  (size_t)((uint8_t*)(buffer + total_size) - (uint8_t*)next_char);
+cranges->char_lists_start = (size_t)((uint8_t*)next_char - (uint8_t*)buffer);
+cranges->range_list_size = (uint16_t)(dst - buffer);
+return cranges;
+}
+
+#endif /* SUPPORT_WIDE_CHARS */
+
+#ifdef SUPPORT_UNICODE
+
+void PRIV(update_classbits)(uint32_t ptype, uint32_t pdata, BOOL negated,
+  uint8_t *classbits)
+{
+/* Update PRIV(xclass) when this function is changed.
+
+For each code point c below 256, the bit for c in the 32-byte bitmap
+classbits is set when c has the property given by ptype/pdata or, if negated
+is set, when it does not have it. Bits are only ever set, never cleared. For
+PT_ANY every bit is set unless negated, in which case the bitmap is left
+unchanged. */
+int c, chartype;
+const ucd_record *prop;
+uint32_t gentype;
+BOOL set_bit;
+
+if (ptype == PT_ANY)
+  {
+  if (!negated) memset(classbits, 0xff, 32);
+  return;
+  }
+
+for (c = 0; c < 256; c++)
+  {
+  prop = GET_UCD(c);
+  set_bit = FALSE;
+  (void)set_bit;
+
+  switch (ptype)
+    {
+    case PT_LAMP:
+    chartype = prop->chartype;
+    set_bit = (chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt);
+    break;
+
+    case PT_GC:
+    set_bit = (PRIV(ucp_gentype)[prop->chartype] == pdata);
+    break;
+
+    case PT_PC:
+    set_bit = (prop->chartype == pdata);
+    break;
+
+    case PT_SC:
+    set_bit = (prop->script == pdata);
+    break;
+
+    case PT_SCX:
+    set_bit = (prop->script == pdata ||
+      MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
+    break;
+
+    case PT_ALNUM:
+    gentype = PRIV(ucp_gentype)[prop->chartype];
+    set_bit = (gentype == ucp_L || gentype == ucp_N);
+    break;
+
+    case PT_SPACE:    /* Perl space */
+    case PT_PXSPACE:  /* POSIX space */
+    switch(c)
+      {
+      HSPACE_BYTE_CASES:
+      VSPACE_BYTE_CASES:
+      set_bit = TRUE;
+      break;
+
+      default:
+      set_bit = (PRIV(ucp_gentype)[prop->chartype] == ucp_Z);
+      break;
+      }
+    break;
+
+    case PT_WORD:
+    chartype = prop->chartype;
+    gentype = PRIV(ucp_gentype)[chartype];
+    set_bit = (gentype == ucp_L || gentype == ucp_N ||
+               chartype == ucp_Mn || chartype == ucp_Pc);
+    break;
+
+    case PT_UCNC:
+    set_bit = (c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
+               c == CHAR_GRAVE_ACCENT || c >= 0xa0);
+    break;
+
+    case PT_BIDICL:
+    set_bit = (UCD_BIDICLASS_PROP(prop) == pdata);
+    break;
+
+    case PT_BOOL:
+    set_bit = MAPBIT(PRIV(ucd_boolprop_sets) +
+                     UCD_BPROPS_PROP(prop), pdata) != 0;
+    break;
+
+    case PT_PXGRAPH:
+    chartype = prop->chartype;
+    gentype = PRIV(ucp_gentype)[chartype];
+    set_bit = (gentype != ucp_Z && (gentype != ucp_C || chartype == ucp_Cf));
+    break;
+
+    case PT_PXPRINT:
+    chartype = prop->chartype;
+    set_bit = (chartype != ucp_Zl && chartype != ucp_Zp &&
+       (PRIV(ucp_gentype)[chartype] != ucp_C || chartype == ucp_Cf));
+    break;
+
+    case PT_PXPUNCT:
+    gentype = PRIV(ucp_gentype)[prop->chartype];
+    set_bit = (gentype == ucp_P || (c < 128 && gentype == ucp_S));
+    break;
+
+    default:
+    PCRE2_ASSERT(ptype == PT_PXXDIGIT);
+    set_bit = (c >= CHAR_0 && c <= CHAR_9) ||
+              (c >= CHAR_A && c <= CHAR_F) ||
+              (c >= CHAR_a && c <= CHAR_f);
+    break;
+    }
+
+  if (negated) set_bit = !set_bit;
+  if (set_bit) *classbits |= (uint8_t)(1 << (c & 0x7));
+  if ((c & 0x7) == 0x7) classbits++;  /* Last bit of this byte: next byte. */
+  }
+}
+
+#endif /* SUPPORT_UNICODE */
+
+
+
+#ifdef SUPPORT_WIDE_CHARS
+
+/*************************************************
+*           XClass related properties            *
+*************************************************/
+
+/* XClass needs to be generated. */
+#define XCLASS_REQUIRED 0x1
+/* XClass has 8 bit character. */
+#define XCLASS_HAS_8BIT_CHARS 0x2
+/* XClass has properties. */
+#define XCLASS_HAS_PROPS 0x4
+/* XClass has character lists. */
+#define XCLASS_HAS_CHAR_LISTS 0x8
+/* XClass matches to all >= 256 characters. */
+#define XCLASS_HIGH_ANY 0x10
+
+#endif
+
+
+/*************************************************
+*   Internal entry point for add range to class  *
+*************************************************/
+
+/* This function sets the overall range for characters < 256.
+It also handles case folding for those characters: through cb->fcc when
+neither PCRE2_UTF nor PCRE2_UCP is set, and otherwise through cb->fcc for
+characters below 128 and UCD_OTHERCASE for characters from 128 to 255 (only
+other cases that are themselves <= 0xff are added). Any part of the range
+above 255 is ignored here.
+
+Arguments:
+  options       the options bits
+  xoptions      the extra options bits
+  cb            compile data
+  start         start of range character
+  end           end of range character
+
+Returns:        cb->classbits is updated
+*/
+
+static void
+add_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
+  uint32_t start, uint32_t end)
+{
+uint8_t *classbits = cb->classbits.classbits;
+uint32_t c, byte_start, byte_end;
+uint32_t classbits_end = (end <= 0xff ? end : 0xff);  /* Clamp to the bitmap. */
+
+/* If caseless matching is required, scan the range and process alternate
+cases. In Unicode, there are 8-bit characters that have alternate cases that
+are greater than 255 and vice-versa (though these may be ignored if caseless
+restriction is in force). Sometimes we can just extend the original range. */
+
+if ((options & PCRE2_CASELESS) != 0)
+  {
+#ifdef SUPPORT_UNICODE
+  /* UTF mode. This branch is taken if we don't support wide characters (e.g.
+  8-bit library, without UTF), but we do treat those characters as Unicode
+  (if UCP flag is set). In this case, we only need to expand the character class
+  set to include the case pairs which are in the 0-255 codepoint range. */
+  if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0)
+    {
+      BOOL turkish_i = (xoptions & (PCRE2_EXTRA_TURKISH_CASING|PCRE2_EXTRA_CASELESS_RESTRICT)) ==
+        PCRE2_EXTRA_TURKISH_CASING;
+      if (start < 128)
+        {
+        uint32_t lo_end = (classbits_end < 127 ? classbits_end : 127);
+        for (c = start; c <= lo_end; c++)
+          {
+          if (turkish_i && UCD_ANY_I(c)) continue;  /* No fcc folding here. */
+          SETBIT(classbits, cb->fcc[c]);
+          }
+        }
+      if (classbits_end >= 128)
+        {
+        uint32_t hi_start = (start > 128 ? start : 128);
+        for (c = hi_start; c <= classbits_end; c++)
+          {
+          uint32_t co = UCD_OTHERCASE(c);
+          if (co <= 0xff) SETBIT(classbits, co);
+          }
+        }
+    }
+
+  else
+#endif  /* SUPPORT_UNICODE */
+
+  /* Not UTF mode */
+    {
+    for (c = start; c <= classbits_end; c++)
+      SETBIT(classbits, cb->fcc[c]);
+    }
+  }
+
+/* Use the bitmap for characters < 256. Otherwise use extra data. */
+
+byte_start = (start + 7) >> 3;  /* First byte that lies fully in the range. */
+byte_end = (classbits_end + 1) >> 3;  /* One past the last such byte. */
+
+if (byte_start >= byte_end)  /* No whole byte is covered: set single bits. */
+  {
+  for (c = start; c <= classbits_end; c++)
+    /* Regardless of start, c will always be <= 255. */
+    SETBIT(classbits, c);
+  return;
+  }
+
+for (c = byte_start; c < byte_end; c++)
+  classbits[c] = 0xff;
+
+byte_start <<= 3;  /* From here on these are bit (character) positions. */
+byte_end <<= 3;
+
+for (c = start; c < byte_start; c++)  /* Bits before the first whole byte. */
+  SETBIT(classbits, c);
+
+for (c = byte_end; c <= classbits_end; c++)  /* Bits after the last one. */
+  SETBIT(classbits, c);
+}
+
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+/*************************************************
+*   Internal entry point for add list to class   *
+*************************************************/
+
+/* This function is used for adding a list of horizontal or vertical whitespace
+characters to a class. The list must be in order so that ranges of characters
+can be detected and handled appropriately. This function sets the overall range
+so that the internal functions can try to avoid duplication when handling
+case-independence.
+
+Arguments:
+  options       the options bits
+  xoptions      the extra options bits
+  cb            contains pointers to tables etc.
+  p             points to row of 32-bit values, terminated by NOTACHAR
+
+Returns:        cb->classbits is updated
+*/
+
+static void
+add_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
+  const uint32_t *p)
+{
+const uint32_t *run_first = p;
+
+/* Only list entries below 256 are of interest; the first entry that is not
+below 256 ends the scan. */
+
+while (*run_first < 256)
+  {
+  const uint32_t *run_last = run_first;
+
+  /* Extend the run for as long as the next entry is exactly one larger. */
+  while (run_last[1] == run_last[0] + 1) run_last++;
+
+  add_to_class(options, xoptions, cb, *run_first, *run_last);
+  run_first = run_last + 1;
+  }
+}
+
+
+
+/*************************************************
+*    Add characters not in a list to a class     *
+*************************************************/
+
+/* This function is used for adding the complement of a list of horizontal or
+vertical whitespace to a class. The list must be in order.
+
+Arguments:
+  options       the options bits
+  xoptions      the extra options bits
+  cb            contains pointers to tables etc.
+  p             points to row of 32-bit values, terminated by NOTACHAR
+
+Returns:        cb->classbits is updated
+*/
+
+static void
+add_not_list_to_class(uint32_t options, uint32_t xoptions, compile_block *cb,
+  const uint32_t *p)
+{
+/* Everything below the first list entry is outside the list. */
+
+if (*p != 0)
+  add_to_class(options, xoptions, cb, 0, *p - 1);
+
+for (; *p < 256; p++)
+  {
+  uint32_t gap_first, gap_last;
+
+  /* Move to the last entry of a run of consecutive values. */
+  while (p[1] == *p + 1) p++;
+
+  /* Add the gap up to the next entry, limited to 255. */
+  gap_first = *p + 1;
+  gap_last = (p[1] <= 255) ? p[1] - 1 : 255;
+  add_to_class(options, xoptions, cb, gap_first, gap_last);
+  }
+}
+#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
+
+
+
+/*************************************************
+*  Main entry-point to compile a character class *
+*************************************************/
+
+/* This function consumes a "leaf", which is a set of characters that will
+become a single OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY. */
+
+uint32_t *
+PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
+  uint32_t *start_ptr, PCRE2_UCHAR **pcode, BOOL negate_class, BOOL* has_bitmap,
+  int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr)
+{
+uint32_t *pptr = start_ptr;
+PCRE2_UCHAR *code = *pcode;
+BOOL should_flip_negation;
+const uint8_t *cbits = cb->cbits;
+/* Some functions such as add_to_class() or eclass processing
+expects that the bitset is stored in cb->classbits.classbits. */
+uint8_t *const classbits = cb->classbits.classbits;
+
+#ifdef SUPPORT_UNICODE
+BOOL utf = (options & PCRE2_UTF) != 0;
+#else  /* No Unicode support */
+BOOL utf = FALSE;
+#endif
+
+/* Helper variables for OP_XCLASS opcode (for characters > 255). */
+
+#ifdef SUPPORT_WIDE_CHARS
+uint32_t xclass_props;
+PCRE2_UCHAR *class_uchardata;
+class_ranges* cranges;
+#endif
+
+/* If an XClass contains a negative special such as \S, we need to flip the
+negation flag at the end, so that support for characters > 255 works correctly
+(they are all included in the class). An XClass may need to insert specific
+matching or non-matching code for wide characters.
+*/
+
+should_flip_negation = FALSE;
+
+/* XClass will be used when characters > 255 might match. */
+
+#ifdef SUPPORT_WIDE_CHARS
+xclass_props = 0;
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+cranges = NULL;
+
+if (utf)
+#endif
+  {
+  if (lengthptr != NULL)
+    {
+    cranges = compile_optimize_class(pptr, options, xoptions, cb);
+
+    if (cranges == NULL)
+      {
+      *errorcodeptr = ERR21;
+      return NULL;
+      }
+
+    /* Caching the pre-processed character ranges. */
+    if (cb->next_cranges != NULL)
+      cb->next_cranges->next = cranges;
+    else
+      cb->cranges = cranges;
+
+    cb->next_cranges = cranges;
+    }
+  else
+    {
+    /* Reuse the pre-processed character ranges. */
+    cranges = cb->cranges;
+    PCRE2_ASSERT(cranges != NULL);
+    cb->cranges = cranges->next;
+    }
+
+  if (cranges->range_list_size > 0)
+    {
+    const uint32_t *ranges = (const uint32_t*)(cranges + 1);
+
+    if (ranges[0] <= 255)
+      xclass_props |= XCLASS_HAS_8BIT_CHARS;
+
+    if (ranges[cranges->range_list_size - 1] == GET_MAX_CHAR_VALUE(utf) &&
+        ranges[cranges->range_list_size - 2] <= 256)
+      xclass_props |= XCLASS_HIGH_ANY;
+    }
+  }
+
+class_uchardata = code + LINK_SIZE + 2;   /* For XCLASS items */
+#endif /* SUPPORT_WIDE_CHARS */
+
+/* Initialize the 256-bit (32-byte) bit map to all zeros. We build the map
+in a temporary bit of memory, in case the class contains fewer than two
+8-bit characters because in that case the compiled code doesn't use the bit
+map. */
+
+memset(classbits, 0, 32);
+
+/* Process items until end_ptr is reached. */
+
+while (TRUE)
+  {
+  uint32_t meta = *(pptr++);
+  BOOL local_negate;
+  int posix_class;
+  int taboffset, tabopt;
+  class_bits_storage pbits;
+  uint32_t escape, c;
+
+  /* Handle POSIX classes such as [:alpha:] etc. */
+  switch (META_CODE(meta))
+    {
+    case META_POSIX:
+    case META_POSIX_NEG:
+
+    local_negate = (meta == META_POSIX_NEG);
+    posix_class = *(pptr++);
+
+    if (local_negate) should_flip_negation = TRUE;  /* Note negative special */
+
+    /* If matching is caseless, upper and lower are converted to alpha.
+    This relies on the fact that the class table starts with alpha,
+    lower, upper as the first 3 entries. */
+
+    if ((options & PCRE2_CASELESS) != 0 && posix_class <= 2)
+      posix_class = 0;
+
+    /* When PCRE2_UCP is set, some of the POSIX classes are converted to
+    different escape sequences that use Unicode properties \p or \P.
+    Others that are not available via \p or \P have to generate
+    XCL_PROP/XCL_NOTPROP directly, which is done here. */
+
+#ifdef SUPPORT_UNICODE
+    /* TODO This entire block of code here appears to be unreachable!? I simply
+    can't see how it can be hit, given that the frontend parser doesn't emit
+    META_POSIX for GRAPH/PRINT/PUNCT when UCP is set. */
+    if ((options & PCRE2_UCP) != 0 &&
+        (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0)
+      {
+      uint32_t ptype;
+
+      switch(posix_class)
+        {
+        case PC_GRAPH:
+        case PC_PRINT:
+        case PC_PUNCT:
+        ptype = (posix_class == PC_GRAPH)? PT_PXGRAPH :
+                (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT;
+
+        PRIV(update_classbits)(ptype, 0, local_negate, classbits);
+
+        if ((xclass_props & XCLASS_HIGH_ANY) == 0)
+          {
+          if (lengthptr != NULL)
+            *lengthptr += 3;
+          else
+            {
+            *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
+            *class_uchardata++ = (PCRE2_UCHAR)ptype;
+            *class_uchardata++ = 0;
+            }
+          xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
+          }
+        continue;
+
+        /* For the other POSIX classes (ex: ascii) we are going to
+        fall through to the non-UCP case and build a bit map for
+        characters with code points less than 256. However, if we are in
+        a negated POSIX class, characters with code points greater than
+        255 must either all match or all not match, depending on whether
+        the whole class is not or is negated. For example, for
+        [[:^ascii:]... they must all match, whereas for [^[:^ascii:]...
+        they must not.
+
+        In the special case where there are no xclass items, this is
+        automatically handled by the use of OP_CLASS or OP_NCLASS, but an
+        explicit range is needed for OP_XCLASS. Setting a flag here
+        causes the range to be generated later when it is known that
+        OP_XCLASS is required. In the 8-bit library this is relevant only in
+        utf mode, since no wide characters can exist otherwise. */
+
+        default:
+        break;
+        }
+      }
+#endif  /* SUPPORT_UNICODE */
+
+    /* In the non-UCP case, or when UCP makes no difference, we build the
+    bit map for the POSIX class in a chunk of local store because we may
+    be adding and subtracting from it, and we don't want to subtract bits
+    that may be in the main map already. At the end we or the result into
+    the bit map that is being built. */
+
+    posix_class *= 3;
+
+    /* Copy in the first table (always present) */
+
+    memcpy(pbits.classbits, cbits + PRIV(posix_class_maps)[posix_class], 32);
+
+    /* If there is a second table, add or remove it as required. */
+
+    taboffset = PRIV(posix_class_maps)[posix_class + 1];
+    tabopt = PRIV(posix_class_maps)[posix_class + 2];
+
+    if (taboffset >= 0)
+      {
+      if (tabopt >= 0)
+        for (int i = 0; i < 32; i++)
+          pbits.classbits[i] |= cbits[i + taboffset];
+      else
+        for (int i = 0; i < 32; i++)
+          pbits.classbits[i] &= (uint8_t)(~cbits[i + taboffset]);
+      }
+
+    /* Now see if we need to remove any special characters. An option
+    value of 1 removes vertical space and 2 removes underscore. */
+
+    if (tabopt < 0) tabopt = -tabopt;
+    if (tabopt == 1) pbits.classbits[1] &= ~0x3c;
+      else if (tabopt == 2) pbits.classbits[11] &= 0x7f;
+
+    /* Add the POSIX table or its complement into the main table that is
+    being built and we are done. */
+
+      {
+      uint32_t *classwords = cb->classbits.classwords;
+
+      if (local_negate)
+        for (int i = 0; i < 8; i++)
+          classwords[i] |= (uint32_t)(~pbits.classwords[i]);
+      else
+        for (int i = 0; i < 8; i++)
+          classwords[i] |= pbits.classwords[i];
+      }
+
+#ifdef SUPPORT_WIDE_CHARS
+    /* Every class contains at least one < 256 character. */
+    xclass_props |= XCLASS_HAS_8BIT_CHARS;
+#endif
+    continue;               /* End of POSIX handling */
+
+    /* Other than POSIX classes, the only items we should encounter are
+    \d-type escapes and literal characters (possibly as ranges). */
+    case META_BIGVALUE:
+    meta = *(pptr++);
+    break;
+
+    case META_ESCAPE:
+    escape = META_DATA(meta);
+
+    switch(escape)
+      {
+      case ESC_d:
+      for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
+      break;
+
+      case ESC_D:
+      should_flip_negation = TRUE;
+      for (int i = 0; i < 32; i++)
+        classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]);
+      break;
+
+      case ESC_w:
+      for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
+      break;
+
+      case ESC_W:
+      should_flip_negation = TRUE;
+      for (int i = 0; i < 32; i++)
+        classbits[i] |= (uint8_t)(~cbits[i+cbit_word]);
+      break;
+
+      /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
+      5.18. Before PCRE 8.34, we had to preserve the VT bit if it was
+      previously set by something earlier in the character class.
+      Luckily, the value of CHAR_VT is 0x0b in both ASCII and EBCDIC, so
+      we could just adjust the appropriate bit. From PCRE 8.34 we no
+      longer treat \s and \S specially. */
+
+      case ESC_s:
+      for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
+      break;
+
+      case ESC_S:
+      should_flip_negation = TRUE;
+      for (int i = 0; i < 32; i++)
+        classbits[i] |= (uint8_t)(~cbits[i+cbit_space]);
+      break;
+
+      /* When adding the horizontal or vertical space lists to a class, or
+      their complements, disable PCRE2_CASELESS, because it just wastes
+      time, and in the "not-x" UTF cases can create unwanted duplicates in
+      the XCLASS list (provoked by characters that have more than one other
+      case and by both cases being in the same "not-x" sublist). */
+
+      case ESC_h:
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#ifdef SUPPORT_UNICODE
+      if (cranges != NULL) break;
+#endif
+      add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
+        cb, PRIV(hspace_list));
+#else
+      PCRE2_ASSERT(cranges != NULL);
+#endif
+      break;
+
+      case ESC_H:
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#ifdef SUPPORT_UNICODE
+      if (cranges != NULL) break;
+#endif
+      add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
+        cb, PRIV(hspace_list));
+#else
+      PCRE2_ASSERT(cranges != NULL);
+#endif
+      break;
+
+      case ESC_v:
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#ifdef SUPPORT_UNICODE
+      if (cranges != NULL) break;
+#endif
+      add_list_to_class(options & ~PCRE2_CASELESS, xoptions,
+        cb, PRIV(vspace_list));
+#else
+      PCRE2_ASSERT(cranges != NULL);
+#endif
+      break;
+
+      case ESC_V:
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#ifdef SUPPORT_UNICODE
+      if (cranges != NULL) break;
+#endif
+      add_not_list_to_class(options & ~PCRE2_CASELESS, xoptions,
+        cb, PRIV(vspace_list));
+#else
+      PCRE2_ASSERT(cranges != NULL);
+#endif
+      break;
+
+      /* If Unicode is not supported, \P and \p are not allowed and are
+      faulted at parse time, so will never appear here. */
+
+#ifdef SUPPORT_UNICODE
+      case ESC_p:
+      case ESC_P:
+        {
+        uint32_t ptype = *pptr >> 16;
+        uint32_t pdata = *(pptr++) & 0xffff;
+
+        /* The "Any" is processed by PRIV(update_classbits)(). */
+        if (ptype == PT_ANY)
+          {
+#if PCRE2_CODE_UNIT_WIDTH == 8
+          if (!utf && escape == ESC_p) memset(classbits, 0xff, 32);
+#endif
+          continue;
+          }
+
+        PRIV(update_classbits)(ptype, pdata, (escape == ESC_P), classbits);
+
+        if ((xclass_props & XCLASS_HIGH_ANY) == 0)
+          {
+          if (lengthptr != NULL)
+            *lengthptr += 3;
+          else
+            {
+            *class_uchardata++ = (escape == ESC_p)? XCL_PROP : XCL_NOTPROP;
+            *class_uchardata++ = ptype;
+            *class_uchardata++ = pdata;
+            }
+          xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_PROPS;
+          }
+        }
+      continue;
+#endif
+      }
+
+#ifdef SUPPORT_WIDE_CHARS
+    /* Every non-property class contains at least one < 256 character. */
+    xclass_props |= XCLASS_HAS_8BIT_CHARS;
+#endif
+    /* End handling \d-type escapes */
+    continue;
+
+    CLASS_END_CASES(meta)
+    /* Literals. */
+    if (meta < META_END) break;
+    /* Non-literals: end of class contents. */
+    goto END_PROCESSING;
+    }
+
+  /* A literal character may be followed by a range meta. At parse time
+  there are checks for out-of-order characters, for ranges where the two
+  characters are equal, and for hyphens that cannot indicate a range. At
+  this point, therefore, no checking is needed. */
+
+  c = meta;
+
+  /* Remember if \r or \n were explicitly used */
+
+  if (c == CHAR_CR || c == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
+
+  /* Process a character range */
+
+  if (*pptr == META_RANGE_LITERAL || *pptr == META_RANGE_ESCAPED)
+    {
+    uint32_t d;
+
+#ifdef EBCDIC
+    BOOL range_is_literal = (*pptr == META_RANGE_LITERAL);
+#endif
+    ++pptr;
+    d = *(pptr++);
+    if (d == META_BIGVALUE) d = *(pptr++);
+
+    /* Remember an explicit \r or \n, and add the range to the class. */
+
+    if (d == CHAR_CR || d == CHAR_NL) cb->external_flags |= PCRE2_HASCRORLF;
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#ifdef SUPPORT_UNICODE
+    if (cranges != NULL) continue;
+    xclass_props |= XCLASS_HAS_8BIT_CHARS;
+#endif
+
+    /* In an EBCDIC environment, Perl treats alphabetic ranges specially
+    because there are holes in the encoding, and simply using the range
+    A-Z (for example) would include the characters in the holes. This
+    applies only to literal ranges; [\xC1-\xE9] is different to [A-Z]. */
+
+#ifdef EBCDIC
+    if (range_is_literal &&
+         (cb->ctypes[c] & ctype_letter) != 0 &&
+         (cb->ctypes[d] & ctype_letter) != 0 &&
+         (c <= CHAR_z) == (d <= CHAR_z))
+      {
+      uint32_t uc = (d <= CHAR_z)? 0 : 64;
+      uint32_t C = c - uc;
+      uint32_t D = d - uc;
+
+      if (C <= CHAR_i)
+        {
+        add_to_class(options, xoptions, cb, C + uc,
+          ((D < CHAR_i)? D : CHAR_i) + uc);
+        C = CHAR_j;
+        }
+
+      if (C <= D && C <= CHAR_r)
+        {
+        add_to_class(options, xoptions, cb, C + uc,
+          ((D < CHAR_r)? D : CHAR_r) + uc);
+        C = CHAR_s;
+        }
+
+      if (C <= D)
+        add_to_class(options, xoptions, cb, C + uc, D + uc);
+      }
+    else
+#endif
+    /* Not an EBCDIC special range */
+
+    add_to_class(options, xoptions, cb, c, d);
+#else
+    PCRE2_ASSERT(cranges != NULL);
+#endif
+    continue;
+    }  /* End of range handling */
+
+  /* Character ranges are ignored when class_ranges is present. */
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#ifdef SUPPORT_UNICODE
+  if (cranges != NULL) continue;
+  xclass_props |= XCLASS_HAS_8BIT_CHARS;
+#endif
+  /* Handle a single character. */
+
+  add_to_class(options, xoptions, cb, meta, meta);
+#else
+  PCRE2_ASSERT(cranges != NULL);
+#endif
+  }   /* End of main class-processing loop */
+
+END_PROCESSING:
+
+#ifdef SUPPORT_WIDE_CHARS
+PCRE2_ASSERT((xclass_props & XCLASS_HAS_PROPS) == 0 ||
+             (xclass_props & XCLASS_HIGH_ANY) == 0);
+
+if (cranges != NULL)
+  {
+  uint32_t *range = (uint32_t*)(cranges + 1);
+  uint32_t *end = range + cranges->range_list_size;
+
+  while (range < end && range[0] < 256)
+    {
+    PCRE2_ASSERT((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0);
+    /* Add range to bitset. If we are in UTF or UCP mode, then clear the
+    caseless bit, because the cranges handle caselessness (only) in this
+    condition; see the condition for PARSE_CLASS_CASELESS_UTF in
+    compile_optimize_class(). */
+    add_to_class(((options & (PCRE2_UTF|PCRE2_UCP)) != 0)?
+        (options & ~PCRE2_CASELESS) : options, xoptions, cb, range[0], range[1]);
+
+    if (range[1] > 255) break;
+    range += 2;
+    }
+
+  if (cranges->char_lists_size > 0)
+    {
+    /* The cranges structure is still used and freed later. */
+    PCRE2_ASSERT((xclass_props & XCLASS_HIGH_ANY) == 0);
+    xclass_props |= XCLASS_REQUIRED | XCLASS_HAS_CHAR_LISTS;
+    }
+  else
+    {
+    if ((xclass_props & XCLASS_HIGH_ANY) != 0)
+      {
+      PCRE2_ASSERT(range + 2 == end && range[0] <= 256 &&
+        range[1] >= GET_MAX_CHAR_VALUE(utf));
+      should_flip_negation = TRUE;
+      range = end;
+      }
+
+    while (range < end)
+      {
+      uint32_t range_start = range[0];
+      uint32_t range_end = range[1];
+
+      range += 2;
+      xclass_props |= XCLASS_REQUIRED;
+
+      if (range_start < 256) range_start = 256;
+
+      if (lengthptr != NULL)
+        {
+#ifdef SUPPORT_UNICODE
+        if (utf)
+          {
+          *lengthptr += 1;
+
+          if (range_start < range_end)
+            *lengthptr += PRIV(ord2utf)(range_start, class_uchardata);
+
+          *lengthptr += PRIV(ord2utf)(range_end, class_uchardata);
+          continue;
+          }
+#endif  /* SUPPORT_UNICODE */
+
+        *lengthptr += range_start < range_end ? 3 : 2;
+        continue;
+        }
+
+#ifdef SUPPORT_UNICODE
+      if (utf)
+        {
+        if (range_start < range_end)
+          {
+          *class_uchardata++ = XCL_RANGE;
+          class_uchardata += PRIV(ord2utf)(range_start, class_uchardata);
+          }
+        else
+          *class_uchardata++ = XCL_SINGLE;
+
+        class_uchardata += PRIV(ord2utf)(range_end, class_uchardata);
+        continue;
+        }
+#endif  /* SUPPORT_UNICODE */
+
+      /* Without UTF support, character values are constrained
+      by the bit length, and can only be > 256 for 16-bit and
+      32-bit libraries. */
+#if PCRE2_CODE_UNIT_WIDTH != 8
+      if (range_start < range_end)
+        {
+        *class_uchardata++ = XCL_RANGE;
+        *class_uchardata++ = range_start;
+        }
+      else
+        *class_uchardata++ = XCL_SINGLE;
+
+      *class_uchardata++ = range_end;
+#endif  /* PCRE2_CODE_UNIT_WIDTH != 8 */
+      }
+
+    if (lengthptr == NULL)
+      cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
+    }
+  }
+#endif /* SUPPORT_WIDE_CHARS */
+
+/* If there are characters with values > 255, or Unicode property settings
+(\p or \P), we have to compile an extended class, with its own opcode,
+unless there were no property settings and there was a negated special such
+as \S in the class, and PCRE2_UCP is not set, because in that case all
+characters > 255 are in or not in the class, so any that were explicitly
+given as well can be ignored.
+
+In the UCP case, if certain negated POSIX classes (ex: [:^ascii:]) were
+present in a class, we either have to match or not match all wide
+characters (depending on whether the whole class is or is not negated).
+This requirement is indicated by match_all_or_no_wide_chars being true.
+We do this by including an explicit range, which works in both cases.
+This applies only in UTF and 16-bit and 32-bit non-UTF modes, since there
+cannot be any wide characters in 8-bit non-UTF mode.
+
+When there *are* properties in a positive UTF-8 or any 16-bit or 32-bit
+class where \S etc is present without PCRE2_UCP, causing an extended class
+to be compiled, we make sure that all characters > 255 are included by
+forcing match_all_or_no_wide_chars to be true.
+
+If, when generating an xclass, there are no characters < 256, we can omit
+the bitmap in the actual compiled code. */
+
+#ifdef SUPPORT_WIDE_CHARS  /* Defined for 16/32 bits, or 8-bit with Unicode */
+if ((xclass_props & XCLASS_REQUIRED) != 0)
+  {
+  PCRE2_UCHAR *previous = code;
+
+  if ((xclass_props & XCLASS_HAS_CHAR_LISTS) == 0)
+    *class_uchardata++ = XCL_END;    /* Marks the end of extra data */
+  *code++ = OP_XCLASS;
+  code += LINK_SIZE;
+  *code = negate_class? XCL_NOT:0;
+  if ((xclass_props & XCLASS_HAS_PROPS) != 0) *code |= XCL_HASPROP;
+
+  /* If the map is required, move up the extra data to make room for it;
+  otherwise just move the code pointer to the end of the extra data. */
+
+  if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0 || has_bitmap != NULL)
+    {
+    if (negate_class)
+      {
+      uint32_t *classwords = cb->classbits.classwords;
+      for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
+      }
+
+    if (has_bitmap == NULL)
+      {
+      *code++ |= XCL_MAP;
+      (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
+        CU2BYTES(class_uchardata - code));
+      memcpy(code, classbits, 32);
+      code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
+      }
+    else
+      {
+      code = class_uchardata;
+      if ((xclass_props & XCLASS_HAS_8BIT_CHARS) != 0)
+        *has_bitmap = TRUE;
+      }
+    }
+  else code = class_uchardata;
+
+  if ((xclass_props & XCLASS_HAS_CHAR_LISTS) != 0)
+    {
+    /* Char lists size is an even number, because all items are 16 or 32
+    bit values. The character list data is always aligned to 32 bits. */
+    size_t char_lists_size = cranges->char_lists_size;
+    PCRE2_ASSERT((char_lists_size & 0x1) == 0 &&
+                 (cb->char_lists_size & 0x3) == 0);
+
+    if (lengthptr != NULL)
+      {
+      char_lists_size = CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+      *lengthptr += 2 + LINK_SIZE;
+#else
+      *lengthptr += 1 + LINK_SIZE;
+#endif
+
+      cb->char_lists_size += char_lists_size;
+
+      char_lists_size /= sizeof(PCRE2_UCHAR);
+
+      /* Storage space for character lists is included
+      in the maximum pattern size. */
+      if (*lengthptr > MAX_PATTERN_SIZE ||
+          MAX_PATTERN_SIZE - *lengthptr < char_lists_size)
+        {
+        *errorcodeptr = ERR20;   /* Pattern is too large */
+        return NULL;
+        }
+      }
+    else
+      {
+      uint8_t *data;
+
+      PCRE2_ASSERT(cranges->char_lists_types <= XCL_TYPE_MASK);
+#if PCRE2_CODE_UNIT_WIDTH == 8
+      /* Encode as high / low bytes. */
+      code[0] = (uint8_t)(XCL_LIST |
+        (cranges->char_lists_types >> 8));
+      code[1] = (uint8_t)cranges->char_lists_types;
+      code += 2;
+#else
+      *code++ = (PCRE2_UCHAR)(XCL_LIST | cranges->char_lists_types);
+#endif
+
+      /* Character lists are stored in backwards direction from
+      byte code start. The non-dfa/dfa matchers can access these
+      lists using the byte code start stored in match blocks.
+      Each list is aligned to 32 bit with an optional unused
+      16 bit value at the beginning of the character list. */
+
+      cb->char_lists_size += char_lists_size;
+      data = (uint8_t*)cb->start_code - cb->char_lists_size;
+
+      memcpy(data, (uint8_t*)(cranges + 1) + cranges->char_lists_start,
+        char_lists_size);
+
+      /* Since character lists total size is less than MAX_PATTERN_SIZE,
+      their starting offset fits into a value which size is LINK_SIZE. */
+
+      char_lists_size = cb->char_lists_size;
+      PUT(code, 0, (uint32_t)(char_lists_size >> 1));
+      code += LINK_SIZE;
+
+#if defined PCRE2_DEBUG || defined SUPPORT_VALGRIND
+      if ((char_lists_size & 0x2) != 0)
+        {
+        /* In debug the unused 16 bit value is set
+        to a fixed value and marked unused. */
+        ((uint16_t*)data)[-1] = 0x5555;
+#ifdef SUPPORT_VALGRIND
+        VALGRIND_MAKE_MEM_NOACCESS(data - 2, 2);
+#endif
+        }
+#endif
+
+      cb->char_lists_size =
+        CLIST_ALIGN_TO(char_lists_size, sizeof(uint32_t));
+
+      cb->cx->memctl.free(cranges, cb->cx->memctl.memory_data);
+      }
+    }
+
+  /* Now fill in the complete length of the item */
+
+  PUT(previous, 1, (int)(code - previous));
+  goto DONE;   /* End of class handling */
+  }
+#endif  /* SUPPORT_WIDE_CHARS */
+
+/* If there are no characters > 255, or they are all to be included or
+excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
+whole class was negated and whether there were negative specials such as \S
+(non-UCP) in the class. Then copy the 32-byte map into the code vector,
+negating it if necessary. */
+
+if (negate_class)
+  {
+  uint32_t *classwords = cb->classbits.classwords;
+
+  for (int i = 0; i < 8; i++) classwords[i] = ~classwords[i];
+  }
+
+if ((SELECT_VALUE8(!utf, 0) || negate_class != should_flip_negation) &&
+    cb->classbits.classwords[0] == ~(uint32_t)0)
+  {
+  const uint32_t *classwords = cb->classbits.classwords;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    if (classwords[i] != ~(uint32_t)0) break;
+
+  if (i == 8)
+    {
+    *code++ = OP_ALLANY;
+    goto DONE;   /* End of class handling */
+    }
+  }
+
+*code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
+memcpy(code, classbits, 32);
+code += 32 / sizeof(PCRE2_UCHAR);
+
+DONE:
+*pcode = code;
+return pptr - 1;
+}
+
+
+
+/* ===================================================================*/
+/* Here follows a block of ECLASS-compiling functions. You may well want to
+read them from top to bottom; they are ordered from leafmost (at the top) to
+outermost parser (at the bottom of the file). */
+
+/* Apply logical negation to one operand, rewriting *pop_info in place so that
+it describes the negated operand.
+
+Arguments:
+  pop_info            the operand to negate; updated on return
+  lengthptr           NULL when code is being emitted; otherwise only the
+                      required length is being accumulated in *lengthptr
+  preserve_classbits  if TRUE, pop_info->bits is left untouched; if FALSE,
+                      every word of the bitmap is complemented
+*/
+
+static void
+fold_negation(eclass_op_info *pop_info, PCRE2_SIZE *lengthptr,
+  BOOL preserve_classbits)
+{
+const BOOL emitting = (lengthptr == NULL);
+
+/* The bitmap is independent of the stack code, so deal with it first. */
+
+if (!preserve_classbits)
+  {
+  uint32_t *words = pop_info->bits.classwords;
+  for (int i = 0; i < 8; i++) words[i] = ~words[i];
+  }
+
+/* A chunk that is already composed of multiple ops just gets an ECL_NOT
+appended. We do not descend into it to propagate the negation down the tree:
+that would lead to O(n^2) compile time, which a malicious pattern could
+exploit. */
+
+if (pop_info->op_single_type == 0)
+  {
+  if (emitting)
+    pop_info->code_start[pop_info->length] = ECL_NOT;
+  else
+    *lengthptr += 1;
+  pop_info->length += 1;
+  return;
+  }
+
+/* A lone ECL_ANY or ECL_NONE is swapped for its opposite; no ECL_NOT is
+needed. */
+
+if (pop_info->op_single_type == ECL_ANY ||
+    pop_info->op_single_type == ECL_NONE)
+  {
+  if (pop_info->op_single_type == ECL_NONE)
+    pop_info->op_single_type = ECL_ANY;
+  else
+    pop_info->op_single_type = ECL_NONE;
+
+  if (emitting) pop_info->code_start[0] = pop_info->op_single_type;
+  return;
+  }
+
+/* What remains is a single ECL_XCLASS item: toggle XCL_NOT in the code unit
+that follows the opcode and link. */
+
+PCRE2_ASSERT(pop_info->op_single_type == ECL_XCLASS &&
+             pop_info->length >= 1 + LINK_SIZE + 1);
+if (emitting) pop_info->code_start[1 + LINK_SIZE] ^= XCL_NOT;
+}
+
+
+
+/* Combine two operands with a binary operator (ECL_AND, ECL_OR or ECL_XOR).
+The description of the combined chunk of stack code is written to
+*lhs_op_info. An operand that is a lone ECL_ANY or ECL_NONE is folded away
+instead of emitting an operator:
+
+     OP   LHS  RHS  RESULT
+     ---------------------
+     AND  *    ANY  LHS
+     AND  ANY  *    RHS
+     AND  *    NONE NONE
+     AND  NONE *    NONE
+     OR   *    NONE LHS
+     OR   NONE *    RHS
+     OR   *    ANY  ANY
+     OR   ANY  *    ANY
+     XOR  *    NONE LHS
+     XOR  NONE *    RHS
+     XOR  *    ANY  !LHS
+     XOR  ANY  *    !RHS
+     any  X    Y    X op Y
+
+The rows for each operator are tested in the order shown. In every case the
+bitmaps held in the bits fields are combined with the same operator.
+
+Arguments:
+  op           ECL_AND, ECL_OR or ECL_XOR
+  lhs_op_info  left operand; receives the result
+  rhs_op_info  right operand
+  lengthptr    NULL when code is being emitted; otherwise only the required
+               length is being accumulated in *lengthptr
+*/
+
+static void
+fold_binary(int op, eclass_op_info *lhs_op_info, eclass_op_info *rhs_op_info,
+  PCRE2_SIZE *lengthptr)
+{
+uint32_t *lhs_words = lhs_op_info->bits.classwords;
+const uint32_t *rhs_words = rhs_op_info->bits.classwords;
+uint8_t neutral;              /* Operand that leaves the other side as it is */
+uint8_t opposite;             /* The other one of ECL_ANY / ECL_NONE */
+BOOL adopt_rhs = FALSE;       /* Replace the LHS code with the RHS code */
+BOOL negate_result = FALSE;   /* XOR with ANY: result is a negation */
+
+switch (op)
+  {
+  case ECL_AND:
+  neutral = ECL_ANY;
+  opposite = ECL_NONE;
+  break;
+
+  case ECL_OR:
+  case ECL_XOR:
+  neutral = ECL_NONE;
+  opposite = ECL_ANY;
+  break;
+
+  default:
+  PCRE2_DEBUG_UNREACHABLE();
+  return;
+  }
+
+if (rhs_op_info->op_single_type == neutral)
+  {
+  /* no-op: drop the RHS */
+  }
+else if (lhs_op_info->op_single_type == neutral)
+  {
+  /* no-op: drop the LHS, and move the RHS into its place (done below) */
+  adopt_rhs = TRUE;
+  }
+else if (rhs_op_info->op_single_type == opposite)
+  {
+  if (op == ECL_XOR)
+    {
+    /* the result is !LHS: fold in the negation, and drop the RHS */
+    negate_result = TRUE;
+    }
+  else
+    {
+    /* AND with NONE, or OR with ANY: the result is that constant, which is
+    written into the LHS */
+    if (lengthptr == NULL) lhs_op_info->code_start[0] = opposite;
+    lhs_op_info->length = 1;
+    lhs_op_info->op_single_type = opposite;
+    }
+  }
+else if (lhs_op_info->op_single_type == opposite)
+  {
+  /* For XOR the result is !RHS: move the RHS into place and negate it. For
+  AND and OR the LHS already holds the result, so the RHS is just dropped. */
+  if (op == ECL_XOR) adopt_rhs = negate_result = TRUE;
+  }
+else
+  {
+  /* Both of LHS & RHS are either ECL_XCLASS, or compound operations, so the
+  operator has to be emitted, directly after the RHS code. */
+  if (lengthptr != NULL)
+    *lengthptr += 1;
+  else
+    {
+    PCRE2_ASSERT(rhs_op_info->code_start ==
+        lhs_op_info->code_start + lhs_op_info->length);
+    rhs_op_info->code_start[rhs_op_info->length] = (PCRE2_UCHAR)op;
+    }
+  lhs_op_info->length += rhs_op_info->length + 1;
+  lhs_op_info->op_single_type = 0;
+  }
+
+if (adopt_rhs)
+  {
+  if (lengthptr == NULL)
+    memmove(lhs_op_info->code_start, rhs_op_info->code_start,
+            CU2BYTES(rhs_op_info->length));
+  lhs_op_info->length = rhs_op_info->length;
+  lhs_op_info->op_single_type = rhs_op_info->op_single_type;
+  }
+
+/* Preserve the classbits in fold_negation(), because they are dealt with by
+the loop below. */
+
+if (negate_result)
+  fold_negation(lhs_op_info, lengthptr, TRUE);
+
+for (int i = 0; i < 8; i++)
+  {
+  if (op == ECL_AND) lhs_words[i] &= rhs_words[i];
+  else if (op == ECL_OR) lhs_words[i] |= rhs_words[i];
+  else lhs_words[i] ^= rhs_words[i];
+  }
+}
+
+
+
+static BOOL
+compile_eclass_nested(eclass_context *context, BOOL negated,
+  uint32_t **pptr, PCRE2_UCHAR **pcode,
+  eclass_op_info *pop_info, PCRE2_SIZE *lengthptr);  /* Forward declaration:
+  compile_class_operand() calls this for META_CLASS / META_CLASS_NOT items
+  that carry the CLASS_IS_ECLASS flag. */
+
+/* This function consumes a single operand of an extended class. The operand
+is one of:
+
+  - an empty class (META_CLASS_EMPTY or META_CLASS_EMPTY_NOT), which becomes
+    ECL_ANY or ECL_NONE;
+  - a nested extended class (META_CLASS or META_CLASS_NOT with the
+    CLASS_IS_ECLASS flag), which is handed to compile_eclass_nested();
+  - anything else, which is compiled as a "leaf" class by
+    PRIV(compile_class_not_nested)(). The resulting OP_ALLANY, OP_CLASS,
+    OP_NCLASS or OP_XCLASS is then rewritten as ECL_ANY, ECL_NONE, ECL_ANY or
+    ECL_XCLASS respectively, with the bitmap held in pop_info->bits.
+
+Arguments:
+  context    the extended class compile context (options, xoptions, cb,
+             needs_bitmap and errorcodeptr are used here)
+  negated    TRUE if the operand is to be compiled in negated form
+  pptr       points to the parsed pattern pointer, updated past the operand
+  pcode      points to the code pointer, updated on return
+  pop_info   receives the description of the compiled operand
+  lengthptr  NULL when code is being emitted; otherwise *lengthptr is
+             increased by the space needed and *pcode is expected to be
+             returned unchanged (this is asserted at DONE)
+
+Returns:     TRUE on success; FALSE if a nested compile fails, if the leaf
+             compiler returns NULL, or if the pattern pointer fails to
+             advance
+*/
+
+static BOOL
+compile_class_operand(eclass_context *context, BOOL negated,
+  uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
+  PCRE2_SIZE *lengthptr)
+{
+uint32_t *ptr = *pptr;
+uint32_t *prev_ptr;
+PCRE2_UCHAR *code = *pcode;
+PCRE2_UCHAR *code_start = code;
+PCRE2_SIZE prev_length = (lengthptr != NULL)? *lengthptr : 0;  /* Length on
+  entry, used below to measure what the leaf compiler adds to *lengthptr. */
+PCRE2_SIZE extra_length;
+uint32_t meta = META_CODE(*ptr);
+
+switch (meta)
+  {
+  case META_CLASS_EMPTY_NOT:
+  case META_CLASS_EMPTY:
+  ++ptr;
+  pop_info->length = 1;
+  if ((meta == META_CLASS_EMPTY) == negated)  /* Negated META_CLASS_EMPTY, or
+    non-negated META_CLASS_EMPTY_NOT: everything matches. */
+    {
+    *code++ = pop_info->op_single_type = ECL_ANY;
+    memset(pop_info->bits.classbits, 0xff, 32);
+    }
+  else
+    {
+    *code++ = pop_info->op_single_type = ECL_NONE;
+    memset(pop_info->bits.classbits, 0, 32);
+    }
+  break;
+
+  case META_CLASS:
+  case META_CLASS_NOT:
+  if ((*ptr & CLASS_IS_ECLASS) != 0)
+    {
+    if (!compile_eclass_nested(context, negated, &ptr, &code,
+                               pop_info, lengthptr))
+      return FALSE;
+
+    PCRE2_ASSERT(*ptr == META_CLASS_END);
+    ptr++;
+    goto DONE;  /* The code after the switch that fills in
+      pop_info->code_start and adjusts *lengthptr is skipped on this path. */
+    }
+
+  ptr++;
+  /* Fall through */
+
+  default:
+  /* Scan forward characters, ranges, and properties.
+  For example: inside [a-z_ -- m] we don't have brackets around "a-z_" but
+  we still need to collect that fragment up into a "leaf" OP_CLASS. */
+
+  prev_ptr = ptr;
+  ptr = PRIV(compile_class_not_nested)(
+    context->options, context->xoptions, ptr, &code,
+    (meta != META_CLASS_NOT) == negated, &context->needs_bitmap,
+    context->errorcodeptr, context->cb, lengthptr);
+  if (ptr == NULL) return FALSE;
+
+  /* We must have a 100% guarantee that ptr increases when
+  compile_class_operand() returns, even on Release builds, so that we can
+  statically prove our loops terminate. */
+  if (ptr <= prev_ptr)
+    {
+    PCRE2_DEBUG_UNREACHABLE();
+    return FALSE;
+    }
+
+  /* If we fell through above, consume the closing ']'. */
+  if (meta == META_CLASS || meta == META_CLASS_NOT)
+    {
+    PCRE2_ASSERT(*ptr == META_CLASS_END);
+    ptr++;
+    }
+
+  /* Regardless of whether (lengthptr == NULL), some data will still be written
+  out to *pcode, which we need: we have to peek at it, to transform the opcode
+  into the ECLASS version (since we need to hoist up the bitmaps).
+  extra_length is whatever the leaf compiler added to *lengthptr itself; it is
+  zero when code is being emitted. */
+  PCRE2_ASSERT(code > code_start);
+  extra_length = (lengthptr != NULL)? *lengthptr - prev_length : 0;
+
+  /* Easiest case: convert OP_ALLANY to ECL_ANY */
+
+  if (*code_start == OP_ALLANY)
+    {
+    PCRE2_ASSERT(code - code_start == 1 && extra_length == 0);
+    pop_info->length = 1;
+    *code_start = pop_info->op_single_type = ECL_ANY;
+    memset(pop_info->bits.classbits, 0xff, 32);
+    }
+
+  /* For OP_CLASS and OP_NCLASS, we hoist out the bitmap and convert to
+  ECL_NONE / ECL_ANY respectively. */
+
+  else if (*code_start == OP_CLASS || *code_start == OP_NCLASS)
+    {
+    PCRE2_ASSERT(code - code_start == 1 + 32 / sizeof(PCRE2_UCHAR) &&
+                 extra_length == 0);
+    pop_info->length = 1;
+    *code_start = pop_info->op_single_type =
+        (*code_start == OP_CLASS)? ECL_NONE : ECL_ANY;
+    memcpy(pop_info->bits.classbits, code_start + 1, 32);
+    /* Rewind the code pointer, but make sure we adjust *lengthptr, because we
+    do need to reserve that space (even though we only use it temporarily). */
+    if (lengthptr != NULL)
+      *lengthptr += code - (code_start + 1);
+    code = code_start + 1;
+
+    if (!context->needs_bitmap && *code_start == ECL_NONE)  /* Only an
+      ECL_NONE item whose bitmap is all zero leaves needs_bitmap unset. */
+      {
+      uint32_t *classwords = pop_info->bits.classwords;
+
+      for (int i = 0; i < 8; i++)
+        if (classwords[i] != 0)
+          {
+          context->needs_bitmap = TRUE;
+          break;
+          }
+      }
+    else
+      context->needs_bitmap = TRUE;
+    }
+
+  /* Finally, for OP_XCLASS we hoist out the bitmap (if any), and convert to
+  ECL_XCLASS. The bitmap is copied from cb->classbits rather than from the
+  emitted code, and the recorded length includes anything the leaf compiler
+  added to *lengthptr. */
+
+  else
+    {
+    PCRE2_ASSERT(*code_start == OP_XCLASS);
+    *code_start = pop_info->op_single_type = ECL_XCLASS;
+
+    PCRE2_ASSERT(code - code_start >= 1 + LINK_SIZE + 1);
+
+    memcpy(pop_info->bits.classbits, context->cb->classbits.classbits, 32);
+    pop_info->length = (code - code_start) + extra_length;
+    }
+
+  break;
+  }  /* End of switch(meta) */
+
+pop_info->code_start = (lengthptr == NULL)? code_start : NULL;  /* No code
+  position is recorded when only the length is being computed. */
+
+if (lengthptr != NULL)  /* Account for the code written above, then rewind so
+  that *pcode is returned unchanged. */
+  {
+  *lengthptr += code - code_start;
+  code = code_start;
+  }
+
+DONE:
+PCRE2_ASSERT(lengthptr == NULL || (code == code_start));
+
+*pptr = ptr;
+*pcode = code;
+return TRUE;
+}
+
+
+
+/* Compile a run of implicitly-unioned class elements. The elements may be
+characters, ranges, properties or nested classes; they form one run for as
+long as they simply sit next to each other. The run stops at META_CLASS_END
+or at any explicit operator (META_ECLASS_AND up to META_ECLASS_NOT). */
+
+static BOOL
+compile_class_juxtaposition(eclass_context *context, BOOL negated,
+  uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
+  PCRE2_SIZE *lengthptr)
+{
+uint32_t *ptr = *pptr;
+PCRE2_UCHAR *code = *pcode;
+#ifdef PCRE2_DEBUG
+PCRE2_UCHAR *start_code = *pcode;
+#endif
+
+/* The "negated" flag is folded in at compile time; the comments in
+compile_class_binary_loose() explain why. For juxtaposition this gives:
+
+    !(A juxtapose B)  ->  !A && !B
+      A juxtapose B   ->   A || B
+
+so the joining operator, and the negation handed to every operand after the
+first, are the same for the whole run. */
+
+const uint32_t op = negated? ECL_AND : ECL_OR;
+const BOOL rhs_negated = negated? TRUE : FALSE;
+
+/* A class is never empty, so the run starts with an operand. */
+
+if (!compile_class_operand(context, negated, &ptr, &code, pop_info, lengthptr))
+  return FALSE;
+
+for (;;)
+  {
+  eclass_op_info rhs;
+
+  if (*ptr == META_CLASS_END) break;
+  if (*ptr >= META_ECLASS_AND && *ptr <= META_ECLASS_NOT) break;
+
+  /* There is no operator code to step over: the next operand follows
+  directly. */
+
+  if (!compile_class_operand(context, rhs_negated, &ptr, &code,
+                             &rhs, lengthptr))
+    return FALSE;
+
+  /* Combine the two operands in postfix (RPN) order. When lengthptr is NULL,
+  the output position is moved to the end of the combined operand. */
+
+  fold_binary(op, pop_info, &rhs, lengthptr);
+  if (lengthptr == NULL)
+    code = pop_info->code_start + pop_info->length;
+  }
+
+PCRE2_ASSERT(lengthptr == NULL || code == start_code);
+
+*pptr = ptr;
+*pcode = code;
+return TRUE;
+}
+
+
+
+/* Consume any run of unary prefix NOT operators, then compile what follows
+them. */
+
+static BOOL
+compile_class_unary(eclass_context *context, BOOL negated,
+  uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
+  PCRE2_SIZE *lengthptr)
+{
+uint32_t *cursor;
+#ifdef PCRE2_DEBUG
+PCRE2_UCHAR *start_code = *pcode;
+#endif
+
+/* Every META_ECLASS_NOT flips the negation that is passed down, so an even
+number of them cancels out. */
+
+for (cursor = *pptr; *cursor == META_ECLASS_NOT; cursor++)
+  negated = !negated;
+
+*pptr = cursor;
+
+/* A class is never empty, so an operand has to follow. */
+
+if (!compile_class_juxtaposition(context, negated, pptr, pcode,
+                                 pop_info, lengthptr))
+  return FALSE;
+
+PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code);
+return TRUE;
+}
+
+
+
+/* Consume the tightly-binding binary operator (META_ECLASS_AND) together
+with its operands. */
+
+static BOOL
+compile_class_binary_tight(eclass_context *context, BOOL negated,
+  uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
+  PCRE2_SIZE *lengthptr)
+{
+uint32_t *ptr = *pptr;
+PCRE2_UCHAR *code = *pcode;
+#ifdef PCRE2_DEBUG
+PCRE2_UCHAR *start_code = *pcode;
+#endif
+
+/* The "negated" flag is folded in at compile time; the comments in
+compile_class_binary_loose() explain why. For this operator:
+
+    !(A && B)  ->  !A || !B
+      A && B   ->   A && B
+
+Neither the emitted operator nor the negation of the right-hand operands
+depends on anything but "negated", so both are settled once, up front. */
+
+const uint32_t op = negated? ECL_OR : ECL_AND;
+const BOOL rhs_negated = negated? TRUE : FALSE;
+
+/* A class is never empty, so there is an operand at the start. */
+
+if (!compile_class_unary(context, negated, &ptr, &code, pop_info, lengthptr))
+  return FALSE;
+
+while (*ptr == META_ECLASS_AND)
+  {
+  eclass_op_info rhs;
+
+  /* Step over the operator; an operand must come next. */
+
+  ptr++;
+  if (!compile_class_unary(context, rhs_negated, &ptr, &code,
+                           &rhs, lengthptr))
+    return FALSE;
+
+  /* Combine the two operands in postfix (RPN) order. When lengthptr is NULL,
+  the output position is moved to the end of the combined operand. */
+
+  fold_binary(op, pop_info, &rhs, lengthptr);
+  if (lengthptr == NULL)
+    code = pop_info->code_start + pop_info->length;
+  }
+
+PCRE2_ASSERT(lengthptr == NULL || code == start_code);
+
+*pptr = ptr;
+*pcode = code;
+return TRUE;
+}
+
+
+
+/* Consume the loosely-binding binary operators (the range META_ECLASS_OR to
+META_ECLASS_XOR, which includes META_ECLASS_SUB) together with their
+operands. */
+
+static BOOL
+compile_class_binary_loose(eclass_context *context, BOOL negated,
+  uint32_t **pptr, PCRE2_UCHAR **pcode, eclass_op_info *pop_info,
+  PCRE2_SIZE *lengthptr)
+{
+uint32_t *ptr = *pptr;
+PCRE2_UCHAR *code = *pcode;
+#ifdef PCRE2_DEBUG
+PCRE2_UCHAR *start_code = *pcode;
+#endif
+
+/* Negation is folded away wherever that is possible, so that simple cases
+reduce right down. This matters most in 8-bit no-UTF mode, where the
+expression has to end up fully folded: that is what guarantees no OP_ECLASS
+is ever emitted there (just as OP_XCLASS is never emitted in that mode).
+
+A consequence is that, with a little ingenuity, (nearly...) every use of the
+"NOT" operator can be avoided in the output. Suppose the input so far is:
+    !(A ...
+The "!" has been parsed and the operand "A" is about to be. At this point it
+is not known whether a binary operator follows at all; either of these may
+turn out to be the case:
+    !(A && B)
+    !(A)
+Even so, the "!" can be folded into "A" straight away, because whatever
+binary operator does follow, an equivalent expression can still be built. */
+
+/* A class is never empty, so there is an operand at the start. */
+
+if (!compile_class_binary_tight(context, negated, &ptr, &code,
+                                pop_info, lengthptr))
+  return FALSE;
+
+while (*ptr >= META_ECLASS_OR && *ptr <= META_ECLASS_XOR)
+  {
+  /* Pick up the operator and step past it in one go. */
+  const uint32_t meta = *ptr++;
+  uint32_t op;
+  BOOL op_neg;
+  BOOL rhs_negated;
+  eclass_op_info rhs;
+
+  /* If the whole expression is negated, the left-hand side A has already
+  been compiled negated, before the operator was seen. Each operator can
+  nevertheless be rewritten to compensate:
+
+      negated:     !(A || B)   ->  !A && !B
+                   !(A -- B)   ->  !(A && !B)    ->  !A || B
+                   !(A XOR B)  ->  !(!A XOR !B)  ->  !A XNOR !B
+
+      otherwise:     A || B    ->  A || B
+                     A -- B    ->  A && !B
+                     A XOR B   ->  A XOR B
+
+  XNOR is produced as ECL_XOR followed by a negation of the result, which is
+  what op_neg requests. */
+
+  if (meta == META_ECLASS_OR)
+    {
+    op = negated? ECL_AND : ECL_OR;
+    rhs_negated = negated? TRUE : FALSE;
+    }
+  else if (meta == META_ECLASS_SUB)
+    {
+    op = negated? ECL_OR : ECL_AND;
+    rhs_negated = negated? FALSE : TRUE;
+    }
+  else
+    {
+    /* META_ECLASS_XOR */
+    op = ECL_XOR;
+    rhs_negated = negated? TRUE : FALSE;
+    }
+
+  op_neg = negated && (meta == META_ECLASS_XOR);
+
+  /* An operand must follow the operator. */
+
+  if (!compile_class_binary_tight(context, rhs_negated, &ptr, &code,
+                                  &rhs, lengthptr))
+    return FALSE;
+
+  /* Combine the two operands in postfix (RPN) order, negate the result if
+  the rewrite above called for it, and, when lengthptr is NULL, move the
+  output position to the end of the combined operand. */
+
+  fold_binary(op, pop_info, &rhs, lengthptr);
+  if (op_neg) fold_negation(pop_info, lengthptr, FALSE);
+  if (lengthptr == NULL)
+    code = pop_info->code_start + pop_info->length;
+  }
+
+PCRE2_ASSERT(lengthptr == NULL || code == start_code);
+
+*pptr = ptr;
+*pcode = code;
+return TRUE;
+}
+
+
+
+/* Convert the META codes at *pptr into opcodes written at *pcode. On entry
+*pptr must point at a META_CLASS or META_CLASS_NOT; on successful return it
+points at the matching META_CLASS_END.
+
+The class is compiled as a left-associative sequence of operator
+applications. */
+
+static BOOL
+compile_eclass_nested(eclass_context *context, BOOL negated,
+  uint32_t **pptr, PCRE2_UCHAR **pcode,
+  eclass_op_info *pop_info, PCRE2_SIZE *lengthptr)
+{
+const uint32_t opener = **pptr;
+#ifdef PCRE2_DEBUG
+PCRE2_UCHAR *start_code = *pcode;
+#endif
+
+/* A nested class always has the CLASS_IS_ECLASS bit set. */
+PCRE2_ASSERT(opener == (META_CLASS | CLASS_IS_ECLASS) ||
+             opener == (META_CLASS_NOT | CLASS_IS_ECLASS));
+
+/* Step over the opening code. A negated class inverts the sense that was
+passed in. */
+
+(*pptr)++;
+if (opener == (META_CLASS_NOT | CLASS_IS_ECLASS))
+  negated = !negated;
+
+/* A class is never empty, so there is an operand at the start. */
+
+if (!compile_class_binary_loose(context, negated, pptr, pcode,
+                                pop_info, lengthptr))
+  return FALSE;
+
+PCRE2_ASSERT(**pptr == META_CLASS_END);
+PCRE2_ASSERT(lengthptr == NULL || *pcode == start_code);
+return TRUE;
+}
+/* Compile one extended character class into the code at *pcode. A
+provisional OP_ECLASS header (opcode, LINK_SIZE link units, one flags unit) is
+written first and compile_eclass_nested() compiles the class body after it.
+Depending on what the body folded down to, the item finally left at *pcode is
+an OP_ALLANY, an OP_CLASS or OP_NCLASS, an OP_XCLASS, or the OP_ECLASS itself
+(with the bitmap inserted if one is needed).
+
+The options, xoptions, errorcodeptr and cb arguments are stored in the
+eclass_context that is handed to compile_eclass_nested(). When lengthptr is
+non-NULL, *lengthptr is raised as needed to cover the space this class
+requires, and in the OP_XCLASS and OP_ECLASS cases only a truncated header is
+written.
+
+Returns FALSE if compile_eclass_nested() fails, in which case *pcode is not
+updated; otherwise stores the new code position in *pcode and returns TRUE. */
+BOOL
+PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
+  uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
+  compile_block *cb, PCRE2_SIZE *lengthptr)
+{
+eclass_context context;
+eclass_op_info op_info;
+PCRE2_SIZE previous_length = (lengthptr != NULL)? *lengthptr : 0;
+PCRE2_UCHAR *code = *pcode;
+PCRE2_UCHAR *previous;
+BOOL allbitsone = TRUE;
+/* Set up the context passed to compile_eclass_nested(). needs_bitmap starts
+out FALSE and is read back below, once the body has been compiled. */
+context.needs_bitmap = FALSE;
+context.options = options;
+context.xoptions = xoptions;
+context.errorcodeptr = errorcodeptr;
+context.cb = cb;
+/* Write the provisional OP_ECLASS header. "previous" remembers where the
+item starts, so that the code pointer can be rewound to it later. */
+previous = code;
+*code++ = OP_ECLASS;
+code += LINK_SIZE;
+*code++ = 0;  /* Flags, currently zero. */
+if (!compile_eclass_nested(&context, FALSE, pptr, &code, &op_info, lengthptr))
+  return FALSE;
+
+if (lengthptr != NULL)
+  {
+  *lengthptr += code - previous;
+  code = previous;
+  /* (*lengthptr - previous_length) now holds the amount of buffer that
+  we require to make the call to compile_class_nested() with
+  lengthptr = NULL, and including the (1+LINK_SIZE+1) that we write out
+  before that call. */
+  }
+
+/* Do some useful counting of what's in the bitmap. allbitsone stays TRUE
+only if every one of the eight 32-bit words of the bitmap is 0xffffffff. */
+for (int i = 0; i < 8; i++)
+  if (op_info.bits.classwords[i] != 0xffffffff)
+    {
+    allbitsone = FALSE;
+    break;
+    }
+
+/* After constant-folding the extended class syntax, it may turn out to be
+a simple class after all. In that case, we can unwrap it from the
+OP_ECLASS container - and in fact, we must do so, because in 8-bit
+no-Unicode mode the matcher is compiled without support for OP_ECLASS. */
+/* Note the preprocessor structure: without SUPPORT_WIDE_CHARS the braced
+block below is unconditional (op_single_type is asserted to be non-zero);
+with SUPPORT_WIDE_CHARS it is the body of the "if", and the matching "else"
+appears further down. */
+#ifndef SUPPORT_WIDE_CHARS
+PCRE2_ASSERT(op_info.op_single_type != 0);
+#else
+if (op_info.op_single_type != 0)
+#endif
+  {
+  /* Rewind back over the OP_ECLASS. */
+  code = previous;
+
+  /* If the bits are all ones, and the "high characters" are all matched
+  too, we use a special-cased encoding of OP_ALLANY. */
+
+  if (op_info.op_single_type == ECL_ANY && allbitsone)
+    {
+    /* Advancing code means rewinding lengthptr, at this point. */
+    if (lengthptr != NULL) *lengthptr -= 1;
+    *code++ = OP_ALLANY;
+    }
+
+  /* If the high bits are all matched / all not-matched, then we emit an
+  OP_NCLASS/OP_CLASS respectively. */
+
+  else if (op_info.op_single_type == ECL_ANY ||
+           op_info.op_single_type == ECL_NONE)
+    {
+    PCRE2_SIZE required_len = 1 + (32 / sizeof(PCRE2_UCHAR));
+    /* That is one opcode unit plus the 32-byte bitmap counted in code units.
+    *lengthptr is only raised if it does not already cover this much. */
+    if (lengthptr != NULL)
+      {
+      if (required_len > (*lengthptr - previous_length))
+      *lengthptr = previous_length + required_len;  /* Body of the "if" above */
+      }
+
+    /* Advancing code means rewinding lengthptr, at this point. */
+    if (lengthptr != NULL) *lengthptr -= required_len;
+    *code++ = (op_info.op_single_type == ECL_ANY)? OP_NCLASS : OP_CLASS;
+    memcpy(code, op_info.bits.classbits, 32);
+    code += 32 / sizeof(PCRE2_UCHAR);
+    }
+
+  /* Otherwise, we have an ECL_XCLASS, so we have the OP_XCLASS data
+  there, but, we pulled out its bitmap into op_info, so now we have to
+  put that back into the OP_XCLASS. */
+
+  else
+    {
+#ifndef SUPPORT_WIDE_CHARS
+    PCRE2_DEBUG_UNREACHABLE();
+#else
+    BOOL need_map = context.needs_bitmap;
+    PCRE2_SIZE required_len;
+
+    PCRE2_ASSERT(op_info.op_single_type == ECL_XCLASS);
+    required_len = op_info.length + (need_map? 32/sizeof(PCRE2_UCHAR) : 0);
+
+    if (lengthptr != NULL)
+      {
+      /* Don't unconditionally request all the space we need - we may
+      already have asked for more during processing of the ECLASS. */
+      if (required_len > (*lengthptr - previous_length))
+        *lengthptr = previous_length + required_len;
+
+      /* The code we write out here won't be ignored, even during the
+      (lengthptr != NULL) phase, because if there's a following quantifier
+      it will peek backwards. So we do have to write out a (truncated)
+      OP_XCLASS, even on this branch. */
+      *lengthptr -= 1 + LINK_SIZE + 1;
+      *code++ = OP_XCLASS;
+      PUT(code, 0, 1 + LINK_SIZE + 1);
+      code += LINK_SIZE;
+      *code++ = 0;
+      }
+    else
+      {
+      PCRE2_UCHAR *rest;
+      PCRE2_SIZE rest_len;
+      PCRE2_UCHAR flags;
+
+      /* 1 unit: OP_XCLASS | LINK_SIZE units | 1 unit: flags | ...rest */
+      PCRE2_ASSERT(op_info.length >= 1 + LINK_SIZE + 1);
+      rest = op_info.code_start + 1 + LINK_SIZE + 1;
+      rest_len = (op_info.code_start + op_info.length) - rest;
+
+      /* First read any data we use, before memmove splats it. */
+      flags = op_info.code_start[1 + LINK_SIZE];
+      PCRE2_ASSERT((flags & XCL_MAP) == 0);
+
+      /* Next do the memmove before any writes. */
+      memmove(code + 1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0),
+              rest, CU2BYTES(rest_len));
+
+      /* Finally write the header data. */
+      *code++ = OP_XCLASS;
+      PUT(code, 0, (int)required_len);
+      code += LINK_SIZE;
+      *code++ = flags | (need_map? XCL_MAP : 0);
+      if (need_map)
+        {
+        memcpy(code, op_info.bits.classbits, 32);
+        code += 32 / sizeof(PCRE2_UCHAR);
+        }
+      code += rest_len;
+      }
+#endif /* SUPPORT_WIDE_CHARS */
+    }
+  }
+
+/* Otherwise, we're going to keep the OP_ECLASS. However, again we need
+to do some adjustment to insert the bitmap if we have one. */
+
+#ifdef SUPPORT_WIDE_CHARS
+else
+  {
+  BOOL need_map = context.needs_bitmap;
+  PCRE2_SIZE required_len =
+    1 + LINK_SIZE + 1 + (need_map? 32/sizeof(PCRE2_UCHAR) : 0) + op_info.length;
+  /* That is the header, the optional 32-byte bitmap counted in code units,
+  and op_info.length. */
+  if (lengthptr != NULL)
+    {
+    if (required_len > (*lengthptr - previous_length))
+      *lengthptr = previous_length + required_len;
+
+    /* As for the XCLASS branch above, we do have to write out a dummy
+    OP_ECLASS, because of the backwards peek by the quantifier code. Write
+    out a (truncated) OP_ECLASS, even on this branch. */
+    *lengthptr -= 1 + LINK_SIZE + 1;
+    *code++ = OP_ECLASS;
+    PUT(code, 0, 1 + LINK_SIZE + 1);
+    code += LINK_SIZE;
+    *code++ = 0;
+    }
+  else
+    {
+    if (need_map)
+      {  /* Move what follows the header up, then copy the bitmap in. */
+      PCRE2_UCHAR *map_start = previous + 1 + LINK_SIZE + 1;
+      previous[1 + LINK_SIZE] |= ECL_MAP;
+      memmove(map_start + 32/sizeof(PCRE2_UCHAR), map_start,
+              CU2BYTES(code - map_start));
+      memcpy(map_start, op_info.bits.classbits, 32);
+      code += 32 / sizeof(PCRE2_UCHAR);
+      }
+    PUT(previous, 1, (int)(code - previous));  /* Link = item length */
+    }
+  }
+#endif /* SUPPORT_WIDE_CHARS */
+
+*pcode = code;
+return TRUE;
+}
+
+/* End of pcre2_compile_class.c */

+ 2 - 2
thirdparty/pcre2/src/pcre2_config.c

@@ -224,8 +224,8 @@ switch (what)
   XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
   There are problems using an "obvious" approach like this:
 
-     XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE_MINOR)
-     XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE_DATE)
+     XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE2_MINOR)
+     XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE2_DATE)
 
   because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
   of STRING(). The C standard states: "If (before argument substitution) any

+ 60 - 6
thirdparty/pcre2/src/pcre2_context.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2023 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -130,17 +130,19 @@ return gcontext;
 /* A default compile context is set up to save having to initialize at run time
 when no context is supplied to the compile function. */
 
-const pcre2_compile_context PRIV(default_compile_context) = {
+pcre2_compile_context PRIV(default_compile_context) = {
   { default_malloc, default_free, NULL },    /* Default memory handling */
   NULL,                                      /* Stack guard */
   NULL,                                      /* Stack guard data */
   PRIV(default_tables),                      /* Character tables */
   PCRE2_UNSET,                               /* Max pattern length */
+  PCRE2_UNSET,                               /* Max pattern compiled length */
   BSR_DEFAULT,                               /* Backslash R default */
   NEWLINE_DEFAULT,                           /* Newline convention */
   PARENS_NEST_LIMIT,                         /* As it says */
   0,                                         /* Extra options */
-  MAX_VARLOOKBEHIND                          /* As it says */
+  MAX_VARLOOKBEHIND,                         /* As it says */
+  PCRE2_OPTIMIZATION_ALL                     /* All optimizations enabled */
   };
 
 /* The create function copies the default into the new memory, but must
@@ -162,7 +164,7 @@ return ccontext;
 /* A default match context is set up to save having to initialize at run time
 when no context is supplied to a match function. */
 
-const pcre2_match_context PRIV(default_match_context) = {
+pcre2_match_context PRIV(default_match_context) = {
   { default_malloc, default_free, NULL },
 #ifdef SUPPORT_JIT
   NULL,          /* JIT callback */
@@ -172,6 +174,8 @@ const pcre2_match_context PRIV(default_match_context) = {
   NULL,          /* Callout data */
   NULL,          /* Substitute callout function */
   NULL,          /* Substitute callout data */
+  NULL,          /* Substitute case callout function */
+  NULL,          /* Substitute case callout data */
   PCRE2_UNSET,   /* Offset limit */
   HEAP_LIMIT,
   MATCH_LIMIT,
@@ -196,7 +200,7 @@ return mcontext;
 /* A default convert context is set up to save having to initialize at run time
 when no context is supplied to the convert function. */
 
-const pcre2_convert_context PRIV(default_convert_context) = {
+pcre2_convert_context PRIV(default_convert_context) = {
   { default_malloc, default_free, NULL },    /* Default memory handling */
 #ifdef _WIN32
   CHAR_BACKSLASH,                            /* Default path separator */
@@ -352,6 +356,13 @@ ccontext->max_pattern_length = length;
 return 0;
 }
 
+/* Set the maximum compiled pattern length held in a compile context. The
+value is stored as given, with no checking of either argument, and the
+function always returns 0. */
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_max_pattern_compiled_length(pcre2_compile_context *ccontext,
+  PCRE2_SIZE length)
+{
+ccontext->max_pattern_compiled_length = length;
+return 0;
+}
+
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
 {
@@ -401,6 +412,38 @@ ccontext->stack_guard_data = user_data;
 return 0;
 }
 
+/* Adjust the optimization flags held in a compile context according to a
+single directive. Returns PCRE2_ERROR_NULL if ccontext is NULL,
+PCRE2_ERROR_BADOPTION if the directive is not recognized, and 0 otherwise. */
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_optimize(pcre2_compile_context *ccontext, uint32_t directive)
+{
+if (ccontext == NULL)
+  return PCRE2_ERROR_NULL;
+
+/* The two wholesale directives replace the entire flag set. */
+
+if (directive == PCRE2_OPTIMIZATION_NONE)
+  {
+  ccontext->optimization_flags = 0;
+  return 0;
+  }
+
+if (directive == PCRE2_OPTIMIZATION_FULL)
+  {
+  ccontext->optimization_flags = PCRE2_OPTIMIZATION_ALL;
+  return 0;
+  }
+
+/* Anything else has to be one of the per-optimization directives. */
+
+if (directive < PCRE2_AUTO_POSSESS || directive > PCRE2_START_OPTIMIZE_OFF)
+  return PCRE2_ERROR_BADOPTION;
+
+/* These directives come in pairs that share a flag bit: the even numbers,
+starting from 64, switch the bit on and the odd numbers, starting from 65,
+switch it off. */
+
+if ((directive & 1) == 0)
+  ccontext->optimization_flags |= 1u << ((directive >> 1) - 32);
+else
+  ccontext->optimization_flags &= ~(1u << ((directive >> 1) - 32));
+
+return 0;
+}
 
 /* ------------ Match context ------------ */
 
@@ -416,13 +459,24 @@ return 0;
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_set_substitute_callout(pcre2_match_context *mcontext,
   int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
-    void *substitute_callout_data)
+  void *substitute_callout_data)
 {
 mcontext->substitute_callout = substitute_callout;
 mcontext->substitute_callout_data = substitute_callout_data;
 return 0;
 }
 
+/* Record a substitute case callout function, and the user data pointer that
+goes with it, in a match context. Both values are stored as given and the
+function always returns 0. */
+
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_substitute_case_callout(pcre2_match_context *mcontext,
+  PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
+                                        PCRE2_SIZE, int, void *),
+  void *substitute_case_callout_data)
+{
+mcontext->substitute_case_callout_data = substitute_case_callout_data;
+mcontext->substitute_case_callout = substitute_case_callout;
+return 0;
+}
+
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
 {

+ 10 - 8
thirdparty/pcre2/src/pcre2_convert.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2022 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -74,7 +74,7 @@ enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,
 
 #define PUTCHARS(string) \
   { \
-  for (s = (char *)(string); *s != 0; s++) \
+  for (const char *s = string; *s != 0; s++) \
     { \
     if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
     *p++ = *s; \
@@ -125,7 +125,6 @@ convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
   BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
   PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
 {
-char *s;
 PCRE2_SPTR posix = pattern;
 PCRE2_UCHAR *p = use_buffer;
 PCRE2_UCHAR *pp = p;
@@ -1065,7 +1064,7 @@ pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
   PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,
   pcre2_convert_context *ccontext)
 {
-int i, rc;
+int rc;
 PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
 PCRE2_UCHAR *use_buffer = dummy_buffer;
 PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
@@ -1119,7 +1118,7 @@ if (buffptr != NULL && *buffptr != NULL)
 /* Call an individual converter, either just once (if a buffer was provided or
 just the length is needed), or twice (if a memory allocation is required). */
 
-for (i = 0; i < 2; i++)
+for (int i = 0; i < 2; i++)
   {
   PCRE2_UCHAR *allocated;
   BOOL dummyrun = buffptr == NULL || *buffptr == NULL;
@@ -1138,8 +1137,7 @@ for (i = 0; i < 2; i++)
     break;
 
     default:
-    *bufflenptr = 0;  /* Error offset */
-    return PCRE2_ERROR_INTERNAL;
+    goto EXIT;
     }
 
   if (rc != 0 ||           /* Error */
@@ -1159,8 +1157,12 @@ for (i = 0; i < 2; i++)
   use_length = *bufflenptr + 1;
   }
 
-/* Control should never get here. */
+/* Something went terribly wrong. Trigger an assert and return an error */
+PCRE2_DEBUG_UNREACHABLE();
 
+EXIT:
+
+*bufflenptr = 0;  /* Error offset */
 return PCRE2_ERROR_INTERNAL;
 }
 

+ 49 - 58
thirdparty/pcre2/src/pcre2_dfa_match.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2023 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -156,6 +156,7 @@ static const uint8_t coptable[] = {
   0,                             /* CLASS                                  */
   0,                             /* NCLASS                                 */
   0,                             /* XCLASS - variable length               */
+  0,                             /* ECLASS - variable length               */
   0,                             /* REF                                    */
   0,                             /* REFI                                   */
   0,                             /* DNREF                                  */
@@ -175,6 +176,7 @@ static const uint8_t coptable[] = {
   0,                             /* Assert behind not                      */
   0,                             /* NA assert                              */
   0,                             /* NA assert behind                       */
+  0,                             /* Assert scan substring                  */
   0,                             /* ONCE                                   */
   0,                             /* SCRIPT_RUN                             */
   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
@@ -188,7 +190,7 @@ static const uint8_t coptable[] = {
   0, 0,                          /* COMMIT, COMMIT_ARG                     */
   0, 0, 0,                       /* FAIL, ACCEPT, ASSERT_ACCEPT            */
   0, 0, 0,                       /* CLOSE, SKIPZERO, DEFINE                */
-  0, 0                           /* \B and \b in UCP mode                  */
+  0, 0,                          /* \B and \b in UCP mode                  */
 };
 
 /* This table identifies those opcodes that inspect a character. It is used to
@@ -234,6 +236,7 @@ static const uint8_t poptable[] = {
   1,                             /* CLASS                                  */
   1,                             /* NCLASS                                 */
   1,                             /* XCLASS - variable length               */
+  1,                             /* ECLASS - variable length               */
   0,                             /* REF                                    */
   0,                             /* REFI                                   */
   0,                             /* DNREF                                  */
@@ -253,6 +256,7 @@ static const uint8_t poptable[] = {
   0,                             /* Assert behind not                      */
   0,                             /* NA assert                              */
   0,                             /* NA assert behind                       */
+  0,                             /* Assert scan substring                  */
   0,                             /* ONCE                                   */
   0,                             /* SCRIPT_RUN                             */
   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
@@ -266,9 +270,13 @@ static const uint8_t poptable[] = {
   0, 0,                          /* COMMIT, COMMIT_ARG                     */
   0, 0, 0,                       /* FAIL, ACCEPT, ASSERT_ACCEPT            */
   0, 0, 0,                       /* CLOSE, SKIPZERO, DEFINE                */
-  1, 1                           /* \B and \b in UCP mode                  */
+  1, 1,                          /* \B and \b in UCP mode                  */
 };
 
+/* Compile-time check that these tables have the correct size. */
+STATIC_ASSERT(sizeof(coptable) == OP_TABLE_LENGTH, coptable);
+STATIC_ASSERT(sizeof(poptable) == OP_TABLE_LENGTH, poptable);
+
 /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
 and \w */
 
@@ -695,7 +703,6 @@ for (;;)
   int i, j;
   int clen, dlen;
   uint32_t c, d;
-  int forced_fail = 0;
   BOOL partial_newline = FALSE;
   BOOL could_continue = reset_could_continue;
   reset_could_continue = FALSE;
@@ -841,19 +848,6 @@ for (;;)
 
     switch (codevalue)
       {
-/* ========================================================================== */
-      /* These cases are never obeyed. This is a fudge that causes a compile-
-      time error if the vectors coptable or poptable, which are indexed by
-      opcode, are not the correct length. It seems to be the only way to do
-      such a check at compile time, as the sizeof() operator does not work
-      in the C preprocessor. */
-
-      case OP_TABLE_LENGTH:
-      case OP_TABLE_LENGTH +
-        ((sizeof(coptable) == OP_TABLE_LENGTH) &&
-         (sizeof(poptable) == OP_TABLE_LENGTH)):
-      return 0;
-
 /* ========================================================================== */
       /* Reached a closing bracket. If not at the end of the pattern, carry
       on with the next opcode. For repeating opcodes, also add the repeat
@@ -1179,10 +1173,6 @@ for (;;)
         const ucd_record * prop = GET_UCD(c);
         switch(code[1])
           {
-          case PT_ANY:
-          OK = TRUE;
-          break;
-
           case PT_LAMP:
           chartype = prop->chartype;
           OK = chartype == ucp_Lu || chartype == ucp_Ll ||
@@ -1462,10 +1452,6 @@ for (;;)
         const ucd_record * prop = GET_UCD(c);
         switch(code[2])
           {
-          case PT_ANY:
-          OK = TRUE;
-          break;
-
           case PT_LAMP:
           chartype = prop->chartype;
           OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
@@ -1727,10 +1713,6 @@ for (;;)
         const ucd_record * prop = GET_UCD(c);
         switch(code[2])
           {
-          case PT_ANY:
-          OK = TRUE;
-          break;
-
           case PT_LAMP:
           chartype = prop->chartype;
           OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
@@ -2017,10 +1999,6 @@ for (;;)
         const ucd_record * prop = GET_UCD(c);
         switch(code[1 + IMM2_SIZE + 1])
           {
-          case PT_ANY:
-          OK = TRUE;
-          break;
-
           case PT_LAMP:
           chartype = prop->chartype;
           OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
@@ -2663,35 +2641,54 @@ for (;;)
 
       case OP_CLASS:
       case OP_NCLASS:
+#ifdef SUPPORT_WIDE_CHARS
       case OP_XCLASS:
+      case OP_ECLASS:
+#endif
         {
         BOOL isinclass = FALSE;
         int next_state_offset;
         PCRE2_SPTR ecode;
 
+#ifdef SUPPORT_WIDE_CHARS
+        /* An extended class may have a table or a list of single characters,
+        ranges, or both, and it may be positive or negative. There's a
+        function that sorts all this out. */
+
+        if (codevalue == OP_XCLASS)
+         {
+         ecode = code + GET(code, 1);
+         if (clen > 0)
+           isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE,
+             (const uint8_t*)mb->start_code, utf);
+         }
+
+        /* A nested set-based class has internal opcodes for performing
+        set operations. */
+
+        else if (codevalue == OP_ECLASS)
+         {
+         ecode = code + GET(code, 1);
+         if (clen > 0)
+           isinclass = PRIV(eclass)(c, code + 1 + LINK_SIZE, ecode,
+             (const uint8_t*)mb->start_code, utf);
+         }
+
+        else
+#endif /* SUPPORT_WIDE_CHARS */
+
         /* For a simple class, there is always just a 32-byte table, and we
         can set isinclass from it. */
 
-        if (codevalue != OP_XCLASS)
           {
           ecode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
           if (clen > 0)
             {
             isinclass = (c > 255)? (codevalue == OP_NCLASS) :
-              ((((uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0);
+              ((((const uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0);
             }
           }
 
-        /* An extended class may have a table or a list of single characters,
-        ranges, or both, and it may be positive or negative. There's a
-        function that sorts all this out. */
-
-        else
-         {
-         ecode = code + GET(code, 1);
-         if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
-         }
-
         /* At this point, isinclass is set for all kinds of class, and ecode
         points to the byte after the end of the class. If there is a
         quantifier, this is where it will be. */
@@ -2784,7 +2781,6 @@ for (;;)
       though the other "backtracking verbs" are not supported. */
 
       case OP_FAIL:
-      forced_fail++;    /* Count FAILs for multiple states */
       break;
 
       case OP_ASSERT:
@@ -3058,7 +3054,7 @@ for (;;)
         if (codevalue == OP_BRAPOSZERO)
           {
           allow_zero = TRUE;
-          codevalue = *(++code);  /* Codevalue will be one of above BRAs */
+          ++code;  /* The following opcode will be one of the above BRAs */
           }
         else allow_zero = FALSE;
 
@@ -3271,18 +3267,12 @@ for (;;)
   matches that we are going to find. If partial matching has been requested,
   check for appropriate conditions.
 
-  The "forced_ fail" variable counts the number of (*F) encountered for the
-  character. If it is equal to the original active_count (saved in
-  workspace[1]) it means that (*F) was found on every active state. In this
-  case we don't want to give a partial match.
-
   The "could_continue" variable is true if a state could have continued but
   for the fact that the end of the subject was reached. */
 
   if (new_count <= 0)
     {
     if (could_continue &&                            /* Some could go on, and */
-        forced_fail != workspace[1] &&               /* Not all forced fail & */
         (                                            /* either... */
         (mb->moptions & PCRE2_PARTIAL_HARD) != 0      /* Hard partial */
         ||                                           /* or... */
@@ -3438,7 +3428,7 @@ if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
 /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
 options variable for this function. Users of PCRE2 who are not calling the
 function directly would like to have a way of setting these flags, in the same
-way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
+way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
 constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
 (*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
 transferred to the options for this function. The bits are guaranteed to be
@@ -3528,8 +3518,7 @@ if (mb->match_limit_depth > re->limit_depth)
 if (mb->heap_limit > re->limit_heap)
   mb->heap_limit = re->limit_heap;
 
-mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
-  re->name_count * re->name_entry_size;
+mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start);
 mb->tables = re->tables;
 mb->start_subject = subject;
 mb->end_subject = end_subject;
@@ -3576,7 +3565,9 @@ switch(re->newline_convention)
   mb->nltype = NLTYPE_ANYCRLF;
   break;
 
-  default: return PCRE2_ERROR_INTERNAL;
+  default:
+  PCRE2_DEBUG_UNREACHABLE();
+  return PCRE2_ERROR_INTERNAL;
   }
 
 /* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
@@ -3705,7 +3696,7 @@ for (;;)
   these, for testing and for ensuring that all callouts do actually occur.
   The optimizations must also be avoided when restarting a DFA match. */
 
-  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
+  if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 &&
       (options & PCRE2_DFA_RESTART) == 0)
     {
     /* If firstline is TRUE, the start of the match is constrained to the first

+ 28 - 5
thirdparty/pcre2/src/pcre2_error.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2023 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -96,7 +96,7 @@ static const unsigned char compile_error_texts[] =
   "length of lookbehind assertion is not limited\0"
   "a relative value of zero is not allowed\0"
   "conditional subpattern contains more than two branches\0"
-  "assertion expected after (?( or (?(?C)\0"
+  "atomic assertion expected after (?( or (?(?C)\0"
   "digit expected after (?+ or (?-\0"
   /* 30 */
   "unknown POSIX class name\0"
@@ -161,7 +161,7 @@ static const unsigned char compile_error_texts[] =
   "using UCP is disabled by the application\0"
   "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
   "character code point value in \\u.... sequence is too large\0"
-  "digits missing in \\x{} or \\o{} or \\N{U+}\0"
+  "digits missing after \\x or in \\x{} or \\o{} or \\N{U+}\0"
   "syntax error or number too big in (?(VERSION condition\0"
   /* 80 */
   "internal error: unknown opcode in auto_possessify()\0"
@@ -185,10 +185,29 @@ static const unsigned char compile_error_texts[] =
   "(*alpha_assertion) not recognized\0"
   "script runs require Unicode support, which this version of PCRE2 does not have\0"
   "too many capturing groups (maximum 65535)\0"
-  "atomic assertion expected after (?( or (?(?C)\0"
+  "octal digit missing after \\0 (PCRE2_EXTRA_NO_BS0 is set)\0"
   "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0"
   /* 100 */
   "branch too long in variable-length lookbehind assertion\0"
+  "compiled pattern would be longer than the limit set by the application\0"
+  "octal value given by \\ddd is greater than \\377 (forbidden by PCRE2_EXTRA_PYTHON_OCTAL)\0"
+  "using callouts is disabled by the application\0"
+  "PCRE2_EXTRA_TURKISH_CASING require Unicode (UTF or UCP) mode\0"
+  /* 105 */
+  "PCRE2_EXTRA_TURKISH_CASING requires UTF in 8-bit mode\0"
+  "PCRE2_EXTRA_TURKISH_CASING and PCRE2_EXTRA_CASELESS_RESTRICT are not compatible\0"
+  "extended character class nesting is too deep\0"
+  "invalid operator in extended character class\0"
+  "unexpected operator in extended character class (no preceding operand)\0"
+  /* 110 */
+  "expected operand after operator in extended character class\0"
+  "square brackets needed to clarify operator precedence in extended character class\0"
+  "missing terminating ] for extended character class (note '[' must be escaped under PCRE2_ALT_EXTENDED_CLASS)\0"
+  "unexpected expression in extended character class (no preceding operator)\0"
+  "empty expression in extended character class\0"
+  /* 115 */
+  "terminating ] with no following closing parenthesis in (?[...]\0"
+  "unexpected character in (?[...]) extended character class\0"
   ;
 
 /* Match-time and UTF error texts are in the same format. */
@@ -275,6 +294,10 @@ static const unsigned char match_error_texts[] =
   "internal error - duplicate substitution match\0"
   "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
   "INTERNAL ERROR: invalid substring offset\0"
+  "feature is not supported by the JIT compiler\0"
+  "error performing replacement case transformation\0"
+  /* 70 */
+  "replacement too large (longer than PCRE2_SIZE)\0"
   ;
 
 
@@ -317,7 +340,7 @@ else if (enumber < 0)               /* Match or UTF error */
   }
 else                                /* Invalid error number */
   {
-  message = (unsigned char *)"\0";  /* Empty message list */
+  message = (const unsigned char *)"\0";  /* Empty message list */
   n = 1;
   }
 

+ 22 - 8
thirdparty/pcre2/src/pcre2_extuni.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2021 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,7 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* This module contains an internal function that is used to match a Unicode
 extended grapheme sequence. It is used by both pcre2_match() and
-pcre2_def_match(). However, it is called only when Unicode support is being
+pcre2_dfa_match(). However, it is called only when Unicode support is being
 compiled. Nevertheless, we provide a dummy function when there is no Unicode
 support, because some compilers do not like functionless source files. */
 
@@ -75,7 +75,11 @@ return NULL;
 *      Match an extended grapheme sequence       *
 *************************************************/
 
-/*
+/* NOTE: The logic contained in this function is replicated in three special-
+purpose functions in the pcre2_jit_compile.c module. If the logic below is
+changed, they must be kept in step so that the interpreter and the JIT have the
+same behaviour.
+
 Arguments:
   c              the first character
   eptr           pointer to next character
@@ -92,6 +96,7 @@ PCRE2_SPTR
 PRIV(extuni)(uint32_t c, PCRE2_SPTR eptr, PCRE2_SPTR start_subject,
   PCRE2_SPTR end_subject, BOOL utf, int *xcount)
 {
+BOOL was_ep_ZWJ = FALSE;
 int lgb = UCD_GRAPHBREAK(c);
 
 while (eptr < end_subject)
@@ -102,6 +107,12 @@ while (eptr < end_subject)
   rgb = UCD_GRAPHBREAK(c);
   if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
 
+  /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
+  preceded by Extended Pictographic. */
+
+  if (lgb == ucp_gbZWJ && rgb == ucp_gbExtended_Pictographic && !was_ep_ZWJ)
+    break;
+
   /* Not breaking between Regional Indicators is allowed only if there
   are an even number of preceding RIs. */
 
@@ -129,12 +140,15 @@ while (eptr < end_subject)
     if ((ricount & 1) != 0) break;  /* Grapheme break required */
     }
 
-  /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
-  allows any number of them before a following Extended_Pictographic. */
+  /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
+  between; see next statement). */
+
+  was_ep_ZWJ = (lgb == ucp_gbExtended_Pictographic && rgb == ucp_gbZWJ);
+
+  /* If Extend follows Extended_Pictographic, do not update lgb; this allows
+  any number of them before a following ZWJ. */
 
-  if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
-       lgb != ucp_gbExtended_Pictographic)
-    lgb = rgb;
+  if (rgb != ucp_gbExtend || lgb != ucp_gbExtended_Pictographic) lgb = rgb;
 
   eptr += len;
   if (xcount != NULL) *xcount += 1;

+ 8 - 7
thirdparty/pcre2/src/pcre2_find_bracket.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2023 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -76,18 +76,19 @@ for (;;)
   if (c == OP_END) return NULL;
 
   /* XCLASS is used for classes that cannot be represented just by a bit map.
-  This includes negated single high-valued characters. CALLOUT_STR is used for
-  callouts with string arguments. In both cases the length in the table is
+  This includes negated single high-valued characters. ECLASS is used for
+  classes that use set operations internally. CALLOUT_STR is used for
+  callouts with string arguments. In each case the length in the table is
   zero; the actual length is stored in the compiled code. */
 
-  if (c == OP_XCLASS) code += GET(code, 1);
-    else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
+  if (c == OP_XCLASS || c == OP_ECLASS) code += GET(code, 1);
+  else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
 
   /* Handle lookbehind */
 
   else if (c == OP_REVERSE || c == OP_VREVERSE)
     {
-    if (number < 0) return (PCRE2_UCHAR *)code;
+    if (number < 0) return code;
     code += PRIV(OP_lengths)[c];
     }
 
@@ -97,7 +98,7 @@ for (;;)
            c == OP_CBRAPOS || c == OP_SCBRAPOS)
     {
     int n = (int)GET2(code, 1+LINK_SIZE);
-    if (n == number) return (PCRE2_UCHAR *)code;
+    if (n == number) return code;
     code += PRIV(OP_lengths)[c];
     }
 

+ 258 - 107
thirdparty/pcre2/src/pcre2_internal.h

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2023 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -88,6 +88,12 @@ typedef int BOOL;
 #define TRUE    1
 #endif
 
+/* Helper macro for static (compile-time) assertions. Can be used inside
+functions, or at the top-level of a file. */
+#define STATIC_ASSERT_JOIN(a,b) a ## b
+#define STATIC_ASSERT(cond, msg) \
+  typedef int STATIC_ASSERT_JOIN(static_assertion_,msg)[(cond)?1:-1]
+
 /* Valgrind (memcheck) support */
 
 #ifdef SUPPORT_VALGRIND
@@ -523,29 +529,29 @@ start/end of string field names are. */
 three must not be changed, because whichever is set is actually the number of
 bytes in a code unit in that mode. */
 
-#define PCRE2_MODE8         0x00000001  /* compiled in 8 bit mode */
-#define PCRE2_MODE16        0x00000002  /* compiled in 16 bit mode */
-#define PCRE2_MODE32        0x00000004  /* compiled in 32 bit mode */
-#define PCRE2_FIRSTSET      0x00000010  /* first_code unit is set */
-#define PCRE2_FIRSTCASELESS 0x00000020  /* caseless first code unit */
-#define PCRE2_FIRSTMAPSET   0x00000040  /* bitmap of first code units is set */
-#define PCRE2_LASTSET       0x00000080  /* last code unit is set */
-#define PCRE2_LASTCASELESS  0x00000100  /* caseless last code unit */
-#define PCRE2_STARTLINE     0x00000200  /* start after \n for multiline */
-#define PCRE2_JCHANGED      0x00000400  /* j option used in pattern */
-#define PCRE2_HASCRORLF     0x00000800  /* explicit \r or \n in pattern */
-#define PCRE2_HASTHEN       0x00001000  /* pattern contains (*THEN) */
-#define PCRE2_MATCH_EMPTY   0x00002000  /* pattern can match empty string */
-#define PCRE2_BSR_SET       0x00004000  /* BSR was set in the pattern */
-#define PCRE2_NL_SET        0x00008000  /* newline was set in the pattern */
-#define PCRE2_NOTEMPTY_SET  0x00010000  /* (*NOTEMPTY) used        ) keep */
-#define PCRE2_NE_ATST_SET   0x00020000  /* (*NOTEMPTY_ATSTART) used) together */
-#define PCRE2_DEREF_TABLES  0x00040000  /* release character tables */
-#define PCRE2_NOJIT         0x00080000  /* (*NOJIT) used */
-#define PCRE2_HASBKPORX     0x00100000  /* contains \P, \p, or \X */
-#define PCRE2_DUPCAPUSED    0x00200000  /* contains (?| */
-#define PCRE2_HASBKC        0x00400000  /* contains \C */
-#define PCRE2_HASACCEPT     0x00800000  /* contains (*ACCEPT) */
+#define PCRE2_MODE8         0x00000001u /* compiled in 8 bit mode */
+#define PCRE2_MODE16        0x00000002u /* compiled in 16 bit mode */
+#define PCRE2_MODE32        0x00000004u /* compiled in 32 bit mode */
+#define PCRE2_FIRSTSET      0x00000010u /* first_code unit is set */
+#define PCRE2_FIRSTCASELESS 0x00000020u /* caseless first code unit */
+#define PCRE2_FIRSTMAPSET   0x00000040u /* bitmap of first code units is set */
+#define PCRE2_LASTSET       0x00000080u /* last code unit is set */
+#define PCRE2_LASTCASELESS  0x00000100u /* caseless last code unit */
+#define PCRE2_STARTLINE     0x00000200u /* start after \n for multiline */
+#define PCRE2_JCHANGED      0x00000400u /* j option used in pattern */
+#define PCRE2_HASCRORLF     0x00000800u /* explicit \r or \n in pattern */
+#define PCRE2_HASTHEN       0x00001000u /* pattern contains (*THEN) */
+#define PCRE2_MATCH_EMPTY   0x00002000u /* pattern can match empty string */
+#define PCRE2_BSR_SET       0x00004000u /* BSR was set in the pattern */
+#define PCRE2_NL_SET        0x00008000u /* newline was set in the pattern */
+#define PCRE2_NOTEMPTY_SET  0x00010000u /* (*NOTEMPTY) used        ) keep */
+#define PCRE2_NE_ATST_SET   0x00020000u /* (*NOTEMPTY_ATSTART) used) together */
+#define PCRE2_DEREF_TABLES  0x00040000u /* release character tables */
+#define PCRE2_NOJIT         0x00080000u /* (*NOJIT) used */
+#define PCRE2_HASBKPORX     0x00100000u /* contains \P, \p, or \X */
+#define PCRE2_DUPCAPUSED    0x00200000u /* contains (?| */
+#define PCRE2_HASBKC        0x00400000u /* contains \C */
+#define PCRE2_HASACCEPT     0x00800000u /* contains (*ACCEPT) */
 
 #define PCRE2_MODE_MASK     (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
 
@@ -574,6 +580,16 @@ modes. */
 #define REQ_CU_MAX       2000
 #endif
 
+/* The maximum nesting depth for Unicode character class sets.
+Currently fixed. Warning: the interpreter relies on this so it can encode
+the operand stack in a uint32_t. A nesting limit of 15 implies (15*2+1)=31
+stack operands required, due to the fact that we have two (and only two)
+levels of operator precedence. In the UTS#18 syntax, you can write 'x&&y[z]'
+and in Perl syntax you can write '(?[ x - y & (z) ])', both of which imply
+pushing the match results for x & y to the stack. */
+
+#define ECLASS_NEST_LIMIT  15
+
 /* Offsets for the bitmap tables in the cbits set of tables. Each table
 contains a set of bits for a class map. Some classes are built by combining
 these tables. */
@@ -609,6 +625,13 @@ total length of the tables. */
 #define ctypes_offset (cbits_offset + cbit_length)  /* Character types */
 #define TABLES_LENGTH (ctypes_offset + 256)
 
+/* Private flags used in compile_context.optimization_flags */
+
+#define PCRE2_OPTIM_AUTO_POSSESS    0x00000001u
+#define PCRE2_OPTIM_DOTSTAR_ANCHOR  0x00000002u
+#define PCRE2_OPTIM_START_OPTIMIZE  0x00000004u
+
+#define PCRE2_OPTIMIZATION_ALL      0x00000007u
 
 /* -------------------- Character and string names ------------------------ */
 
@@ -915,6 +938,7 @@ a positive value. */
 #define STRING_naplb0                "naplb\0"
 #define STRING_nla0                  "nla\0"
 #define STRING_nlb0                  "nlb\0"
+#define STRING_scs0                  "scs\0"
 #define STRING_sr0                   "sr\0"
 #define STRING_asr0                  "asr\0"
 #define STRING_positive_lookahead0   "positive_lookahead\0"
@@ -925,6 +949,7 @@ a positive value. */
 #define STRING_negative_lookbehind0  "negative_lookbehind\0"
 #define STRING_script_run0           "script_run\0"
 #define STRING_atomic_script_run     "atomic_script_run"
+#define STRING_scan_substring0       "scan_substring\0"
 
 #define STRING_alpha0                "alpha\0"
 #define STRING_lower0                "lower\0"
@@ -965,6 +990,8 @@ a positive value. */
 #define STRING_NO_START_OPT_RIGHTPAR      "NO_START_OPT)"
 #define STRING_NOTEMPTY_RIGHTPAR          "NOTEMPTY)"
 #define STRING_NOTEMPTY_ATSTART_RIGHTPAR  "NOTEMPTY_ATSTART)"
+#define STRING_CASELESS_RESTRICT_RIGHTPAR "CASELESS_RESTRICT)"
+#define STRING_TURKISH_CASING_RIGHTPAR    "TURKISH_CASING)"
 #define STRING_LIMIT_HEAP_EQ              "LIMIT_HEAP="
 #define STRING_LIMIT_MATCH_EQ             "LIMIT_MATCH="
 #define STRING_LIMIT_DEPTH_EQ             "LIMIT_DEPTH="
@@ -1216,6 +1243,7 @@ only. */
 #define STRING_naplb0                STR_n STR_a STR_p STR_l STR_b "\0"
 #define STRING_nla0                  STR_n STR_l STR_a "\0"
 #define STRING_nlb0                  STR_n STR_l STR_b "\0"
+#define STRING_scs0                  STR_s STR_c STR_s "\0"
 #define STRING_sr0                   STR_s STR_r "\0"
 #define STRING_asr0                  STR_a STR_s STR_r "\0"
 #define STRING_positive_lookahead0   STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
@@ -1226,6 +1254,7 @@ only. */
 #define STRING_negative_lookbehind0  STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
 #define STRING_script_run0           STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0"
 #define STRING_atomic_script_run     STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n
+#define STRING_scan_substring0       STR_s STR_c STR_a STR_n STR_UNDERSCORE STR_s STR_u STR_b STR_s STR_t STR_r STR_i STR_n STR_g "\0"
 
 #define STRING_alpha0                STR_a STR_l STR_p STR_h STR_a "\0"
 #define STRING_lower0                STR_l STR_o STR_w STR_e STR_r "\0"
@@ -1266,6 +1295,8 @@ only. */
 #define STRING_NO_START_OPT_RIGHTPAR      STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
 #define STRING_NOTEMPTY_RIGHTPAR          STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
 #define STRING_NOTEMPTY_ATSTART_RIGHTPAR  STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
+#define STRING_CASELESS_RESTRICT_RIGHTPAR STR_C STR_A STR_S STR_E STR_L STR_E STR_S STR_S STR_UNDERSCORE STR_R STR_E STR_S STR_T STR_R STR_I STR_C STR_T STR_RIGHT_PARENTHESIS
+#define STRING_TURKISH_CASING_RIGHTPAR    STR_T STR_U STR_R STR_K STR_I STR_S STR_H STR_UNDERSCORE STR_C STR_A STR_S STR_I STR_N STR_G STR_RIGHT_PARENTHESIS
 #define STRING_LIMIT_HEAP_EQ              STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN
 #define STRING_LIMIT_MATCH_EQ             STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
 #define STRING_LIMIT_DEPTH_EQ             STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN
@@ -1290,21 +1321,22 @@ only. */
 changed, the autopossessifying table in pcre2_auto_possess.c must be updated to
 match. */
 
-#define PT_ANY        0    /* Any property - matches all chars */
-#define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */
-#define PT_GC         2    /* Specified general characteristic (e.g. L) */
-#define PT_PC         3    /* Specified particular characteristic (e.g. Lu) */
-#define PT_SC         4    /* Script only (e.g. Han) */
-#define PT_SCX        5    /* Script extensions (includes SC) */
-#define PT_ALNUM      6    /* Alphanumeric - the union of L and N */
-#define PT_SPACE      7    /* Perl space - general category Z plus 9,10,12,13 */
-#define PT_PXSPACE    8    /* POSIX space - Z plus 9,10,11,12,13 */
-#define PT_WORD       9    /* Word - L, N, Mn, or Pc */
-#define PT_CLIST     10    /* Pseudo-property: match character list */
-#define PT_UCNC      11    /* Universal Character nameable character */
-#define PT_BIDICL    12    /* Specified bidi class */
-#define PT_BOOL      13    /* Boolean property */
-#define PT_TABSIZE   14    /* Size of square table for autopossessify tests */
+#define PT_LAMP       0    /* L& - the union of Lu, Ll, Lt */
+#define PT_GC         1    /* Specified general characteristic (e.g. L) */
+#define PT_PC         2    /* Specified particular characteristic (e.g. Lu) */
+#define PT_SC         3    /* Script only (e.g. Han) */
+#define PT_SCX        4    /* Script extensions (includes SC) */
+#define PT_ALNUM      5    /* Alphanumeric - the union of L and N */
+#define PT_SPACE      6    /* Perl space - general category Z plus 9,10,12,13 */
+#define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */
+#define PT_WORD       8    /* Word - L, N, Mn, or Pc */
+#define PT_CLIST      9    /* Pseudo-property: match character list */
+#define PT_UCNC      10    /* Universal Character nameable character */
+#define PT_BIDICL    11    /* Specified bidi class */
+#define PT_BOOL      12    /* Boolean property */
+#define PT_ANY       13    /* Must be the last entry!
+                              Any property - matches all chars */
+#define PT_TABSIZE PT_ANY  /* Size of square table for autopossessify tests */
 
 /* The following special properties are used only in XCLASS items, when POSIX
 classes are specified and PCRE2_UCP is set - in other words, for Unicode
@@ -1334,6 +1366,94 @@ contain characters with values greater than 255. */
 #define XCL_RANGE    2     /* A range (two multibyte chars) follows */
 #define XCL_PROP     3     /* Unicode property (2-byte property code follows) */
 #define XCL_NOTPROP  4     /* Unicode inverted property (ditto) */
+/* This value represents the beginning of character lists. The value
+is 16 bits long, and is stored as a high and low byte pair in 8-bit mode.
+The lower 12 bits contain information about character lists (see later). */
+#define XCL_LIST     (sizeof(PCRE2_UCHAR) == 1 ? 0x10 : 0x1000)
+
+/* When a character class contains many characters/ranges,
+they are stored in character lists. There are four character
+lists which contain characters/ranges within a given range.
+
+The name, character range and item size for each list:
+Low16    [0x100 - 0x7fff]            16 bit items
+High16   [0x8000 - 0xffff]           16 bit items
+Low32    [0x10000 - 0x7fffffff]      32 bit items
+High32   [0x80000000 - 0xffffffff]   32 bit items
+
+The Low32 character list is used only when utf encoding or 32 bit
+character width is enabled, and the High32 character list is used only
+when 32 bit character width is enabled.
+
+Each character list contains items. The lowest bit represents that
+an item is the beginning of a range (bit is cleared), or not (bit
+is set). The other bits represent the character shifted left by
+one, so its highest bit is discarded. Due to the layout of character
+lists, the highest bit of a character is always known:
+
+Low16 and Low32: the highest bit is always zero
+High16 and High32: the highest bit is always one
+
+The items are ordered in increasing order, so binary search can be
+used to find the lower bound of an input character. The lower bound
+is the highest item whose value is less than or equal to the input
+character. If the lowest bit of the item is cleared, or the character
+stored in the item is equal to the input character, the input
+character is in the character list. */
+
+/* Character list constants. */
+#define XCL_CHAR_LIST_LOW_16_START 0x100
+#define XCL_CHAR_LIST_LOW_16_END 0x7fff
+#define XCL_CHAR_LIST_LOW_16_ADD 0x0
+
+#define XCL_CHAR_LIST_HIGH_16_START 0x8000
+#define XCL_CHAR_LIST_HIGH_16_END 0xffff
+#define XCL_CHAR_LIST_HIGH_16_ADD 0x8000
+
+#define XCL_CHAR_LIST_LOW_32_START 0x10000
+#define XCL_CHAR_LIST_LOW_32_END 0x7fffffff
+#define XCL_CHAR_LIST_LOW_32_ADD 0x0
+
+#define XCL_CHAR_LIST_HIGH_32_START 0x80000000
+#define XCL_CHAR_LIST_HIGH_32_END 0xffffffff
+#define XCL_CHAR_LIST_HIGH_32_ADD 0x80000000
+
+/* Mask for getting the descriptors of character list ranges.
+Each descriptor has XCL_TYPE_BIT_LEN bits, and can be processed
+by XCL_BEGIN_WITH_RANGE and XCL_ITEM_COUNT_MASK macros. */
+#define XCL_TYPE_MASK 0xfff
+#define XCL_TYPE_BIT_LEN 3
+/* If this bit is set, the first item of the character list is the
+end of a range, which started before the starting character of the
+character list. */
+#define XCL_BEGIN_WITH_RANGE 0x4
+/* Number of items in the character list: 0, 1, or 2. The value 3
+represents that the item count is stored at the beginning of the
+character list. The item count has the same width as the items
+in the character list (e.g. 16 bit for Low16 and High16 lists). */
+#define XCL_ITEM_COUNT_MASK 0x3
+/* Shift and flag for constructing character list items. The XCL_CHAR_END
+is set, when the item is not the beginning of a range. The XCL_CHAR_SHIFT
+can be used to encode / decode the character value stored in an item. */
+#define XCL_CHAR_END 0x1
+#define XCL_CHAR_SHIFT 1
+
+/* Flag bits for an extended class (OP_ECLASS), which is used for complex
+character matches such as [\p{Greek} && \p{Ll}]. */
+
+#define ECL_MAP     0x01  /* Flag: a 32-byte map is present */
+
+/* Type tags for the items stored in an extended class (OP_ECLASS). These items
+follow the OP_ECLASS's flag char and bitmap, and represent a Reverse Polish
+Notation list of operands and operators manipulating a stack of bits. */
+
+#define ECL_AND     1 /* Pop two from the stack, AND, and push result. */
+#define ECL_OR      2 /* Pop two from the stack, OR, and push result. */
+#define ECL_XOR     3 /* Pop two from the stack, XOR, and push result. */
+#define ECL_NOT     4 /* Pop one from the stack, NOT, and push result. */
+#define ECL_XCLASS  5 /* XCLASS nested within ECLASS; match and push result. */
+#define ECL_ANY     6 /* Temporary, only used during compilation. */
+#define ECL_NONE    7 /* Temporary, only used during compilation. */
 
 /* These are escaped items that aren't just an encoding of a particular data
 value such as \n. They must have non-zero values, as check_escape() returns 0
@@ -1555,102 +1675,105 @@ enum {
                               character > 255 is encountered. */
   OP_XCLASS,         /* 112 Extended class for handling > 255 chars within the
                               class. This does both positive and negative. */
-  OP_REF,            /* 113 Match a back reference, casefully */
-  OP_REFI,           /* 114 Match a back reference, caselessly */
-  OP_DNREF,          /* 115 Match a duplicate name backref, casefully */
-  OP_DNREFI,         /* 116 Match a duplicate name backref, caselessly */
-  OP_RECURSE,        /* 117 Match a numbered subpattern (possibly recursive) */
-  OP_CALLOUT,        /* 118 Call out to external function if provided */
-  OP_CALLOUT_STR,    /* 119 Call out with string argument */
-
-  OP_ALT,            /* 120 Start of alternation */
-  OP_KET,            /* 121 End of group that doesn't have an unbounded repeat */
-  OP_KETRMAX,        /* 122 These two must remain together and in this */
-  OP_KETRMIN,        /* 123 order. They are for groups the repeat for ever. */
-  OP_KETRPOS,        /* 124 Possessive unlimited repeat. */
+  OP_ECLASS,         /* 113 Really-extended class, for handling logical
+                              expressions computed over characters. */
+  OP_REF,            /* 114 Match a back reference, casefully */
+  OP_REFI,           /* 115 Match a back reference, caselessly */
+  OP_DNREF,          /* 116 Match a duplicate name backref, casefully */
+  OP_DNREFI,         /* 117 Match a duplicate name backref, caselessly */
+  OP_RECURSE,        /* 118 Match a numbered subpattern (possibly recursive) */
+  OP_CALLOUT,        /* 119 Call out to external function if provided */
+  OP_CALLOUT_STR,    /* 120 Call out with string argument */
+
+  OP_ALT,            /* 121 Start of alternation */
+  OP_KET,            /* 122 End of group that doesn't have an unbounded repeat */
+  OP_KETRMAX,        /* 123 These two must remain together and in this */
+  OP_KETRMIN,        /* 124 order. They are for groups that repeat for ever. */
+  OP_KETRPOS,        /* 125 Possessive unlimited repeat. */
 
   /* The assertions must come before BRA, CBRA, ONCE, and COND. */
 
-  OP_REVERSE,        /* 125 Move pointer back - used in lookbehind assertions */
-  OP_VREVERSE,       /* 126 Move pointer back - variable */
-  OP_ASSERT,         /* 127 Positive lookahead */
-  OP_ASSERT_NOT,     /* 128 Negative lookahead */
-  OP_ASSERTBACK,     /* 129 Positive lookbehind */
-  OP_ASSERTBACK_NOT, /* 130 Negative lookbehind */
-  OP_ASSERT_NA,      /* 131 Positive non-atomic lookahead */
-  OP_ASSERTBACK_NA,  /* 132 Positive non-atomic lookbehind */
+  OP_REVERSE,        /* 126 Move pointer back - used in lookbehind assertions */
+  OP_VREVERSE,       /* 127 Move pointer back - variable */
+  OP_ASSERT,         /* 128 Positive lookahead */
+  OP_ASSERT_NOT,     /* 129 Negative lookahead */
+  OP_ASSERTBACK,     /* 130 Positive lookbehind */
+  OP_ASSERTBACK_NOT, /* 131 Negative lookbehind */
+  OP_ASSERT_NA,      /* 132 Positive non-atomic lookahead */
+  OP_ASSERTBACK_NA,  /* 133 Positive non-atomic lookbehind */
+  OP_ASSERT_SCS,     /* 134 Scan substring */
 
   /* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come
   immediately after the assertions, with ONCE first, as there's a test for >=
   ONCE for a subpattern that isn't an assertion. The POS versions must
   immediately follow the non-POS versions in each case. */
 
-  OP_ONCE,           /* 133 Atomic group, contains captures */
-  OP_SCRIPT_RUN,     /* 134 Non-capture, but check characters' scripts */
-  OP_BRA,            /* 135 Start of non-capturing bracket */
-  OP_BRAPOS,         /* 136 Ditto, with unlimited, possessive repeat */
-  OP_CBRA,           /* 137 Start of capturing bracket */
-  OP_CBRAPOS,        /* 138 Ditto, with unlimited, possessive repeat */
-  OP_COND,           /* 139 Conditional group */
+  OP_ONCE,           /* 135 Atomic group, contains captures */
+  OP_SCRIPT_RUN,     /* 136 Non-capture, but check characters' scripts */
+  OP_BRA,            /* 137 Start of non-capturing bracket */
+  OP_BRAPOS,         /* 138 Ditto, with unlimited, possessive repeat */
+  OP_CBRA,           /* 139 Start of capturing bracket */
+  OP_CBRAPOS,        /* 140 Ditto, with unlimited, possessive repeat */
+  OP_COND,           /* 141 Conditional group */
 
   /* These five must follow the previous five, in the same order. There's a
   check for >= SBRA to distinguish the two sets. */
 
-  OP_SBRA,           /* 140 Start of non-capturing bracket, check empty  */
-  OP_SBRAPOS,        /* 141 Ditto, with unlimited, possessive repeat */
-  OP_SCBRA,          /* 142 Start of capturing bracket, check empty */
-  OP_SCBRAPOS,       /* 143 Ditto, with unlimited, possessive repeat */
-  OP_SCOND,          /* 144 Conditional group, check empty */
+  OP_SBRA,           /* 142 Start of non-capturing bracket, check empty  */
+  OP_SBRAPOS,        /* 143 Ditto, with unlimited, possessive repeat */
+  OP_SCBRA,          /* 144 Start of capturing bracket, check empty */
+  OP_SCBRAPOS,       /* 145 Ditto, with unlimited, possessive repeat */
+  OP_SCOND,          /* 146 Conditional group, check empty */
 
   /* The next two pairs must (respectively) be kept together. */
 
-  OP_CREF,           /* 145 Used to hold a capture number as condition */
-  OP_DNCREF,         /* 146 Used to point to duplicate names as a condition */
-  OP_RREF,           /* 147 Used to hold a recursion number as condition */
-  OP_DNRREF,         /* 148 Used to point to duplicate names as a condition */
-  OP_FALSE,          /* 149 Always false (used by DEFINE and VERSION) */
-  OP_TRUE,           /* 150 Always true (used by VERSION) */
+  OP_CREF,           /* 147 Used to hold a capture number as condition */
+  OP_DNCREF,         /* 148 Used to point to duplicate names as a condition */
+  OP_RREF,           /* 149 Used to hold a recursion number as condition */
+  OP_DNRREF,         /* 150 Used to point to duplicate names as a condition */
+  OP_FALSE,          /* 151 Always false (used by DEFINE and VERSION) */
+  OP_TRUE,           /* 152 Always true (used by VERSION) */
 
-  OP_BRAZERO,        /* 151 These two must remain together and in this */
-  OP_BRAMINZERO,     /* 152 order. */
-  OP_BRAPOSZERO,     /* 153 */
+  OP_BRAZERO,        /* 153 These two must remain together and in this */
+  OP_BRAMINZERO,     /* 154 order. */
+  OP_BRAPOSZERO,     /* 155 */
 
   /* These are backtracking control verbs */
 
-  OP_MARK,           /* 154 always has an argument */
-  OP_PRUNE,          /* 155 */
-  OP_PRUNE_ARG,      /* 156 same, but with argument */
-  OP_SKIP,           /* 157 */
-  OP_SKIP_ARG,       /* 158 same, but with argument */
-  OP_THEN,           /* 159 */
-  OP_THEN_ARG,       /* 160 same, but with argument */
-  OP_COMMIT,         /* 161 */
-  OP_COMMIT_ARG,     /* 162 same, but with argument */
+  OP_MARK,           /* 156 always has an argument */
+  OP_PRUNE,          /* 157 */
+  OP_PRUNE_ARG,      /* 158 same, but with argument */
+  OP_SKIP,           /* 159 */
+  OP_SKIP_ARG,       /* 160 same, but with argument */
+  OP_THEN,           /* 161 */
+  OP_THEN_ARG,       /* 162 same, but with argument */
+  OP_COMMIT,         /* 163 */
+  OP_COMMIT_ARG,     /* 164 same, but with argument */
 
   /* These are forced failure and success verbs. FAIL and ACCEPT do accept an
   argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
   without the need for a special opcode. */
 
-  OP_FAIL,           /* 163 */
-  OP_ACCEPT,         /* 164 */
-  OP_ASSERT_ACCEPT,  /* 165 Used inside assertions */
-  OP_CLOSE,          /* 166 Used before OP_ACCEPT to close open captures */
+  OP_FAIL,           /* 165 */
+  OP_ACCEPT,         /* 166 */
+  OP_ASSERT_ACCEPT,  /* 167 Used inside assertions */
+  OP_CLOSE,          /* 168 Used before OP_ACCEPT to close open captures */
 
   /* This is used to skip a subpattern with a {0} quantifier */
 
-  OP_SKIPZERO,       /* 167 */
+  OP_SKIPZERO,       /* 169 */
 
   /* This is used to identify a DEFINE group during compilation so that it can
   be checked for having only one branch. It is changed to OP_FALSE before
   compilation finishes. */
 
-  OP_DEFINE,         /* 168 */
+  OP_DEFINE,         /* 170 */
 
   /* These opcodes replace their normal counterparts in UCP mode when
   PCRE2_EXTRA_ASCII_BSW is not set. */
 
-  OP_NOT_UCP_WORD_BOUNDARY, /* 169 */
-  OP_UCP_WORD_BOUNDARY,     /* 170 */
+  OP_NOT_UCP_WORD_BOUNDARY, /* 171 */
+  OP_UCP_WORD_BOUNDARY,     /* 172 */
 
   /* This is not an opcode, but is used to check that tables indexed by opcode
   are the correct length, in order to catch updating errors - there have been
@@ -1693,19 +1816,21 @@ some cases doesn't actually use these names at all). */
   "*+","++", "?+", "{",                                           \
   "*", "*?", "+", "+?", "?", "??", "{", "{",                      \
   "*+","++", "?+", "{",                                           \
-  "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi",  \
+  "class", "nclass", "xclass", "eclass",                          \
+  "Ref", "Refi", "DnRef", "DnRefi",                               \
   "Recurse", "Callout", "CalloutStr",                             \
   "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos",                  \
   "Reverse", "VReverse", "Assert", "Assert not",                  \
   "Assert back", "Assert back not",                               \
   "Non-atomic assert", "Non-atomic assert back",                  \
+  "Scan substring",                                               \
   "Once",                                                         \
   "Script run",                                                   \
   "Bra", "BraPos", "CBra", "CBraPos",                             \
   "Cond",                                                         \
   "SBra", "SBraPos", "SCBra", "SCBraPos",                         \
   "SCond",                                                        \
-  "Cond ref", "Cond dnref", "Cond rec", "Cond dnrec",             \
+  "Capture ref", "Capture dnref", "Cond rec", "Cond dnrec",       \
   "Cond false", "Cond true",                                      \
   "Brazero", "Braminzero", "Braposzero",                          \
   "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP",                  \
@@ -1766,10 +1891,11 @@ in UTF-8 mode. The code that uses this table must know about such things. */
   1+(32/sizeof(PCRE2_UCHAR)),    /* CLASS                                  */ \
   1+(32/sizeof(PCRE2_UCHAR)),    /* NCLASS                                 */ \
   0,                             /* XCLASS - variable length               */ \
+  0,                             /* ECLASS - variable length               */ \
   1+IMM2_SIZE,                   /* REF                                    */ \
-  1+IMM2_SIZE,                   /* REFI                                   */ \
+  1+IMM2_SIZE+1,                 /* REFI                                   */ \
   1+2*IMM2_SIZE,                 /* DNREF                                  */ \
-  1+2*IMM2_SIZE,                 /* DNREFI                                 */ \
+  1+2*IMM2_SIZE+1,               /* DNREFI                                 */ \
   1+LINK_SIZE,                   /* RECURSE                                */ \
   1+2*LINK_SIZE+1,               /* CALLOUT                                */ \
   0,                             /* CALLOUT_STR - variable length          */ \
@@ -1786,6 +1912,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
   1+LINK_SIZE,                   /* Assert behind not                      */ \
   1+LINK_SIZE,                   /* NA Assert                              */ \
   1+LINK_SIZE,                   /* NA Assert behind                       */ \
+  1+LINK_SIZE,                   /* Scan substring                         */ \
   1+LINK_SIZE,                   /* ONCE                                   */ \
   1+LINK_SIZE,                   /* SCRIPT_RUN                             */ \
   1+LINK_SIZE,                   /* BRA                                    */ \
@@ -1815,6 +1942,11 @@ in UTF-8 mode. The code that uses this table must know about such things. */
 
 #define RREF_ANY  0xffff
 
+/* Constants used by OP_REFI and OP_DNREFI to control matching behaviour. */
+
+#define REFI_FLAG_CASELESS_RESTRICT  0x1
+#define REFI_FLAG_TURKISH_CASING     0x2
+
 
 /* ---------- Private structures that are mode-independent. ---------- */
 
@@ -1890,6 +2022,14 @@ typedef struct {
 #define UCD_SCRIPTX(ch)     UCD_SCRIPTX_PROP(GET_UCD(ch))
 #define UCD_BPROPS(ch)      UCD_BPROPS_PROP(GET_UCD(ch))
 #define UCD_BIDICLASS(ch)   UCD_BIDICLASS_PROP(GET_UCD(ch))
+#define UCD_ANY_I(ch) \
+  /* match any of the four characters 'i', 'I', U+0130, U+0131 */ \
+  (((uint32_t)(ch) | 0x20u) == 0x69u || ((uint32_t)(ch) | 1u) == 0x0131u)
+#define UCD_DOTTED_I(ch) \
+  ((uint32_t)(ch) == 0x69u || (uint32_t)(ch) == 0x0130u)
+#define UCD_FOLD_I_TURKISH(ch) \
+  ((uint32_t)(ch) == 0x0130u ?   0x69u : \
+   (uint32_t)(ch) ==   0x49u ? 0x0131u : (uint32_t)(ch))
 
 /* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words
 that form a bitmap representing a list of scripts or boolean properties. These
@@ -1955,6 +2095,9 @@ extern const uint8_t          PRIV(utf8_table4)[];
 #define _pcre2_vspace_list             PCRE2_SUFFIX(_pcre2_vspace_list_)
 #define _pcre2_ucd_boolprop_sets       PCRE2_SUFFIX(_pcre2_ucd_boolprop_sets_)
 #define _pcre2_ucd_caseless_sets       PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
+#define _pcre2_ucd_turkish_dotted_i_caseset  PCRE2_SUFFIX(_pcre2_ucd_turkish_dotted_i_caseset_)
+#define _pcre2_ucd_nocase_ranges       PCRE2_SUFFIX(_pcre2_ucd_nocase_ranges_)
+#define _pcre2_ucd_nocase_ranges_size  PCRE2_SUFFIX(_pcre2_ucd_nocase_ranges_size_)
 #define _pcre2_ucd_digit_sets          PCRE2_SUFFIX(_pcre2_ucd_digit_sets_)
 #define _pcre2_ucd_script_sets         PCRE2_SUFFIX(_pcre2_ucd_script_sets_)
 #define _pcre2_ucd_records             PCRE2_SUFFIX(_pcre2_ucd_records_)
@@ -1971,14 +2114,17 @@ extern const uint8_t          PRIV(utf8_table4)[];
 extern const uint8_t                   PRIV(OP_lengths)[];
 extern const uint32_t                  PRIV(callout_end_delims)[];
 extern const uint32_t                  PRIV(callout_start_delims)[];
-extern const pcre2_compile_context     PRIV(default_compile_context);
-extern const pcre2_convert_context     PRIV(default_convert_context);
-extern const pcre2_match_context       PRIV(default_match_context);
+extern pcre2_compile_context           PRIV(default_compile_context);
+extern pcre2_convert_context           PRIV(default_convert_context);
+extern pcre2_match_context             PRIV(default_match_context);
 extern const uint8_t                   PRIV(default_tables)[];
 extern const uint32_t                  PRIV(hspace_list)[];
 extern const uint32_t                  PRIV(vspace_list)[];
 extern const uint32_t                  PRIV(ucd_boolprop_sets)[];
 extern const uint32_t                  PRIV(ucd_caseless_sets)[];
+extern const uint32_t                  PRIV(ucd_turkish_dotted_i_caseset);
+extern const uint32_t                  PRIV(ucd_nocase_ranges)[];
+extern const uint32_t                  PRIV(ucd_nocase_ranges_size);
 extern const uint32_t                  PRIV(ucd_digit_sets)[];
 extern const uint32_t                  PRIV(ucd_script_sets)[];
 extern const ucd_record                PRIV(ucd_records)[];
@@ -2039,11 +2185,12 @@ is available. */
 #define _pcre2_valid_utf             PCRE2_SUFFIX(_pcre2_valid_utf_)
 #define _pcre2_was_newline           PCRE2_SUFFIX(_pcre2_was_newline_)
 #define _pcre2_xclass                PCRE2_SUFFIX(_pcre2_xclass_)
+#define _pcre2_eclass                PCRE2_SUFFIX(_pcre2_eclass_)
 
 extern int          _pcre2_auto_possessify(PCRE2_UCHAR *,
                       const compile_block *);
 extern int          _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
-                      int *, uint32_t, uint32_t, BOOL, compile_block *);
+                      int *, uint32_t, uint32_t, uint32_t, BOOL, compile_block *);
 extern PCRE2_SPTR   _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR,
                       BOOL, int *);
 extern PCRE2_SPTR   _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
@@ -2066,7 +2213,9 @@ extern int          _pcre2_study(pcre2_real_code *);
 extern int          _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
 extern BOOL         _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
                       uint32_t *, BOOL);
-extern BOOL         _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
+extern BOOL         _pcre2_xclass(uint32_t, PCRE2_SPTR, const uint8_t *, BOOL);
+extern BOOL         _pcre2_eclass(uint32_t, PCRE2_SPTR, PCRE2_SPTR,
+                      const uint8_t *, BOOL);
 
 /* This function is needed only when memmove() is not available. */
 
@@ -2079,6 +2228,8 @@ extern void *       _pcre2_memmove(void *, const void *, size_t);
 
 extern BOOL         PRIV(ckd_smul)(PCRE2_SIZE *, int, int);
 
+#include "pcre2_util.h"
+
 #endif  /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */
 
 /* End of pcre2_internal.h */

+ 50 - 16
thirdparty/pcre2/src/pcre2_intmodedep.h

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2023 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -47,7 +47,7 @@ to have access to the hidden structures at all supported widths.
 
 Some of the mode-dependent macros are required at different widths for
 different parts of the pcre2test code (in particular, the included
-pcre_printint.c file). We undefine them here so that they can be re-defined for
+pcre2_printint.c file). We undefine them here so that they can be re-defined for
 multiple inclusions. Not all of these are used in pcre2test, but it's easier
 just to undefine them all. */
 
@@ -435,7 +435,7 @@ UTF-16 mode. */
   c = *eptr; \
   if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
 
-/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the
+/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the
 pointer, incrementing length if there is a low surrogate. This is called when
 we do not know if we are in UTF-16 mode. */
 
@@ -556,6 +556,11 @@ code that uses them is simpler because it assumes this. */
 /* The real general context structure. At present it holds only data for custom
 memory control. */
 
+/* WARNING: if this is ever changed, code in pcre2_substitute.c will have to be
+changed because it builds a general context "by hand" in order to avoid the
+malloc() call in pcre2_general_context_create(). There is also code in
+pcre2_match.c that makes the same assumption. */
+
 typedef struct pcre2_real_general_context {
   pcre2_memctl memctl;
 } pcre2_real_general_context;
@@ -568,11 +573,13 @@ typedef struct pcre2_real_compile_context {
   void *stack_guard_data;
   const uint8_t *tables;
   PCRE2_SIZE max_pattern_length;
+  PCRE2_SIZE max_pattern_compiled_length;
   uint16_t bsr_convention;
   uint16_t newline_convention;
   uint32_t parens_nest_limit;
   uint32_t extra_options;
   uint32_t max_varlookbehind;
+  uint32_t optimization_flags;
 } pcre2_real_compile_context;
 
 /* The real match context structure. */
@@ -583,10 +590,13 @@ typedef struct pcre2_real_match_context {
   pcre2_jit_callback jit_callback;
   void *jit_callback_data;
 #endif
-  int    (*callout)(pcre2_callout_block *, void *);
-  void    *callout_data;
-  int    (*substitute_callout)(pcre2_substitute_callout_block *, void *);
-  void    *substitute_callout_data;
+  int        (*callout)(pcre2_callout_block *, void *);
+  void        *callout_data;
+  int        (*substitute_callout)(pcre2_substitute_callout_block *, void *);
+  void        *substitute_callout_data;
+  PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *,
+                                        PCRE2_SIZE, int, void *);
+  void        *substitute_case_callout_data;
   PCRE2_SIZE offset_limit;
   uint32_t heap_limit;
   uint32_t match_limit;
@@ -622,6 +632,7 @@ typedef struct pcre2_real_code {
   void    *executable_jit;        /* Pointer to JIT code */
   uint8_t  start_bitmap[32];      /* Bitmap for starting code unit < 256 */
   CODE_BLOCKSIZE_TYPE blocksize;  /* Total (bytes) that was malloc-ed */
+  CODE_BLOCKSIZE_TYPE code_start; /* Byte code start offset */
   uint32_t magic_number;          /* Paranoid and endianness check */
   uint32_t compile_options;       /* Options passed to pcre2_compile() */
   uint32_t overall_options;       /* Options after processing the pattern */
@@ -640,6 +651,7 @@ typedef struct pcre2_real_code {
   uint16_t top_backref;           /* Highest numbered back reference */
   uint16_t name_entry_size;       /* Size (code units) of table entries */
   uint16_t name_count;            /* Number of name entries in the table */
+  uint32_t optimization_flags;    /* Optimizations enabled at compile time */
 } pcre2_real_code;
 
 /* The real match data structure. Define ovector as large as it can ever
@@ -715,6 +727,23 @@ typedef struct named_group {
   uint16_t     isdup;         /* TRUE if a duplicate */
 } named_group;
 
+/* Structure for caching sorted ranges. This improves the performance
+of translating META code to byte code. */
+
+typedef struct class_ranges {
+  struct class_ranges *next;       /* Next class ranges */
+  size_t char_lists_size;          /* Total size of encoded char lists */
+  size_t char_lists_start;         /* Start offset of encoded char lists */
+  uint16_t range_list_size;        /* Size of ranges array */
+  uint16_t char_lists_types;       /* The XCL_LIST header of char lists */
+  /* Followed by the list of ranges (start/end pairs) */
+} class_ranges;
+
+typedef union class_bits_storage {
+  uint8_t classbits[32];
+  uint32_t classwords[8];
+} class_bits_storage;
+
 /* Structure for passing "static" information around between the functions
 doing the compiling, so that they are thread-safe. */
 
@@ -724,14 +753,15 @@ typedef struct compile_block {
   const uint8_t *fcc;              /* Points to case-flipping table */
   const uint8_t *cbits;            /* Points to character type table */
   const uint8_t *ctypes;           /* Points to table of type maps */
-  PCRE2_SPTR start_workspace;      /* The start of working space */
-  PCRE2_SPTR start_code;           /* The start of the compiled code */
+  PCRE2_UCHAR *start_workspace;    /* The start of working space */
+  PCRE2_UCHAR *start_code;         /* The start of the compiled code */
   PCRE2_SPTR start_pattern;        /* The start of the pattern */
   PCRE2_SPTR end_pattern;          /* The end of the pattern */
   PCRE2_UCHAR *name_table;         /* The name/number table */
   PCRE2_SIZE workspace_size;       /* Size of workspace */
   PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */
   PCRE2_SIZE erroroffset;          /* Offset of error in pattern */
+  class_bits_storage classbits;    /* Temporary store for classbits */
   uint16_t names_found;            /* Number of entries so far */
   uint16_t name_entry_size;        /* Size of each entry */
   uint16_t parens_depth;           /* Depth of nested parentheses */
@@ -749,9 +779,9 @@ typedef struct compile_block {
   uint32_t backref_map;            /* Bitmap of low back refs */
   uint32_t nltype;                 /* Newline type */
   uint32_t nllen;                  /* Newline string length */
-  uint32_t class_range_start;      /* Overall class range start */
-  uint32_t class_range_end;        /* Overall class range end */
   PCRE2_UCHAR nl[4];               /* Newline string when fixed length */
+  uint8_t class_op_used[ECLASS_NEST_LIMIT]; /* Operation used for
+                                               extended classes */
   uint32_t req_varyopt;            /* "After variable item" flag for reqbyte */
   uint32_t max_varlookbehind;      /* Limit for variable lookbehinds */
   int  max_lookbehind;             /* Maximum lookbehind encountered (characters) */
@@ -759,6 +789,11 @@ typedef struct compile_block {
   BOOL had_pruneorskip;            /* (*PRUNE) or (*SKIP) encountered */
   BOOL had_recurse;                /* Had a pattern recursion or subroutine call */
   BOOL dupnames;                   /* Duplicate names exist */
+#ifdef SUPPORT_WIDE_CHARS
+  class_ranges *cranges;           /* First class range. */
+  class_ranges *next_cranges;      /* Next class range. */
+  size_t char_lists_size;          /* Current size of character lists */
+#endif
 } compile_block;
 
 /* Structure for keeping the properties of the in-memory stack used
@@ -792,7 +827,7 @@ typedef struct heapframe {
   to RRMATCH(), but which do not need to be copied to new frames. */
 
   PCRE2_SPTR ecode;          /* The current position in the pattern */
-  PCRE2_SPTR temp_sptr[2];   /* Used for short-term PCRE_SPTR values */
+  PCRE2_SPTR temp_sptr[2];   /* Used for short-term PCRE2_SPTR values */
   PCRE2_SIZE length;         /* Used for character, string, or code lengths */
   PCRE2_SIZE back_frame;     /* Amount to subtract on RRETURN */
   PCRE2_SIZE temp_size;      /* Used for short-term PCRE2_SIZE values */
@@ -840,11 +875,10 @@ typedef struct heapframe {
   PCRE2_SIZE ovector[131072];   /* Must be last in the structure */
 } heapframe;
 
-/* This typedef is a check that the size of the heapframe structure is a
-multiple of PCRE2_SIZE. See various comments above. */
+/* Assert that the size of the heapframe structure is a multiple of PCRE2_SIZE.
+See various comments above. */
 
-typedef char check_heapframe_size[
-  ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
+STATIC_ASSERT((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0, heapframe_size);
 
 /* Structure for computing the alignment of heapframe. */
 

+ 2280 - 0
thirdparty/pcre2/src/pcre2_jit_char_inc.h

@@ -0,0 +1,2280 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+                    This module by Zoltan Herczeg
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* XClass matching code. */
+
+#ifdef SUPPORT_WIDE_CHARS
+
+/* Operand aliases used while compiling an extended class (ECLASS) item:
+the character is reloaded into TMP1 from ECLASS_CHAR_DATA, and a successful
+match is recorded by OR-ing 1 into ECLASS_STACK_DATA.
+NOTE(review): STACK_TOP and STACK_LIMIT are defined elsewhere; presumably
+they are registers that are free while an ECLASS is processed - confirm. */
+#define ECLASS_CHAR_DATA STACK_TOP
+#define ECLASS_STACK_DATA STACK_LIMIT
+
+/* The generated code keeps (character - charoffset) in TMP1. This macro emits
+the single ADD or SUB that rebases TMP1 from the current charoffset to value,
+and then records value as the new charoffset. Nothing is emitted when the
+offset is already equal to value.
+
+The expansion site must provide charoffset, and whatever OP2 requires. The
+argument is evaluated more than once, so it must not have side effects.
+
+The body is wrapped in do { ... } while (0) so that the macro is a single
+statement: without the wrapper the trailing assignment would escape an
+unbraced if / else at the call site. Call sites must end with a semicolon. */
+#define SET_CHAR_OFFSET(value) \
+  do \
+    { \
+    if ((value) != charoffset) \
+      { \
+      if ((value) < charoffset) \
+        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
+      else \
+        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
+      } \
+    charoffset = (value); \
+    } \
+  while (0)
+
+/* Read the next element of a character list into destination and advance
+next_char past it. Elements are 16 bits wide in the first two lists
+(list_ind <= 1) and 32 bits wide in the later ones.
+
+The expansion site must provide list_ind and next_char (a const uint8_t
+pointer); destination must be an assignable expression.
+
+The body is wrapped in do { ... } while (0) so that the macro is a single
+statement and can be followed by else at the call site. Call sites must end
+with a semicolon. */
+#define READ_FROM_CHAR_LIST(destination) \
+  do \
+    { \
+    if (list_ind <= 1) \
+      { \
+      destination = *(const uint16_t*)next_char; \
+      next_char += 2; \
+      } \
+    else \
+      { \
+      destination = *(const uint32_t*)next_char; \
+      next_char += 4; \
+      } \
+    } \
+  while (0)
+/* Sizes of the buffers embedded in xclass_ranges: local_ranges has room for
+XCLASS_LOCAL_RANGES_SIZE ranges (two values each) and local_stack has room for
+XCLASS_LOCAL_RANGES_LOG2_SIZE items (5 == log2(32)). */
+#define XCLASS_LOCAL_RANGES_SIZE 32
+#define XCLASS_LOCAL_RANGES_LOG2_SIZE 5
+/* One postponed part of the range list: the entries from first_item to
+last_item still have to be handled, and jump is the branch that was emitted
+for characters greater than the end of the range where the list was split. */
+typedef struct xclass_stack_item {
+  sljit_u32 first_item;
+  sljit_u32 last_item;
+  struct sljit_jump *jump;
+} xclass_stack_item;
+/* The ranges of one extended class together with the work stack used while
+code is generated for them. Both pointers refer to the embedded local arrays
+unless a larger block had to be allocated. */
+typedef struct xclass_ranges {
+  size_t range_count; /* Number of values stored in ranges (two per range). */
+  /* Pointer to ranges. A stack area is provided when a small buffer is enough.
+  Each range occupies two consecutive values: its first and last character. */
+  uint32_t *ranges;
+  uint32_t local_ranges[XCLASS_LOCAL_RANGES_SIZE * 2];
+  /* Stack size must be log2(ranges / 2). */
+  xclass_stack_item *stack;
+  xclass_stack_item local_stack[XCLASS_LOCAL_RANGES_LOG2_SIZE];
+} xclass_ranges;
+/* Decode the character items that start at cc into inclusive ranges. Range i
+is stored as ranges->ranges[2 * i] (first character) and
+ranges->ranges[2 * i + 1] (last character); ranges->range_count receives the
+number of values written, that is two per range.
+
+On entry ranges->ranges must already point to a buffer. Two encodings are
+handled:
+
+  - A run of XCL_SINGLE / XCL_RANGE items terminated by XCL_END is decoded
+    directly into that buffer.
+
+  - Otherwise the item must be a character list (asserted as
+    cc[0] >= XCL_LIST) whose elements are read from memory located before
+    common->start. When the estimated number of ranges is greater than
+    XCLASS_LOCAL_RANGES_SIZE, one block is obtained with SLJIT_MALLOC and
+    ranges->stack / ranges->ranges are redirected into it.
+
+When the allocation fails, the compiler memory error is set, ranges->stack
+and ranges->ranges are NULL, and ranges->range_count is left untouched.
+
+NOTE(review): nothing in this function releases the allocated block;
+presumably the caller frees ranges->stack when it is not its own local
+buffer - confirm. */
+static void xclass_compute_ranges(compiler_common *common, PCRE2_SPTR cc, xclass_ranges *ranges)
+{
+DEFINE_COMPILER;
+size_t range_count = 0, est_range_count;
+size_t est_stack_size, tmp;
+uint32_t type, list_ind;
+uint32_t est_type;
+uint32_t char_list_add, range_start, range_end;
+const uint8_t *next_char;
+const uint8_t *est_next_char;
+#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
+BOOL utf = common->utf;
+#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
+
+if (*cc == XCL_SINGLE || *cc == XCL_RANGE)
+  {
+  /* Only a few ranges are present. Each item is a type code followed by one
+  character (XCL_SINGLE) or two characters (XCL_RANGE). */
+  do
+    {
+    type = *cc++;
+    SLJIT_ASSERT(type == XCL_SINGLE || type == XCL_RANGE);
+    GETCHARINCTEST(range_end, cc);
+    ranges->ranges[range_count] = range_end;
+    /* For XCL_SINGLE the value read above is stored as the end as well. */
+    if (type == XCL_RANGE)
+      {
+      GETCHARINCTEST(range_end, cc);
+      }
+
+    ranges->ranges[range_count + 1] = range_end;
+    range_count += 2;
+    }
+  while (*cc != XCL_END);
+
+  SLJIT_ASSERT(range_count <= XCLASS_LOCAL_RANGES_SIZE);
+  ranges->range_count = range_count;
+  return;
+  }
+/* Character list form. The type word is consumed XCL_TYPE_BIT_LEN bits at a
+time, one field per list; in 8-bit mode it is assembled from two code units,
+high byte first. */
+SLJIT_ASSERT(cc[0] >= XCL_LIST);
+#if PCRE2_CODE_UNIT_WIDTH == 8
+type = (uint32_t)(cc[0] << 8) | cc[1];
+cc += 2;
+#else
+type = cc[0];
+cc++;
+#endif  /* CODE_UNIT_WIDTH */
+
+/* Align characters. The value read by GET(cc, 0) is a distance in 2-byte
+units, counted backwards from common->start, to the first list element. */
+next_char = (const uint8_t*)common->start - (GET(cc, 0) << 1);
+type &= XCL_TYPE_MASK;
+
+/* Estimate size. The lists are walked without decoding their elements and
+each list is counted as item_count + 1 ranges; the assertion at the end of
+the function checks that this is an upper bound. A count field equal to
+XCL_ITEM_COUNT_MASK means that the real count is stored as the first value of
+the list. Elements are 2 bytes wide in the first two lists and 4 bytes wide
+in the later ones. */
+est_next_char = next_char;
+est_type = type;
+est_range_count = 0;
+list_ind = 0;
+
+while (est_type > 0)
+  {
+  uint32_t item_count = est_type & XCL_ITEM_COUNT_MASK;
+
+  if (item_count == XCL_ITEM_COUNT_MASK)
+    {
+    if (list_ind <= 1)
+      {
+      item_count = *(const uint16_t*)est_next_char;
+      est_next_char += 2;
+      }
+    else
+      {
+      item_count = *(const uint32_t*)est_next_char;
+      est_next_char += 4;
+      }
+    }
+
+  est_type >>= XCL_TYPE_BIT_LEN;
+  est_next_char += (size_t)item_count << (list_ind <= 1 ? 1 : 2);
+  list_ind++;
+  est_range_count += item_count + 1;
+  }
+
+if (est_range_count > XCLASS_LOCAL_RANGES_SIZE)
+  {
+  est_stack_size = 0;
+  tmp = est_range_count - 1;
+
+  /* Compute log2(est_range_count) rounded up, which is the number of
+  significant bits of est_range_count - 1. */
+  while (tmp > 0)
+    {
+    est_stack_size++;
+    tmp >>= 1;
+    }
+  /* A single block holds the stack items first, followed by two uint32_t
+  values for every estimated range. */
+  ranges->stack = (xclass_stack_item*)SLJIT_MALLOC((sizeof(xclass_stack_item) * est_stack_size)
+    + ((sizeof(uint32_t) << 1) * (size_t)est_range_count), compiler->allocator_data);
+
+  if (ranges->stack == NULL)
+    {
+    sljit_set_compiler_memory_error(compiler);
+    ranges->ranges = NULL;
+    return;
+    }
+
+  ranges->ranges = (uint32_t*)(ranges->stack + est_stack_size);
+  }
+/* Decode the lists. range_start == ~0 means that no range is open. */
+char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
+range_start = ~(uint32_t)0;
+list_ind = 0;
+
+if ((type & XCL_BEGIN_WITH_RANGE) != 0)
+  range_start = XCL_CHAR_LIST_LOW_16_START;
+
+while (type > 0)
+  {
+  uint32_t item_count = type & XCL_ITEM_COUNT_MASK;
+
+  if (item_count == XCL_ITEM_COUNT_MASK)
+    {
+    READ_FROM_CHAR_LIST(item_count);
+    SLJIT_ASSERT(item_count >= XCL_ITEM_COUNT_MASK);
+    }
+
+  while (item_count > 0)
+    {
+    READ_FROM_CHAR_LIST(range_end);
+    /* An element flagged with XCL_CHAR_END closes the open range, or is a
+    single character when no range is open. Any other element opens a
+    range. The character is char_list_add plus the shifted element. */
+    if ((range_end & XCL_CHAR_END) != 0)
+      {
+      range_end = char_list_add + (range_end >> XCL_CHAR_SHIFT);
+
+      if (range_start == ~(uint32_t)0)
+        range_start = range_end;
+
+      ranges->ranges[range_count] = range_start;
+      ranges->ranges[range_count + 1] = range_end;
+      range_count += 2;
+      range_start = ~(uint32_t)0;
+      }
+    else
+      range_start = char_list_add + (range_end >> XCL_CHAR_SHIFT);
+
+    item_count--;
+    }
+  /* Advance to the next list. From here on, type describes the list that
+  follows the one just decoded. */
+  list_ind++;
+  type >>= XCL_TYPE_BIT_LEN;
+  /* When no range is open and the next list begins with a range, that range
+  is opened at the first character of the next list. When a range is open
+  and the next list does not begin with a range, the range is closed at the
+  last character of the list just decoded. An open range followed by a list
+  that begins with a range simply continues. */
+  if (range_start == ~(uint32_t)0)
+    {
+    if ((type & XCL_BEGIN_WITH_RANGE) != 0)
+      {
+      if (list_ind == 1) range_start = XCL_CHAR_LIST_HIGH_16_START;
+#if PCRE2_CODE_UNIT_WIDTH == 32
+      else if (list_ind == 2) range_start = XCL_CHAR_LIST_LOW_32_START;
+      else range_start = XCL_CHAR_LIST_HIGH_32_START;
+#else
+      else range_start = XCL_CHAR_LIST_LOW_32_START;
+#endif
+      }
+    }
+  else if ((type & XCL_BEGIN_WITH_RANGE) == 0)
+    {
+    if (list_ind == 1) range_end = XCL_CHAR_LIST_LOW_16_END;
+    else if (list_ind == 2) range_end = XCL_CHAR_LIST_HIGH_16_END;
+#if PCRE2_CODE_UNIT_WIDTH == 32
+    else if (list_ind == 3) range_end = XCL_CHAR_LIST_LOW_32_END;
+    else range_end = XCL_CHAR_LIST_HIGH_32_END;
+#else
+    else range_end = XCL_CHAR_LIST_LOW_32_END;
+#endif
+
+    ranges->ranges[range_count] = range_start;
+    ranges->ranges[range_count + 1] = range_end;
+    range_count += 2;
+    range_start = ~(uint32_t)0;
+    }
+  /* Select the base value that is added to the elements of the next list. */
+  if (list_ind == 1) char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
+#if PCRE2_CODE_UNIT_WIDTH == 32
+  else if (list_ind == 2) char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
+  else char_list_add = XCL_CHAR_LIST_HIGH_32_ADD;
+#else
+  else char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
+#endif
+  }
+
+SLJIT_ASSERT(range_count > 0 && range_count <= (est_range_count << 1));
+SLJIT_ASSERT(next_char <= (const uint8_t*)common->start);
+ranges->range_count = range_count;
+}
+/* Emit code that tests the value in TMP1 against the 32-byte bitmap bitset.
+Values greater than 255 skip the whole test and continue after the emitted
+code. Smaller values either jump to the found list or to the backtracks
+list. TMP1 and TMP2 are overwritten when the generic bit test is emitted. */
+static void xclass_check_bitset(compiler_common *common, const sljit_u8 *bitset, jump_list **found, jump_list **backtracks)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+/* The bitmap only describes the values 0..255. */
+jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
+if (!optimize_class(common, bitset, (bitset[31] & 0x80) != 0, TRUE, found))
+  {
+  /* optimize_class emitted nothing usable (it returned FALSE), so do the
+  generic test: load byte (value >> 3) of the bitmap and check bit
+  (value & 7) in it. */
+  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
+  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
+  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)bitset);
+  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
+  add_jump(compiler, found, JUMP(SLJIT_NOT_ZERO));
+  }
+/* Falling through means that the bit test did not jump to found (presumably
+the same holds for the code emitted by optimize_class): fail. */
+add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+JUMPHERE(jump);
+}
+
+#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
+/* Widen the interval [*min_ptr, *max_ptr] so that it also covers the smallest
+and the largest character described by the items at cc: *min_ptr is only
+ever lowered and *max_ptr is only ever raised. For a character list, only
+the first and the last relevant elements are inspected instead of decoding
+every element. Must be called in UTF mode (asserted). */
+static void xclass_update_min_max(compiler_common *common, PCRE2_SPTR cc, sljit_u32 *min_ptr, sljit_u32 *max_ptr)
+{
+uint32_t type, list_ind, c;
+sljit_u32 min = *min_ptr;
+sljit_u32 max = *max_ptr;
+uint32_t char_list_add;
+const uint8_t *next_char;
+BOOL utf = TRUE; /* Presumably consumed by GETCHARINCTEST - TODO confirm. */
+
+/* This function is pointless without utf 8/16. */
+SLJIT_ASSERT(common->utf);
+if (*cc == XCL_SINGLE || *cc == XCL_RANGE)
+  {
+  /* Only a few ranges are present. */
+  do
+    {
+    type = *cc++;
+    SLJIT_ASSERT(type == XCL_SINGLE || type == XCL_RANGE);
+    GETCHARINCTEST(c, cc);
+
+    if (c < min)
+      min = c;
+
+    if (type == XCL_RANGE)
+      {
+      GETCHARINCTEST(c, cc);
+      }
+
+    if (c > max)
+      max = c;
+    }
+  while (*cc != XCL_END);
+
+  SLJIT_ASSERT(min <= MAX_UTF_CODE_POINT && max <= MAX_UTF_CODE_POINT && min <= max);
+  *min_ptr = min;
+  *max_ptr = max;
+  return;
+  }
+/* Character list form: read the type word, which is assembled from two code
+units (high byte first) in 8-bit mode. */
+SLJIT_ASSERT(cc[0] >= XCL_LIST);
+#if PCRE2_CODE_UNIT_WIDTH == 8
+type = (uint32_t)(cc[0] << 8) | cc[1];
+cc += 2;
+#else
+type = cc[0];
+cc++;
+#endif  /* CODE_UNIT_WIDTH */
+
+/* Align characters. */
+next_char = (const uint8_t*)common->start - (GET(cc, 0) << 1);
+type &= XCL_TYPE_MASK;
+
+SLJIT_ASSERT(type != 0);
+
+/* Detect minimum. */
+
+/* Skip unused ranges: lists that neither hold elements nor begin with a
+range. next_char needs no adjustment because such lists store nothing. */
+list_ind = 0;
+while ((type & (XCL_BEGIN_WITH_RANGE | XCL_ITEM_COUNT_MASK)) == 0)
+  {
+  type >>= XCL_TYPE_BIT_LEN;
+  list_ind++;
+  }
+
+SLJIT_ASSERT(list_ind <= 2);
+switch (list_ind)
+  {
+  case 0:
+  char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
+  c = XCL_CHAR_LIST_LOW_16_START;
+  break;
+
+  case 1:
+  char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
+  c = XCL_CHAR_LIST_HIGH_16_START;
+  break;
+
+  default:
+  char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
+  c = XCL_CHAR_LIST_LOW_32_START;
+  break;
+  }
+/* A list that begins with a range starts at the first character of the list
+(c was set to it above). Otherwise the candidate is the first element of the
+list; when the count field is saturated the first stored value is the real
+element count, so the first element is the value after it. */
+if ((type & XCL_BEGIN_WITH_RANGE) != 0)
+  {
+  if (c < min)
+    min = c;
+  }
+else
+  {
+  if ((type & XCL_ITEM_COUNT_MASK) == XCL_ITEM_COUNT_MASK)
+    {
+    if (list_ind <= 1)
+      c = *(const uint16_t*)(next_char + 2);
+    else
+      c = *(const uint32_t*)(next_char + 4);
+    }
+  else
+    {
+    if (list_ind <= 1)
+      c = *(const uint16_t*)next_char;
+    else
+      c = *(const uint32_t*)next_char;
+    }
+
+  c = char_list_add + (c >> XCL_CHAR_SHIFT);
+  if (c < min)
+    min = c;
+  }
+
+/* Detect maximum. */
+
+/* Skip intermediate ranges: next_char is moved over the elements of every
+list (including a stored element count, if any) until the last list with a
+non-zero type field has been passed. Afterwards type and list_ind describe
+that last list and next_char points just behind its elements. */
+while (TRUE)
+  {
+  if ((type & XCL_ITEM_COUNT_MASK) == XCL_ITEM_COUNT_MASK)
+    {
+    if (list_ind <= 1)
+      {
+      c = *(const uint16_t*)next_char;
+      next_char += (c + 1) << 1;
+      }
+    else
+      {
+      c = *(const uint32_t*)next_char;
+      next_char += (c + 1) << 2;
+      }
+    }
+  else
+    next_char += (type & XCL_ITEM_COUNT_MASK) << (list_ind <= 1 ? 1 : 2);
+
+  if ((type >> XCL_TYPE_BIT_LEN) == 0)
+    break;
+
+  list_ind++;
+  type >>= XCL_TYPE_BIT_LEN;
+  }
+
+SLJIT_ASSERT(list_ind <= 2 && type != 0);
+switch (list_ind)
+  {
+  case 0:
+  char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
+  c = XCL_CHAR_LIST_LOW_16_END;
+  break;
+
+  case 1:
+  char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
+  c = XCL_CHAR_LIST_HIGH_16_END;
+  break;
+
+  default:
+  char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
+  c = XCL_CHAR_LIST_LOW_32_END;
+  break;
+  }
+/* The candidate is the last character of the list (c was set to it above)
+unless the list has elements and its final element closes a range; in that
+case the final element is the candidate. */
+if ((type & XCL_ITEM_COUNT_MASK) != 0)
+  {
+  /* Type is reused as temporary. */
+  if (list_ind <= 1)
+    type = *(const uint16_t*)(next_char - 2);
+  else
+    type = *(const uint32_t*)(next_char - 4);
+
+  if (type & XCL_CHAR_END)
+    c = char_list_add + (type >> XCL_CHAR_SHIFT);
+  }
+
+if (c > max)
+  max = c;
+
+SLJIT_ASSERT(min <= MAX_UTF_CODE_POINT && max <= MAX_UTF_CODE_POINT && min <= max);
+*min_ptr = min;
+*max_ptr = max;
+}
+
+#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
+
+#define XCLASS_IS_ECLASS 0x001 /* Supplied by the caller: no character is read here, and a match is recorded by OR-ing 1 into ECLASS_STACK_DATA. */
+#ifdef SUPPORT_UNICODE
+#define XCLASS_SAVE_CHAR 0x002 /* The character is needed again after TMP1 has been overwritten. */
+#define XCLASS_HAS_TYPE 0x004 /* The chartype field of the UCD record is tested. */
+#define XCLASS_HAS_SCRIPT 0x008 /* The script field is compared (PT_SC, or PT_SCX that is not negated). */
+#define XCLASS_HAS_SCRIPT_EXTENSION 0x010 /* A PT_SCX item is present. */
+#define XCLASS_HAS_BOOL 0x020 /* A PT_BOOL item is present. */
+#define XCLASS_HAS_BIDICL 0x040 /* A PT_BIDICL item is present. */
+#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL) /* Any flag that requires the UCD record lookup. */
+#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080 /* A negated (XCL_NOTPROP) PT_SCX item is present. */
+#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100 /* TMP2 was saved in RETURN_ADDR and is restored after the PT_SCX checks. */
+#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0 0x200 /* TMP2 was saved in LOCAL0 and is restored after the PT_SCX checks. */
+#endif /* SUPPORT_UNICODE */
+/* Forward declaration: compile_xclass_matchingpath calls this function (with
+OP_ALLANY) before its definition has been seen. */
+static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
+
+/* TMP3 must be preserved because it is used by compile_iterator_matchingpath. */
+static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, sljit_u32 status)
+{
+DEFINE_COMPILER;
+jump_list *found = NULL;
+jump_list *check_result = NULL;
+jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
+sljit_uw c, charoffset;
+sljit_u32 max = READ_CHAR_MAX, min = 0;
+struct sljit_jump *jump = NULL;
+PCRE2_UCHAR flags;
+PCRE2_SPTR ccbegin;
+sljit_u32 compares, invertcmp, depth;
+sljit_u32 first_item, last_item, mid_item;
+sljit_u32 range_start, range_end;
+xclass_ranges ranges;
+BOOL has_cmov, last_range_set;
+
+#ifdef SUPPORT_UNICODE
+sljit_u32 category_list = 0;
+sljit_u32 items;
+int typereg = TMP1;
+#endif /* SUPPORT_UNICODE */
+
+SLJIT_ASSERT(common->locals_size >= SSIZE_OF(sw));
+/* Scanning the necessary info. */
+flags = *cc++;
+ccbegin = cc;
+compares = 0;
+
+if (flags & XCL_MAP)
+  cc += 32 / sizeof(PCRE2_UCHAR);
+
+#ifdef SUPPORT_UNICODE
+while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
+  {
+  compares++;
+  cc++;
+
+  items = 0;
+
+  switch(*cc)
+    {
+    case PT_LAMP:
+    items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt);
+    break;
+
+    case PT_GC:
+    items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]);
+    break;
+
+    case PT_PC:
+    items = UCPCAT(cc[1]);
+    break;
+
+    case PT_WORD:
+    items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N;
+    break;
+
+    case PT_ALNUM:
+    items = UCPCAT_L | UCPCAT_N;
+    break;
+
+    case PT_SCX:
+    status |= XCLASS_HAS_SCRIPT_EXTENSION;
+    if (cc[-1] == XCL_NOTPROP)
+      {
+      status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
+      break;
+      }
+    compares++;
+    /* Fall through */
+
+    case PT_SC:
+    status |= XCLASS_HAS_SCRIPT;
+    break;
+
+    case PT_SPACE:
+    case PT_PXSPACE:
+    case PT_PXGRAPH:
+    case PT_PXPRINT:
+    case PT_PXPUNCT:
+    status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
+    break;
+
+    case PT_UCNC:
+    case PT_PXXDIGIT:
+    status |= XCLASS_SAVE_CHAR;
+    break;
+
+    case PT_BOOL:
+    status |= XCLASS_HAS_BOOL;
+    break;
+
+    case PT_BIDICL:
+    status |= XCLASS_HAS_BIDICL;
+    break;
+
+    default:
+    SLJIT_UNREACHABLE();
+    break;
+    }
+
+  if (items > 0)
+    {
+    if (cc[-1] == XCL_NOTPROP)
+      items ^= UCPCAT_ALL;
+    category_list |= items;
+    status |= XCLASS_HAS_TYPE;
+    compares--;
+    }
+
+  cc += 2;
+  }
+
+if (category_list == UCPCAT_ALL)
+  {
+  /* All or no characters are accepted, same as dotall. */
+  if (status & XCLASS_IS_ECLASS)
+    {
+    if (list != backtracks)
+      OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
+    return;
+    }
+
+  compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
+  if (list == backtracks)
+    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+  return;
+  }
+
+if (category_list != 0)
+  compares++;
+#endif
+
+if (*cc != XCL_END)
+  {
+#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
+  if (common->utf && compares == 0 && !(status & XCLASS_IS_ECLASS))
+    {
+    SLJIT_ASSERT(category_list == 0);
+    max = 0;
+    min = (flags & XCL_MAP) != 0 ? 0 : READ_CHAR_MAX;
+    xclass_update_min_max(common, cc, &min, &max);
+    }
+#endif
+  compares++;
+#ifdef SUPPORT_UNICODE
+  status |= XCLASS_SAVE_CHAR;
+#endif /* SUPPORT_UNICODE */
+  }
+
+#ifdef SUPPORT_UNICODE
+SLJIT_ASSERT(compares > 0 || category_list != 0);
+#else /* !SUPPORT_UNICODE */
+SLJIT_ASSERT(compares > 0);
+#endif /* SUPPORT_UNICODE */
+
+/* We are not necessarily in UTF mode, even in 8-bit mode. */
+cc = ccbegin;
+if (!(status & XCLASS_IS_ECLASS))
+  {
+  if ((flags & XCL_NOT) != 0)
+    read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
+  else
+    {
+#ifdef SUPPORT_UNICODE
+    read_char(common, min, max, (status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
+#else /* !SUPPORT_UNICODE */
+    read_char(common, min, max, NULL, 0);
+#endif /* SUPPORT_UNICODE */
+    }
+  }
+
+if ((flags & XCL_MAP) != 0)
+  {
+  SLJIT_ASSERT(!(status & XCLASS_IS_ECLASS));
+  xclass_check_bitset(common, (const sljit_u8 *)cc, &found, backtracks);
+  cc += 32 / sizeof(PCRE2_UCHAR);
+  }
+
+#ifdef SUPPORT_UNICODE
+if (status & XCLASS_NEEDS_UCD)
+  {
+  if ((status & (XCLASS_SAVE_CHAR | XCLASS_IS_ECLASS)) == XCLASS_SAVE_CHAR)
+    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+  if (!common->utf)
+    {
+    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
+    SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, UNASSIGNED_UTF_CHAR, TMP1);
+    }
+#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
+
+  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
+  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
+  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
+  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
+  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
+  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
+  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
+  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
+  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
+  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
+
+  ccbegin = cc;
+
+  if (status & XCLASS_HAS_BIDICL)
+    {
+    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
+    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
+
+    while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
+      {
+      cc++;
+
+      if (*cc == PT_BIDICL)
+        {
+        compares--;
+        invertcmp = (compares == 0 && list != backtracks);
+        if (cc[-1] == XCL_NOTPROP)
+          invertcmp ^= 0x1;
+        jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
+        add_jump(compiler, compares > 0 ? list : backtracks, jump);
+        }
+      cc += 2;
+      }
+
+    cc = ccbegin;
+    }
+
+  if (status & XCLASS_HAS_BOOL)
+    {
+    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
+    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
+
+    while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
+      {
+      cc++;
+      if (*cc == PT_BOOL)
+        {
+        compares--;
+        invertcmp = (compares == 0 && list != backtracks);
+        if (cc[-1] == XCL_NOTPROP)
+          invertcmp ^= 0x1;
+
+        OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
+        add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
+        }
+      cc += 2;
+      }
+
+    cc = ccbegin;
+    }
+
+  if (status & XCLASS_HAS_SCRIPT)
+    {
+    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
+
+    while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
+      {
+      cc++;
+
+      switch (*cc)
+        {
+        case PT_SCX:
+        if (cc[-1] == XCL_NOTPROP)
+          break;
+        /* Fall through */
+
+        case PT_SC:
+        compares--;
+        invertcmp = (compares == 0 && list != backtracks);
+        if (cc[-1] == XCL_NOTPROP)
+          invertcmp ^= 0x1;
+
+        add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
+        }
+      cc += 2;
+      }
+
+    cc = ccbegin;
+    }
+
+  if (status & XCLASS_HAS_SCRIPT_EXTENSION)
+    {
+    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
+    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
+    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
+
+    if (status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
+      {
+      if (status & XCLASS_HAS_TYPE)
+        {
+        if ((status & (XCLASS_SAVE_CHAR | XCLASS_IS_ECLASS)) == XCLASS_SAVE_CHAR)
+          {
+          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, TMP2, 0);
+          status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0;
+          }
+        else
+          {
+          OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
+          status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
+          }
+        }
+      OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
+      }
+
+    while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
+      {
+      cc++;
+
+      if (*cc == PT_SCX)
+        {
+        compares--;
+        invertcmp = (compares == 0 && list != backtracks);
+
+        jump = NULL;
+        if (cc[-1] == XCL_NOTPROP)
+          {
+          jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
+          if (invertcmp)
+            {
+            add_jump(compiler, backtracks, jump);
+            jump = NULL;
+            }
+          invertcmp ^= 0x1;
+          }
+
+        OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)(1u << (cc[1] & 0x1f)));
+        add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
+
+        if (jump != NULL)
+          JUMPHERE(jump);
+        }
+      cc += 2;
+      }
+
+    if (status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCAL0)
+      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
+    else if (status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
+      OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
+    cc = ccbegin;
+    }
+
+  if (status & XCLASS_SAVE_CHAR)
+    OP1(SLJIT_MOV, TMP1, 0, (status & XCLASS_IS_ECLASS) ? ECLASS_CHAR_DATA : RETURN_ADDR, 0);
+
+  if (status & XCLASS_HAS_TYPE)
+    {
+    if (status & XCLASS_SAVE_CHAR)
+      typereg = RETURN_ADDR;
+
+    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+    OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0);
+
+    if (category_list > 0)
+      {
+      compares--;
+      invertcmp = (compares == 0 && list != backtracks);
+      OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list);
+      add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
+      }
+    }
+  }
+#endif /* SUPPORT_UNICODE */
+
+/* Generating code. */
+charoffset = 0;
+
+#ifdef SUPPORT_UNICODE
+while (*cc == XCL_PROP || *cc == XCL_NOTPROP)
+  {
+  compares--;
+  invertcmp = (compares == 0 && list != backtracks);
+  jump = NULL;
+
+  if (*cc == XCL_NOTPROP)
+    invertcmp ^= 0x1;
+  cc++;
+  switch(*cc)
+    {
+    case PT_LAMP:
+    case PT_GC:
+    case PT_PC:
+    case PT_SC:
+    case PT_SCX:
+    case PT_BOOL:
+    case PT_BIDICL:
+    case PT_WORD:
+    case PT_ALNUM:
+    compares++;
+    /* Already handled. */
+    break;
+
+    case PT_SPACE:
+    case PT_PXSPACE:
+    SET_CHAR_OFFSET(9);
+    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
+    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
+
+    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
+    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+
+    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
+    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+
+    OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs));
+    OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
+    jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+    break;
+
+    case PT_UCNC:
+    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
+    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
+    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
+    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
+    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+
+    SET_CHAR_OFFSET(0xa0);
+    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
+    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+    SET_CHAR_OFFSET(0);
+    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
+    OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
+    jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+    break;
+
+    case PT_PXGRAPH:
+    OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs));
+    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
+
+    OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
+    jump = JUMP(SLJIT_ZERO);
+
+    c = charoffset;
+    /* In case of ucp_Cf, we overwrite the result. */
+    SET_CHAR_OFFSET(0x2066);
+    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
+
+    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+
+    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
+    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+
+    /* Restore charoffset. */
+    SET_CHAR_OFFSET(c);
+
+    JUMPHERE(jump);
+    jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
+    break;
+
+    case PT_PXPRINT:
+    OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp));
+    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
+
+    OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
+    jump = JUMP(SLJIT_ZERO);
+
+    c = charoffset;
+    /* In case of ucp_Cf, we overwrite the result. */
+    SET_CHAR_OFFSET(0x2066);
+    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
+
+    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+
+    /* Restore charoffset. */
+    SET_CHAR_OFFSET(c);
+
+    JUMPHERE(jump);
+    jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
+    break;
+
+    case PT_PXPUNCT:
+    OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So));
+    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
+
+    SET_CHAR_OFFSET(0);
+    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
+    OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
+
+    OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps));
+    OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
+    jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+    break;
+
+    case PT_PXXDIGIT:
+    SET_CHAR_OFFSET(CHAR_A);
+    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20);
+    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A);
+    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
+
+    SET_CHAR_OFFSET(CHAR_0);
+    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0);
+    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+
+    SET_CHAR_OFFSET(0xff10);
+    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10);
+
+    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10);
+    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+
+    SET_CHAR_OFFSET(0xff21);
+    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21);
+    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+
+    SET_CHAR_OFFSET(0xff41);
+    OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41);
+    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+
+    SET_CHAR_OFFSET(0xff10);
+
+    JUMPHERE(jump);
+    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
+    jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+    break;
+
+    default:
+    SLJIT_UNREACHABLE();
+    break;
+    }
+
+  cc += 2;
+
+  if (jump != NULL)
+    add_jump(compiler, compares > 0 ? list : backtracks, jump);
+  }
+
+if (compares == 0)
+  {
+  if (found != NULL)
+    set_jumps(found, LABEL());
+
+  if (status & XCLASS_IS_ECLASS)
+    OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
+  return;
+  }
+#endif /* SUPPORT_UNICODE */
+
+SLJIT_ASSERT(compares == 1);
+ranges.range_count = 0;
+ranges.ranges = ranges.local_ranges;
+ranges.stack = ranges.local_stack;
+
+xclass_compute_ranges(common, cc, &ranges);
+
+/* Memory error is set for the compiler. */
+if (ranges.stack == NULL)
+  return;
+
+#if (defined SLJIT_DEBUG && SLJIT_DEBUG) && \
+  defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
+if (common->utf)
+  {
+  min = READ_CHAR_MAX;
+  max = 0;
+  xclass_update_min_max(common, cc, &min, &max);
+  SLJIT_ASSERT(ranges.ranges[0] == min && ranges.ranges[ranges.range_count - 1] == max);
+  }
+#endif /* SLJIT_DEBUG && SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
+
+invertcmp = (list != backtracks);
+
+if (ranges.range_count == 2)
+  {
+  range_start = ranges.ranges[0];
+  range_end = ranges.ranges[1];
+
+  if (range_start < range_end)
+    {
+    SET_CHAR_OFFSET(range_start);
+    jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
+    }
+  else
+    jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
+
+  add_jump(compiler, backtracks, jump);
+
+  SLJIT_ASSERT(ranges.stack == ranges.local_stack);
+  if (found != NULL)
+    set_jumps(found, LABEL());
+
+  if (status & XCLASS_IS_ECLASS)
+    OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
+  return;
+  }
+
+range_start = ranges.ranges[0];
+SET_CHAR_OFFSET(range_start);
+if (ranges.range_count >= 6)
+  {
+  /* Early fail. */
+  range_end = ranges.ranges[ranges.range_count - 1];
+  add_jump(compiler, (flags & XCL_NOT) == 0 ? backtracks : &found,
+    CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start)));
+  }
+
+depth = 0;
+first_item = 0;
+last_item = ranges.range_count - 2;
+has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV) != 0;
+
+while (TRUE)
+  {
+  /* At least two items are present. */
+  SLJIT_ASSERT(first_item < last_item && charoffset == ranges.ranges[0]);
+  last_range_set = FALSE;
+
+  if (first_item + 6 <= last_item)
+    {
+    mid_item = ((first_item + last_item) >> 1) & ~(sljit_u32)1;
+    SLJIT_ASSERT(last_item >= mid_item + 4);
+
+    range_end = ranges.ranges[mid_item + 1];
+    if (first_item + 6 > mid_item && ranges.ranges[mid_item] == range_end)
+      {
+      OP2U(SLJIT_SUB | SLJIT_SET_GREATER | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - charoffset));
+      ranges.stack[depth].jump = JUMP(SLJIT_GREATER);
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
+      last_range_set = TRUE;
+      }
+    else
+      ranges.stack[depth].jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - charoffset));
+
+    ranges.stack[depth].first_item = (sljit_u32)(mid_item + 2);
+    ranges.stack[depth].last_item = (sljit_u32)last_item;
+
+    depth++;
+    SLJIT_ASSERT(ranges.stack == ranges.local_stack ?
+      depth <= XCLASS_LOCAL_RANGES_LOG2_SIZE : (ranges.stack + depth) <= (xclass_stack_item*)ranges.ranges);
+
+    last_item = mid_item;
+    if (!last_range_set)
+      continue;
+
+    last_item -= 2;
+    }
+
+  if (!last_range_set)
+    {
+    range_start = ranges.ranges[first_item];
+    range_end = ranges.ranges[first_item + 1];
+
+    if (range_start < range_end)
+      {
+      SET_CHAR_OFFSET(range_start);
+      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
+      }
+    else
+      {
+      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
+      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
+      }
+    first_item += 2;
+    }
+
+  SLJIT_ASSERT(first_item <= last_item);
+
+  do
+    {
+    range_start = ranges.ranges[first_item];
+    range_end = ranges.ranges[first_item + 1];
+
+    if (range_start < range_end)
+      {
+      SET_CHAR_OFFSET(range_start);
+      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_end - range_start));
+
+      if (has_cmov)
+        SELECT(SLJIT_LESS_EQUAL, TMP2, STR_END, 0, TMP2);
+      else
+        OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_LESS_EQUAL);
+      }
+    else
+      {
+      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(range_start - charoffset));
+
+      if (has_cmov)
+        SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
+      else
+        OP_FLAGS(SLJIT_OR | ((first_item == last_item) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
+      }
+
+    first_item += 2;
+    }
+  while (first_item <= last_item);
+
+  if (depth == 0) break;
+
+  add_jump(compiler, &check_result, JUMP(SLJIT_JUMP));
+
+  /* The charoffset resets after the end of a branch is reached. */
+  charoffset = ranges.ranges[0];
+  depth--;
+  first_item = ranges.stack[depth].first_item;
+  last_item = ranges.stack[depth].last_item;
+  JUMPHERE(ranges.stack[depth].jump);
+  }
+
+if (check_result != NULL)
+  set_jumps(check_result, LABEL());
+
+if (has_cmov)
+  jump = CMP(SLJIT_NOT_EQUAL ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
+else
+  {
+  sljit_set_current_flags(compiler, SLJIT_SET_Z);
+  jump = JUMP(SLJIT_NOT_EQUAL ^ invertcmp);
+  }
+
+add_jump(compiler, backtracks, jump);
+
+if (found != NULL)
+  set_jumps(found, LABEL());
+
+if (status & XCLASS_IS_ECLASS)
+  OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
+
+if (ranges.stack != ranges.local_stack)
+  SLJIT_FREE(ranges.stack, compiler->allocator_data);
+}
+/* Emits the matching path for an extended class item. One character is read,
+an optional 32 byte bitmap is tested, and the ECL_* items that follow are
+evaluated on a stack of result bits held in ECLASS_STACK_DATA (bit 0 is the
+top of the stack).
+
+  common      compiler state, passed on to every helper called here
+  cc          start of the item; GET(cc, 0) is read from here
+  backtracks  list receiving the jumps taken when the class does not match
+
+Returns cc + GET(cc, 0) - 1, which is also where the item loop stops. */
+static PCRE2_SPTR compile_eclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
+{
+DEFINE_COMPILER;
+PCRE2_SPTR end = cc + GET(cc, 0) - 1;
+PCRE2_SPTR begin;
+jump_list *not_found;
+jump_list *found = NULL;
+/* Skip LINK_SIZE code units, presumably the field GET(cc, 0) was read from. */
+cc += LINK_SIZE;
+
+/* Should be optimized later. */
+read_char(common, 0, READ_CHAR_MAX, backtracks, 0);
+/* The next code unit is a flag word. When ECL_MAP is set, 32 bytes of bitmap
+follow; xclass_check_bitset() is given the 'found' list, which is bound to the
+label emitted at the very end of this function. */
+if (((*cc++) & ECL_MAP) != 0)
+  {
+  xclass_check_bitset(common, (const sljit_u8 *)cc, &found, backtracks);
+  cc += 32 / sizeof(PCRE2_UCHAR);
+  }
+/* Remember the first item so that later xclass items can be told apart. */
+begin = cc;
+/* Spill the two eclass registers to LOCAL0/LOCAL1, start with an empty result
+stack, and copy TMP1 (expected to hold the character read above) into
+ECLASS_CHAR_DATA so that it can be reloaded before each later xclass item. */
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, ECLASS_CHAR_DATA, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, ECLASS_STACK_DATA, 0);
+OP1(SLJIT_MOV, ECLASS_STACK_DATA, 0, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, ECLASS_CHAR_DATA, 0, TMP1, 0);
+
+/* All eclass must start with an xclass. */
+SLJIT_ASSERT(*cc == ECL_XCLASS);
+
+while (cc < end)
+  {
+  switch (*cc)
+    {
+    case ECL_AND: /* Pop bit 0 and AND it into the new bit 0; higher bits are kept. */
+    ++cc;
+    OP2(SLJIT_OR, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, ~(sljit_sw)1);
+    OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
+    OP2(SLJIT_AND, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0);
+    break;
+
+    case ECL_OR: /* Pop bit 0 and OR it into the new bit 0. */
+    ++cc;
+    OP2(SLJIT_AND, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
+    OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
+    OP2(SLJIT_OR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0);
+    break;
+
+    case ECL_XOR: /* Pop bit 0 and XOR it into the new bit 0. */
+    ++cc;
+    OP2(SLJIT_AND, TMP2, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
+    OP2(SLJIT_LSHR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
+    OP2(SLJIT_XOR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, TMP2, 0);
+    break;
+
+    case ECL_NOT: /* Invert bit 0 in place. */
+    ++cc;
+    OP2(SLJIT_XOR, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
+    break;
+
+    default:
+    SLJIT_ASSERT(*cc == ECL_XCLASS);
+    if (cc != begin)
+      {
+      OP1(SLJIT_MOV, TMP1, 0, ECLASS_CHAR_DATA, 0);
+      OP2(SLJIT_SHL, ECLASS_STACK_DATA, 0, ECLASS_STACK_DATA, 0, SLJIT_IMM, 1);
+      }
+    /* Operand: every xclass after the first reloads the character and pushes
+    a 0 bit (the first one uses the zeroed stack as is). Code guarded by
+    'status & XCLASS_IS_ECLASS' earlier in this file ORs 1 into
+    ECLASS_STACK_DATA once a match is established; a miss arrives through
+    not_found at the label right after the call, leaving bit 0 clear. */
+    not_found = NULL;
+    compile_xclass_matchingpath(common, cc + 1 + LINK_SIZE, &not_found, XCLASS_IS_ECLASS);
+    set_jumps(not_found, LABEL());
+
+    cc += GET(cc, 1);
+    break;
+    }
+  }
+/* A result of zero means no match. The Z flag is produced before the two
+restoring moves and only consumed by the JUMP after them, so this ordering
+relies on those moves leaving the flags untouched. */
+OP2U(SLJIT_SUB | SLJIT_SET_Z, ECLASS_STACK_DATA, 0, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, ECLASS_CHAR_DATA, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
+OP1(SLJIT_MOV, ECLASS_STACK_DATA, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
+add_jump(compiler, backtracks, JUMP(SLJIT_EQUAL));
+set_jumps(found, LABEL());
+return end;
+}
+
+/* Generic character matching code. */
+
+#undef SET_CHAR_OFFSET
+#undef READ_FROM_CHAR_LIST
+#undef XCLASS_LOCAL_RANGES_SIZE
+#undef XCLASS_LOCAL_RANGES_LOG2_SIZE
+
+#endif /* SUPPORT_WIDE_CHARS */
+/* Emits the comparison of the subject against the single pattern character
+at cc (one code unit, plus GET_EXTRALEN more code units in UTF mode).
+
+  common      compiler state
+  caseless    when TRUE and the character has another case, the differing bit
+              is ORed into the subject data before it is compared
+  cc          the pattern character to compare
+  context     running state: 'length' is the amount still to be compared
+              (reduced by IN_UCHARS(1) per code unit), 'sourcereg' is the
+              register that receives the next load (-1 before the first
+              comparison) and, with unaligned reads, 'ucharptr', 'c' and 'oc'
+              collect code units that are compared together
+  backtracks  list receiving the mismatch jumps
+
+Subject data is addressed as STR_PTR - context->length. Returns cc advanced
+past the code units that were consumed. */
+static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
+    compare_context *context, jump_list **backtracks)
+{
+DEFINE_COMPILER;
+unsigned int othercasebit = 0;
+PCRE2_SPTR othercasechar = NULL;
+#ifdef SUPPORT_UNICODE
+int utflength;
+#endif
+/* othercasebit stays 0 (exact compare) unless a caseless compare is requested
+and the character has another case. */
+if (caseless && char_has_othercase(common, cc))
+  {
+  othercasebit = char_get_othercase_bit(common, cc);
+  SLJIT_ASSERT(othercasebit);
+  /* Extracting bit difference info. The upper part of the value selects the
+  code unit that differs (othercasechar), the low byte is the bit to OR in. */
+#if PCRE2_CODE_UNIT_WIDTH == 8
+  othercasechar = cc + (othercasebit >> 8);
+  othercasebit &= 0xff;
+#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
+  /* Note that this code only handles characters in the BMP. If there
+  ever are characters outside the BMP whose othercase differs in only one
+  bit from itself (there currently are none), this code will need to be
+  revised for PCRE2_CODE_UNIT_WIDTH == 32. Here the code unit index starts at
+  bit 9, and bit 0x100 moves the low byte into the high byte of the mask. */
+  othercasechar = cc + (othercasebit >> 9);
+  if ((othercasebit & 0x100) != 0)
+    othercasebit = (othercasebit & 0xff) << 8;
+  else
+    othercasebit &= 0xff;
+#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
+  }
+/* A sourcereg of -1 marks the first comparison: load the first piece of
+subject data into TMP1 and make TMP2 the target of the next load. The load
+width is chosen from the amount of data still to be compared. */
+if (context->sourcereg == -1)
+  {
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+  if (context->length >= 4)
+    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+  else if (context->length >= 2)
+    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+  else
+#endif
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+  if (context->length >= 4)
+    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+  else
+#endif
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
+  context->sourcereg = TMP2;
+  }
+
+#ifdef SUPPORT_UNICODE
+utflength = 1;
+if (common->utf && HAS_EXTRALEN(*cc))
+  utflength += GET_EXTRALEN(*cc);
+
+do
+  {
+#endif
+
+  context->length -= IN_UCHARS(1);
+#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
+
+  /* Unaligned read is supported. The expected code unit (c) and its case
+  mask (oc) are only recorded here; the compare itself is emitted below for
+  several code units at once. */
+  if (othercasebit != 0 && othercasechar == cc)
+    {
+    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
+    context->oc.asuchars[context->ucharptr] = othercasebit;
+    }
+  else
+    {
+    context->c.asuchars[context->ucharptr] = *cc;
+    context->oc.asuchars[context->ucharptr] = 0;
+    }
+  context->ucharptr++;
+  /* Emit a compare once 32 bits worth of code units are collected, when
+  nothing is left, or (8 bit only) when two code units are collected and a
+  single one remains. */
+#if PCRE2_CODE_UNIT_WIDTH == 8
+  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
+#else
+  if (context->ucharptr >= 2 || context->length == 0)
+#endif
+    {
+    if (context->length >= 4)
+      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+    else if (context->length >= 2)
+      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#if PCRE2_CODE_UNIT_WIDTH == 8
+    else if (context->length >= 1)
+      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
+    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
+    /* The load above fetched the data for the NEXT compare. After the swap,
+    sourcereg names the register that was loaded one step earlier, and that is
+    the one compared now. ucharptr selects the width of the compare. */
+    switch(context->ucharptr)
+      {
+      case 4 / sizeof(PCRE2_UCHAR):
+      if (context->oc.asint != 0)
+        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
+      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
+      break;
+
+      case 2 / sizeof(PCRE2_UCHAR):
+      if (context->oc.asushort != 0)
+        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
+      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
+      break;
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+      case 1:
+      if (context->oc.asbyte != 0)
+        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
+      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
+      break;
+#endif
+
+      default:
+      SLJIT_UNREACHABLE();
+      break;
+      }
+    context->ucharptr = 0;
+    }
+
+#else
+
+  /* Unaligned read is unsupported or in 32 bit mode: every code unit gets
+  its own compare. The next code unit (if any) is loaded first. */
+  if (context->length >= 1)
+    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
+  /* Swap, so that the compare below uses the register loaded earlier. */
+  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
+
+  if (othercasebit != 0 && othercasechar == cc)
+    {
+    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
+    }
+  else
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
+
+#endif
+
+  cc++;
+#ifdef SUPPORT_UNICODE
+  utflength--;
+  }
+while (utflength > 0);
+#endif
+
+return cc;
+}
+
+#ifdef SUPPORT_UNICODE
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
+
+/* Finds the end of the extended grapheme cluster that starts at cc. The call
+site in this file picks this variant when common->utf is set and
+common->invalid_utf is not.
+
+The logic is a copy of the interpreter function defined in the
+pcre2_extuni.c source. If that code is updated, this function, and those
+below it, must be kept in step (note by PH, June 2024).
+
+  args  supplies the subject limits (args->begin and args->end)
+  cc    first code unit of the cluster
+
+Returns the address just past the last character of the cluster. At least
+one character is read before cc is compared with the subject end. */
+
+static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
+{
+PCRE2_SPTR subject_start = args->begin;
+PCRE2_SPTR subject_end = args->end;
+PCRE2_SPTR left_start = cc;
+PCRE2_SPTR cluster_end = NULL;
+PCRE2_SPTR back;
+int left_gb, right_gb, ri_count;
+BOOL ep_then_zwj = FALSE;
+uint32_t chr;
+
+do
+  {
+  GETCHARINC(chr, cc);
+  right_gb = UCD_GRAPHBREAK(chr);
+
+  /* cluster_end is still NULL only for the first character, which always
+  belongs to the cluster and becomes the initial left-hand side. */
+  if (cluster_end == NULL)
+    {
+    left_gb = right_gb;
+    cluster_end = cc;
+    continue;
+    }
+
+  /* The break table says whether this left/right pair may stay together. */
+  if (!(PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)))
+    break;
+
+  /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
+  preceded by Extended Pictographic. */
+
+  if (left_gb == ucp_gbZWJ && right_gb == ucp_gbExtended_Pictographic && !ep_then_zwj)
+    break;
+
+  /* Not breaking between Regional Indicators is allowed only if there
+  are an even number of preceding RIs. */
+
+  if (left_gb == ucp_gbRegional_Indicator && right_gb == ucp_gbRegional_Indicator)
+    {
+    ri_count = 0;
+
+    /* Walk backwards from the left-hand character, counting the RIs in
+    front of it; the count only advances when another RI was found. */
+    for (back = left_start; back > subject_start; ri_count++)
+      {
+      back--;
+      BACKCHAR(back);
+      GETCHAR(chr, back);
+
+      if (UCD_GRAPHBREAK(chr) != ucp_gbRegional_Indicator)
+        break;
+      }
+
+    if ((ri_count & 1) != 0)
+      break;  /* Grapheme break required */
+    }
+
+  /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
+  between; see next statement). */
+
+  ep_then_zwj = (left_gb == ucp_gbExtended_Pictographic && right_gb == ucp_gbZWJ);
+
+  /* If Extend follows Extended_Pictographic, do not update the left-hand
+  property; this allows any number of them before a following ZWJ. */
+
+  if (!(right_gb == ucp_gbExtend && left_gb == ucp_gbExtended_Pictographic))
+    left_gb = right_gb;
+
+  left_start = cluster_end;
+  cluster_end = cc;
+  }
+while (cc < subject_end);
+
+return cluster_end;
+}
+
+#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
+
+/* Finds the end of the extended grapheme cluster that starts at cc, reading
+the subject with the *_INVALID character macros. The call site in this file
+picks this variant when common->invalid_utf is set.
+
+The logic is a copy of the interpreter function defined in the
+pcre2_extuni.c source. If that code is updated, this function, and the one
+below it, must be kept in step (note by PH, June 2024).
+
+  args  supplies the subject limits (args->begin and args->end)
+  cc    first code unit of the cluster
+
+Returns the address just past the last character of the cluster, or NULL
+when the forward read leaves the loop before a first character was accepted
+(the caller compares the result with 0 and backtracks). */
+
+static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
+{
+PCRE2_SPTR subject_start = args->begin;
+PCRE2_SPTR subject_end = args->end;
+PCRE2_SPTR left_start = cc;
+PCRE2_SPTR cluster_end = NULL;
+PCRE2_SPTR back;
+int left_gb, right_gb, ri_count;
+BOOL ep_then_zwj = FALSE;
+uint32_t chr;
+
+do
+  {
+  /* The 'break' handed to the macro leaves this do-while loop. */
+  GETCHARINC_INVALID(chr, cc, subject_end, break);
+  right_gb = UCD_GRAPHBREAK(chr);
+
+  /* cluster_end is still NULL only for the first character, which always
+  belongs to the cluster and becomes the initial left-hand side. */
+  if (cluster_end == NULL)
+    {
+    left_gb = right_gb;
+    cluster_end = cc;
+    continue;
+    }
+
+  /* The break table says whether this left/right pair may stay together. */
+  if (!(PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)))
+    break;
+
+  /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
+  preceded by Extended Pictographic. */
+
+  if (left_gb == ucp_gbZWJ && right_gb == ucp_gbExtended_Pictographic && !ep_then_zwj)
+    break;
+
+  /* Not breaking between Regional Indicators is allowed only if there
+  are an even number of preceding RIs. */
+
+  if (left_gb == ucp_gbRegional_Indicator && right_gb == ucp_gbRegional_Indicator)
+    {
+    ri_count = 0;
+
+    /* Walk backwards from the left-hand character, counting the RIs in
+    front of it. The 'break' handed to the macro leaves this inner loop
+    without advancing the count. */
+    for (back = left_start; back > subject_start; ri_count++)
+      {
+      GETCHARBACK_INVALID(chr, back, subject_start, break);
+
+      if (UCD_GRAPHBREAK(chr) != ucp_gbRegional_Indicator)
+        break;
+      }
+
+    if ((ri_count & 1) != 0)
+      break;  /* Grapheme break required */
+    }
+
+  /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
+  between; see next statement). */
+
+  ep_then_zwj = (left_gb == ucp_gbExtended_Pictographic && right_gb == ucp_gbZWJ);
+
+  /* If Extend follows Extended_Pictographic, do not update the left-hand
+  property; this allows any number of them before a following ZWJ. */
+
+  if (!(right_gb == ucp_gbExtend && left_gb == ucp_gbExtended_Pictographic))
+    left_gb = right_gb;
+
+  left_start = cluster_end;
+  cluster_end = cc;
+  }
+while (cc < subject_end);
+
+return cluster_end;
+}
+
+/* Finds the end of the extended grapheme cluster that starts at cc when
+every code unit is a whole character (no UTF decoding is done here).
+
+The logic is a copy of the interpreter function defined in the
+pcre2_extuni.c source. If that code is updated, this function must be kept
+in step (note by PH, June 2024).
+
+  args  supplies the subject limits (args->begin and args->end)
+  cc    first code unit of the cluster
+
+Returns the address just past the last code unit of the cluster. In 32 bit
+mode a value of 0x110000 or above ends the cluster. */
+
+static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
+{
+PCRE2_SPTR subject_start = args->begin;
+PCRE2_SPTR subject_end = args->end;
+PCRE2_SPTR back;
+int left_gb, right_gb, ri_count;
+BOOL ep_then_zwj = FALSE;
+uint32_t chr;
+
+/* The first code unit is consumed unconditionally. */
+chr = *cc++;
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+if (chr >= 0x110000)
+  return cc;
+#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
+left_gb = UCD_GRAPHBREAK(chr);
+
+/* cc only advances once the code unit it points at has been accepted. */
+for (; cc < subject_end; cc++)
+  {
+  chr = *cc;
+#if PCRE2_CODE_UNIT_WIDTH == 32
+  if (chr >= 0x110000)
+    break;
+#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
+  right_gb = UCD_GRAPHBREAK(chr);
+
+  /* The break table says whether this left/right pair may stay together. */
+  if (!(PRIV(ucp_gbtable)[left_gb] & (1 << right_gb)))
+    break;
+
+  /* ZWJ followed by Extended Pictographic is allowed only if the ZWJ was
+  preceded by Extended Pictographic. */
+
+  if (left_gb == ucp_gbZWJ && right_gb == ucp_gbExtended_Pictographic && !ep_then_zwj)
+    break;
+
+  /* Not breaking between Regional Indicators is allowed only if there
+  are an even number of preceding RIs. */
+
+  if (left_gb == ucp_gbRegional_Indicator && right_gb == ucp_gbRegional_Indicator)
+    {
+    ri_count = 0;
+
+    /* Walk backwards from the left-hand code unit (cc - 1), counting the
+    RIs in front of it; the count only advances when another RI was found. */
+    for (back = cc - 1; back > subject_start; ri_count++)
+      {
+      chr = *(--back);
+#if PCRE2_CODE_UNIT_WIDTH == 32
+      if (chr >= 0x110000)
+        break;
+#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
+
+      if (UCD_GRAPHBREAK(chr) != ucp_gbRegional_Indicator)
+        break;
+      }
+
+    if ((ri_count & 1) != 0)
+      break;  /* Grapheme break required */
+    }
+
+  /* Set a flag when ZWJ follows Extended Pictographic (with optional Extend in
+  between; see next statement). */
+
+  ep_then_zwj = (left_gb == ucp_gbExtended_Pictographic && right_gb == ucp_gbZWJ);
+
+  /* If Extend follows Extended_Pictographic, do not update the left-hand
+  property; this allows any number of them before a following ZWJ. */
+
+  if (!(right_gb == ucp_gbExtend && left_gb == ucp_gbExtended_Pictographic))
+    left_gb = right_gb;
+  }
+
+return cc;
+}
+/* Emits the test of one character against a caseless set: the list of code
+points found at PRIV(ucd_caseless_sets) + cc[2], terminated by NOTACHAR.
+
+  common      compiler state
+  cc          cc[0] is the opcode (OP_PROP is tested for; the caller in this
+              file also passes OP_NOTPROP), cc[1] must be PT_CLIST and cc[2]
+              is the offset of the set
+  backtracks  list receiving the jump taken when the test fails
+
+TMP2 collects the result and is non-zero once an entry equalled the
+character (read into TMP1). */
+static void compile_clist(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
+{
+DEFINE_COMPILER;
+const sljit_u32 *other_cases;
+struct sljit_jump *jump;
+sljit_u32 min = 0, max = READ_CHAR_MAX;
+BOOL has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV) != 0;
+
+SLJIT_ASSERT(cc[1] == PT_CLIST);
+/* For OP_PROP the range handed to read_char() is narrowed to the smallest
+and largest entry of the set; otherwise the full range is kept. */
+if (cc[0] == OP_PROP)
+  {
+  other_cases = PRIV(ucd_caseless_sets) + cc[2];
+
+  min = *other_cases++;
+  max = min;
+
+  while (*other_cases != NOTACHAR)
+    {
+    if (*other_cases > max) max = *other_cases;
+    if (*other_cases < min) min = *other_cases;
+    other_cases++;
+    }
+  }
+
+other_cases = PRIV(ucd_caseless_sets) + cc[2];
+SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR);
+/* The NOTACHAR is higher than any character. */
+SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
+
+read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
+
+/* At least two characters are required.
+   Otherwise this case would be handled by the normal code path. */
+/* NOTACHAR is the unsigned maximum. */
+
+/* Optimizing character pairs, if their difference is power of 2. When two
+entries differ in exactly one bit, ORing that bit into the character and
+comparing with the larger entry tests both entries with one compare. The
+first branch pairs entries 0 and 1; the second pairs entries 1 and 2 and
+tests entry 0 on its own. */
+if (is_powerof2(other_cases[1] ^ other_cases[0]))
+  {
+  OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[1] ^ other_cases[0]));
+  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
+  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
+  other_cases += 2;
+  }
+else if (is_powerof2(other_cases[2] ^ other_cases[1]))
+  {
+  SLJIT_ASSERT(other_cases[2] != NOTACHAR);
+
+  OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[2] ^ other_cases[1]));
+  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
+  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
+
+  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)other_cases[0]);
+
+  if (has_cmov)
+    SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
+  else
+    OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
+
+  other_cases += 3;
+  }
+else
+  {
+  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++));
+  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
+  }
+/* Remaining entries are compared one by one. With cmov, TMP2 is replaced by
+STR_END on equality (presumably a value that is never zero); without it the
+equality bit is ORed into TMP2, and the last entry also sets the Z flag. */
+while (*other_cases != NOTACHAR)
+  {
+  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++));
+
+  if (has_cmov)
+    SELECT(SLJIT_EQUAL, TMP2, STR_END, 0, TMP2);
+  else
+    OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
+  }
+/* OP_PROP backtracks when nothing matched (TMP2 is zero); any other opcode
+backtracks when something matched. NOTE(review): without cmov the JUMP reads
+the Z flag left by the last OP_FLAGS that carried SLJIT_SET_Z. The first and
+third branches above do not emit one themselves, so they appear to rely on
+the loop handling at least one further entry, i.e. on sets of three or more
+characters - confirm against the generated tables. */
+if (has_cmov)
+  jump = CMP(cc[0] == OP_PROP ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
+else
+  jump = JUMP(cc[0] == OP_PROP ? SLJIT_ZERO : SLJIT_NOT_ZERO);
+
+add_jump(compiler, backtracks, jump);
+}
+
+#endif /* SUPPORT_UNICODE */
+
+static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
+{
+DEFINE_COMPILER;
+int length;
+unsigned int c, oc, bit;
+compare_context context;
+struct sljit_jump *jump[3];
+jump_list *end_list;
+#ifdef SUPPORT_UNICODE
+PCRE2_UCHAR propdata[5];
+#endif /* SUPPORT_UNICODE */
+
+switch(type)
+  {
+  case OP_NOT_DIGIT:
+  case OP_DIGIT:
+  /* Digits are usually 0-9, so it is worth to optimize them. */
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
+    read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
+  else
+#endif
+    read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
+    /* Flip the starting bit in the negative case. */
+  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
+  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
+  return cc;
+
+  case OP_NOT_WHITESPACE:
+  case OP_WHITESPACE:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
+    read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
+  else
+#endif
+    read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
+  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
+  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
+  return cc;
+
+  case OP_NOT_WORDCHAR:
+  case OP_WORDCHAR:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
+    read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
+  else
+#endif
+    read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
+  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
+  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
+  return cc;
+
+  case OP_ANY:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
+  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
+    {
+    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
+    end_list = NULL;
+    if (common->mode != PCRE2_JIT_PARTIAL_HARD)
+      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+    else
+      check_str_end(common, &end_list);
+
+    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
+    set_jumps(end_list, LABEL());
+    JUMPHERE(jump[0]);
+    }
+  else
+    check_newlinechar(common, common->nltype, backtracks, TRUE);
+  return cc;
+
+  case OP_ALLANY:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+#ifdef SUPPORT_UNICODE
+  if (common->utf && common->invalid_utf)
+    {
+    read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
+    return cc;
+    }
+#endif /* SUPPORT_UNICODE */
+
+  skip_valid_char(common);
+  return cc;
+
+  case OP_ANYBYTE:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  return cc;
+
+#ifdef SUPPORT_UNICODE
+  case OP_NOTPROP:
+  case OP_PROP:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+  if (cc[0] == PT_CLIST)
+    {
+    compile_clist(common, cc - 1, backtracks);
+    return cc + 2;
+    }
+
+  propdata[0] = 0;
+  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
+  propdata[2] = cc[0];
+  propdata[3] = cc[1];
+  propdata[4] = XCL_END;
+  compile_xclass_matchingpath(common, propdata, backtracks, 0);
+  return cc + 2;
+#endif
+
+  case OP_ANYNL:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
+  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
+  /* We don't need to handle soft partial matching case. */
+  end_list = NULL;
+  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
+    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+  else
+    check_str_end(common, &end_list);
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
+  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
+#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
+  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
+#endif
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+  jump[1] = JUMP(SLJIT_JUMP);
+  JUMPHERE(jump[0]);
+  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
+  set_jumps(end_list, LABEL());
+  JUMPHERE(jump[1]);
+  return cc;
+
+  case OP_NOT_HSPACE:
+  case OP_HSPACE:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+
+  if (type == OP_NOT_HSPACE)
+    read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
+  else
+    read_char(common, 0x9, 0x3000, NULL, 0);
+
+  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
+  sljit_set_current_flags(compiler, SLJIT_SET_Z);
+  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
+  return cc;
+
+  case OP_NOT_VSPACE:
+  case OP_VSPACE:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+
+  if (type == OP_NOT_VSPACE)
+    read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
+  else
+    read_char(common, 0xa, 0x2029, NULL, 0);
+
+  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
+  sljit_set_current_flags(compiler, SLJIT_SET_Z);
+  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
+  return cc;
+
+#ifdef SUPPORT_UNICODE
+  case OP_EXTUNI:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+
+  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
+  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
+  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
+    common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
+  if (common->invalid_utf)
+    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
+#else
+  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
+    common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
+  if (common->invalid_utf)
+    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
+#endif
+
+  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
+
+  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
+    {
+    jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
+    /* Since we successfully read a char above, partial matching must occur. */
+    check_partial(common, TRUE);
+    JUMPHERE(jump[0]);
+    }
+  return cc;
+#endif
+
+  case OP_CHAR:
+  case OP_CHARI:
+  length = 1;
+#ifdef SUPPORT_UNICODE
+  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
+#endif
+
+  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
+    detect_partial_match(common, backtracks);
+
+  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
+    {
+    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
+    if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
+      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
+
+    context.length = IN_UCHARS(length);
+    context.sourcereg = -1;
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+    context.ucharptr = 0;
+#endif
+    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
+    }
+
+#ifdef SUPPORT_UNICODE
+  if (common->utf)
+    {
+    GETCHAR(c, cc);
+    }
+  else
+#endif
+    c = *cc;
+
+  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
+
+  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
+    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+  oc = char_othercase(common, c);
+  read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
+
+  SLJIT_ASSERT(!is_powerof2(c ^ oc));
+
+  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
+    {
+    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
+    SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1);
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
+    }
+  else
+    {
+    jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
+    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
+    JUMPHERE(jump[0]);
+    }
+  return cc + length;
+
+  case OP_NOT:
+  case OP_NOTI:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+
+  length = 1;
+#ifdef SUPPORT_UNICODE
+  if (common->utf)
+    {
+#if PCRE2_CODE_UNIT_WIDTH == 8
+    c = *cc;
+    if (c < 128 && !common->invalid_utf)
+      {
+      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+      if (type == OP_NOT || !char_has_othercase(common, cc))
+        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
+      else
+        {
+        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
+        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
+        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
+        }
+      /* Skip the variable-length character. */
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
+      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+      JUMPHERE(jump[0]);
+      return cc + 1;
+      }
+    else
+#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
+      {
+      GETCHARLEN(c, cc, length);
+      }
+    }
+  else
+#endif /* SUPPORT_UNICODE */
+    c = *cc;
+
+  if (type == OP_NOT || !char_has_othercase(common, cc))
+    {
+    read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
+    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
+    }
+  else
+    {
+    oc = char_othercase(common, c);
+    read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
+    bit = c ^ oc;
+    if (is_powerof2(bit))
+      {
+      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
+      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
+      }
+    else
+      {
+      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
+      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
+      }
+    }
+  return cc + length;
+
+  case OP_CLASS:
+  case OP_NCLASS:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
+  if (type == OP_NCLASS)
+    read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
+  else
+    read_char(common, 0, bit, NULL, 0);
+#else
+  if (type == OP_NCLASS)
+    read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
+  else
+    read_char(common, 0, 255, NULL, 0);
+#endif
+
+  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
+    return cc + 32 / sizeof(PCRE2_UCHAR);
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+  jump[0] = NULL;
+  if (common->utf)
+    {
+    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
+    if (type == OP_CLASS)
+      {
+      add_jump(compiler, backtracks, jump[0]);
+      jump[0] = NULL;
+      }
+    }
+#elif PCRE2_CODE_UNIT_WIDTH != 8
+  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
+  if (type == OP_CLASS)
+    {
+    add_jump(compiler, backtracks, jump[0]);
+    jump[0] = NULL;
+    }
+#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
+
+  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
+  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
+  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
+  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
+  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
+  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
+
+#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
+  if (jump[0] != NULL)
+    JUMPHERE(jump[0]);
+#endif
+  return cc + 32 / sizeof(PCRE2_UCHAR);
+
+#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
+  case OP_XCLASS:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks, 0);
+  return cc + GET(cc, 0) - 1;
+
+  case OP_ECLASS:
+  if (check_str_ptr)
+    detect_partial_match(common, backtracks);
+  return compile_eclass_matchingpath(common, cc, backtracks);
+#endif
+  }
+SLJIT_UNREACHABLE();
+return cc;
+}
+
+static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
+{
+/* This function consumes at least one input character. */
+/* To decrease the number of length checks, we try to concatenate the fixed length character sequences.
+   The first loop below only measures: starting at cc and stopping at ccend, it walks
+   consecutive OP_CHAR / OP_CHARI items and accumulates their total length in
+   context.length (via IN_UCHARS). If that length is non-zero, the whole run is compared
+   with byte_sequence_compare() after a single STR_PTR/STR_END check; otherwise the item
+   at cc is handed to compile_char1_matchingpath(). Failure jumps are collected in
+   *backtracks. Returns the opcode pointer produced by whichever helper ran last. */
+DEFINE_COMPILER;
+PCRE2_SPTR ccbegin = cc;
+compare_context context;
+int size;
+
+context.length = 0;
+do
+  {
+  if (cc >= ccend)
+    break;
+
+  if (*cc == OP_CHAR)
+    {
+    size = 1;
+#ifdef SUPPORT_UNICODE
+    if (common->utf && HAS_EXTRALEN(cc[1]))
+      size += GET_EXTRALEN(cc[1]); /* Multi-unit UTF character: count its extra code units too. */
+#endif
+    }
+  else if (*cc == OP_CHARI)
+    {
+    size = 1;
+#ifdef SUPPORT_UNICODE
+    if (common->utf)
+      {
+      if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
+        size = 0; /* Caseless char whose othercase bit is 0: ends the run (presumably no single-bit case difference - confirm in char_get_othercase_bit). */
+      else if (HAS_EXTRALEN(cc[1]))
+        size += GET_EXTRALEN(cc[1]);
+      }
+    else
+#endif
+    if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
+      size = 0;
+    }
+  else
+    size = 0; /* Any other opcode ends the run. */
+
+  cc += 1 + size;
+  context.length += IN_UCHARS(size);
+  }
+while (size > 0 && context.length <= 128); /* Stop at the first item that cannot be merged, or once the run length exceeds 128. */
+
+cc = ccbegin; /* The scan above only measured the run; rewind before generating code. */
+if (context.length > 0)
+  {
+  /* We have a fixed-length byte sequence. Advance STR_PTR past the whole run up front and
+     emit one end-of-subject check for it, instead of one check per character. */
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
+  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
+
+  context.sourcereg = -1;
+#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
+  context.ucharptr = 0;
+#endif
+  do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
+  return cc;
+  }
+
+/* A non-fixed length character will be checked if length == 0. The final TRUE is passed
+   as the last argument of compile_char1_matchingpath (its check_str_ptr parameter,
+   judging by the sibling code in this file - confirm). */
+return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
+}
+
+

File diff suppressed because it is too large
+ 384 - 155
thirdparty/pcre2/src/pcre2_jit_compile.c


+ 1 - 1
thirdparty/pcre2/src/pcre2_jit_match.c

@@ -83,7 +83,7 @@ Arguments:
 
 Returns:          > 0 => success; value is the number of ovector pairs filled
                   = 0 => success, but ovector is not big enough
-                   -1 => failed to match (PCRE_ERROR_NOMATCH)
+                   -1 => failed to match (PCRE2_ERROR_NOMATCH)
                  < -1 => some kind of unexpected problem
 */
 

+ 2 - 2
thirdparty/pcre2/src/pcre2_jit_misc.c

@@ -141,8 +141,8 @@ if (startsize == 0 || maxsize == 0 || maxsize > SIZE_MAX - STACK_GROWTH_RATE)
   return NULL;
 if (startsize > maxsize)
   startsize = maxsize;
-startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
-maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
+startsize = (startsize + STACK_GROWTH_RATE - 1) & (size_t)(~(STACK_GROWTH_RATE - 1));
+maxsize = (maxsize + STACK_GROWTH_RATE - 1) & (size_t)(~(STACK_GROWTH_RATE - 1));
 
 jit_stack = PRIV(memctl_malloc)(sizeof(pcre2_real_jit_stack), (pcre2_memctl *)gcontext);
 if (jit_stack == NULL) return NULL;

+ 5 - 5
thirdparty/pcre2/src/pcre2_jit_neon_inc.h

@@ -82,7 +82,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #  endif
 # endif
 
-#if (defined(__GNUC__) && __SANITIZE_ADDRESS__) \
+#if (defined(__GNUC__) && defined(__SANITIZE_ADDRESS__) && __SANITIZE_ADDRESS__ ) \
 	|| (defined(__clang__) \
 	&& ((__clang_major__ == 3 && __clang_minor__ >= 3) || (__clang_major__ > 3)))
 __attribute__((no_sanitize_address))
@@ -198,14 +198,14 @@ vect_t data = VLD1Q(*str_ptr);
 #if PCRE2_CODE_UNIT_WIDTH != 8
 data = VANDQ(data, char_mask);
 #endif
- 
+
 #if defined(FFCS)
 vect_t eq = VCEQQ(data, vc1);
 
 #elif defined(FFCS_2)
 vect_t eq1 = VCEQQ(data, vc1);
 vect_t eq2 = VCEQQ(data, vc2);
-vect_t eq = VORRQ(eq1, eq2);    
+vect_t eq = VORRQ(eq1, eq2);
 
 #elif defined(FFCS_MASK)
 vect_t eq = VORRQ(data, vmask);
@@ -226,7 +226,7 @@ if (p1 < *str_ptr)
   }
 else
   data2 = shift_left_n_lanes(data, offs1 - offs2);
- 
+
 if (compare1_type == compare_match1)
   data = VCEQQ(data, cmp1a);
 else
@@ -281,7 +281,7 @@ while (*str_ptr < str_end)
 #elif defined(FFCS_2)
   eq1 = VCEQQ(data, vc1);
   eq2 = VCEQQ(data, vc2);
-  eq = VORRQ(eq1, eq2);    
+  eq = VORRQ(eq1, eq2);
 
 #elif defined(FFCS_MASK)
   eq = VORRQ(data, vmask);

+ 58 - 57
thirdparty/pcre2/src/pcre2_jit_simd_inc.h

@@ -246,10 +246,10 @@ struct sljit_jump *quit;
 struct sljit_jump *partial_quit[2];
 vector_compare_type compare_type = vector_compare_match1;
 sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
-sljit_s32 data_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0);
-sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1);
-sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR2);
-sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3);
+sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
+sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
+sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
+sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
 sljit_u32 bit = 0;
 int i;
 
@@ -273,17 +273,17 @@ if (common->mode == PCRE2_JIT_COMPLETE)
 
 /* First part (unaligned start) */
 value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
-sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
+sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
 
 if (char1 != char2)
-  sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
+  sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
 
 OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
 
-sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR1, SLJIT_FR1, 0);
+sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR1, SLJIT_VR1, 0);
 
 if (char1 != char2)
-  sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR2, SLJIT_FR2, 0);
+  sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0);
 
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
 restart = LABEL();
@@ -294,12 +294,12 @@ OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value);
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);
 
 value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
-sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0);
+sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
 
 for (i = 0; i < 4; i++)
   fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
 
-sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);
+sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
 
@@ -318,11 +318,11 @@ if (common->mode == PCRE2_JIT_COMPLETE)
   add_jump(compiler, &common->failed_match, partial_quit[1]);
 
 value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
-sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0);
+sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
 for (i = 0; i < 4; i++)
   fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
 
-sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);
+sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
 CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
 
 JUMPHERE(quit);
@@ -380,10 +380,10 @@ struct sljit_jump *quit;
 jump_list *not_found = NULL;
 vector_compare_type compare_type = vector_compare_match1;
 sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
-sljit_s32 data_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0);
-sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1);
-sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR2);
-sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3);
+sljit_s32 data_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
+sljit_s32 cmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
+sljit_s32 cmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
+sljit_s32 tmp_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
 sljit_u32 bit = 0;
 int i;
 
@@ -406,29 +406,29 @@ OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
 /* First part (unaligned start) */
 
 value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
-sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
+sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
 
 if (char1 != char2)
-  sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
+  sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
 
 OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
 
-sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR1, SLJIT_FR1, 0);
+sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR1, SLJIT_VR1, 0);
 
 if (char1 != char2)
-  sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR2, SLJIT_FR2, 0);
+  sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0);
 
 value = (reg_type == SLJIT_SIMD_REG_256) ? 0x1f : 0xf;
 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~value);
 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, value);
 
 value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
-sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0);
+sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
 
 for (i = 0; i < 4; i++)
   fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
 
-sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);
+sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
 
@@ -445,12 +445,12 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
 add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
 
 value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
-sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0);
+sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
 
 for (i = 0; i < 4; i++)
   fast_forward_char_pair_sse2_compare(compiler, compare_type, reg_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
 
-sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);
+sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
 CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
 
 JUMPHERE(quit);
@@ -488,14 +488,14 @@ sljit_u32 bit1 = 0;
 sljit_u32 bit2 = 0;
 sljit_u32 diff = IN_UCHARS(offs1 - offs2);
 sljit_s32 tmp1_reg_ind = sljit_get_register_index(SLJIT_GP_REGISTER, TMP1);
-sljit_s32 data1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR0);
-sljit_s32 data2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR1);
-sljit_s32 cmp1a_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR2);
-sljit_s32 cmp2a_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3);
-sljit_s32 cmp1b_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR4);
-sljit_s32 cmp2b_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR5);
-sljit_s32 tmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR6);
-sljit_s32 tmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_TMP_FR0);
+sljit_s32 data1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0);
+sljit_s32 data2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR1);
+sljit_s32 cmp1a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR2);
+sljit_s32 cmp2a_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR3);
+sljit_s32 cmp1b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR4);
+sljit_s32 cmp2b_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR5);
+sljit_s32 tmp1_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR6);
+sljit_s32 tmp2_ind = sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_TMP_DEST_VREG);
 struct sljit_label *start;
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
 struct sljit_label *restart;
@@ -541,10 +541,10 @@ else
   }
 
 value = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_LANE_ZERO;
-sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR2, 0, TMP1, 0);
+sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR2, 0, TMP1, 0);
 
 if (char1a != char1b)
-  sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR4, 0, TMP2, 0);
+  sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR4, 0, TMP2, 0);
 
 if (char2a == char2b)
   OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
@@ -566,18 +566,18 @@ else
     }
   }
 
-sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR3, 0, TMP1, 0);
+sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR3, 0, TMP1, 0);
 
 if (char2a != char2b)
-  sljit_emit_simd_lane_mov(compiler, value, SLJIT_FR5, 0, TMP2, 0);
+  sljit_emit_simd_lane_mov(compiler, value, SLJIT_VR5, 0, TMP2, 0);
 
-sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR2, SLJIT_FR2, 0);
+sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR2, SLJIT_VR2, 0);
 if (char1a != char1b)
-  sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR4, SLJIT_FR4, 0);
+  sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR4, SLJIT_VR4, 0);
 
-sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR3, SLJIT_FR3, 0);
+sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR3, SLJIT_VR3, 0);
 if (char2a != char2b)
-  sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_FR5, SLJIT_FR5, 0);
+  sljit_emit_simd_lane_replicate(compiler, reg_type | SLJIT_SIMD_ELEM_32, SLJIT_VR5, SLJIT_VR5, 0);
 
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
 restart = LABEL();
@@ -589,11 +589,11 @@ value = (reg_type == SLJIT_SIMD_REG_256) ? ~0x1f : ~0xf;
 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
 
 value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
-sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0);
+sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
 
 jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);
 
-sljit_emit_simd_mov(compiler, reg_type, SLJIT_FR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff);
+sljit_emit_simd_mov(compiler, reg_type, SLJIT_VR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff);
 jump[1] = JUMP(SLJIT_JUMP);
 
 JUMPHERE(jump[0]);
@@ -668,8 +668,8 @@ for (i = 0; i < 4; i++)
   fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
   }
 
-sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR1);
-sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);
+sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR1, 0);
+sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
 
 /* Ignore matches before the first STR_PTR. */
 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
@@ -687,8 +687,8 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, value);
 add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
 
 value = (reg_type == SLJIT_SIMD_REG_256) ? SLJIT_SIMD_MEM_ALIGNED_256 : SLJIT_SIMD_MEM_ALIGNED_128;
-sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_FR0, SLJIT_MEM1(STR_PTR), 0);
-sljit_emit_simd_mov(compiler, reg_type, SLJIT_FR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff);
+sljit_emit_simd_mov(compiler, reg_type | value, SLJIT_VR0, SLJIT_MEM1(STR_PTR), 0);
+sljit_emit_simd_mov(compiler, reg_type, SLJIT_VR1, SLJIT_MEM1(STR_PTR), -(sljit_sw)diff);
 
 for (i = 0; i < 4; i++)
   {
@@ -696,8 +696,8 @@ for (i = 0; i < 4; i++)
   fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
   }
 
-sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR1);
-sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);
+sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR1, 0);
+sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_VR0, TMP1, 0);
 
 CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
 
@@ -843,12 +843,13 @@ DEFINE_COMPILER;
 int_char ic;
 struct sljit_jump *partial_quit, *quit;
 /* Save temporary registers. */
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0);
+SLJIT_ASSERT(common->locals_size >= 2 * (int)sizeof(sljit_sw));
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL1, TMP3, 0);
 
 /* Prepare function arguments */
 OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
-GET_LOCAL_BASE(SLJIT_R1, 0, LOCALS0);
+GET_LOCAL_BASE(SLJIT_R1, 0, LOCAL0);
 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset);
 
 if (char1 == char2)
@@ -910,8 +911,8 @@ else
     }
   }
 /* Restore registers. */
-OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
-OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
+OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCAL1);
 
 /* Check return value. */
 partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
@@ -1038,7 +1039,7 @@ SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
 SLJIT_ASSERT(compiler->scratches == 5);
 
 /* Save temporary register STR_PTR. */
-OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCAL0, STR_PTR, 0);
 
 /* Prepare arguments for the function call. */
 if (common->match_end_ptr == 0)
@@ -1052,7 +1053,7 @@ else
   SELECT(SLJIT_LESS, SLJIT_R0, STR_END, 0, SLJIT_R0);
   }
 
-GET_LOCAL_BASE(SLJIT_R1, 0, LOCALS0);
+GET_LOCAL_BASE(SLJIT_R1, 0, LOCAL0);
 OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1);
 OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2);
 ic.c.c1 = char1a;
@@ -1093,7 +1094,7 @@ if (diff == 1) {
 }
 
 /* Restore STR_PTR register. */
-OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCAL0);
 
 /* Check return value. */
 partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
@@ -2176,7 +2177,7 @@ struct sljit_label *restart;
 struct sljit_jump *jump[2];
 
 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
-SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
+SLJIT_ASSERT(diff <= (unsigned)IN_UCHARS(max_fast_forward_char_pair_offset()));
 
 /* Initialize. */
 if (common->match_end_ptr != 0)

+ 5 - 5
thirdparty/pcre2/src/pcre2_maketables.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2020 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -155,10 +155,10 @@ return yield;
 PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
 pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
 {
-  if (gcontext)
-    gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
-  else
-    free((void *)tables);
+if (gcontext != NULL)
+  gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
+else
+  free((void *)tables);
 }
 #endif
 

File diff suppressed because it is too large
+ 406 - 134
thirdparty/pcre2/src/pcre2_match.c


+ 5 - 3
thirdparty/pcre2/src/pcre2_match_data.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2022 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -77,14 +77,16 @@ return yield;
 *  Create a match data block using pattern data  *
 *************************************************/
 
-/* If no context is supplied, use the memory allocator from the code. */
+/* If no context is supplied, use the memory allocator from the code. This code
+assumes that a general context contains nothing other than a memory allocator.
+If that ever changes, this code will need fixing. */
 
 PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
 pcre2_match_data_create_from_pattern(const pcre2_code *code,
   pcre2_general_context *gcontext)
 {
 if (gcontext == NULL) gcontext = (pcre2_general_context *)code;
-return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
+return pcre2_match_data_create(((const pcre2_real_code *)code)->top_bracket + 1,
   gcontext);
 }
 

+ 1 - 1
thirdparty/pcre2/src/pcre2_ord2utf.c

@@ -117,4 +117,4 @@ return 1;
 }
 #endif  /* SUPPORT_UNICODE */
 
-/* End of pcre_ord2utf.c */
+/* End of pcre2_ord2utf.c */

+ 8 - 6
thirdparty/pcre2/src/pcre2_pattern_info.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -64,7 +64,7 @@ Returns:        0 when data returned
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
 {
-const pcre2_real_code *re = (pcre2_real_code *)code;
+const pcre2_real_code *re = (const pcre2_real_code *)code;
 
 if (where == NULL)   /* Requests field length */
   {
@@ -230,7 +230,8 @@ switch(what)
   break;
 
   case PCRE2_INFO_NAMETABLE:
-  *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
+  *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((const char *)re +
+    sizeof(pcre2_real_code));
   break;
 
   case PCRE2_INFO_NEWLINE:
@@ -268,7 +269,7 @@ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_callout_enumerate(const pcre2_code *code,
   int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
 {
-pcre2_real_code *re = (pcre2_real_code *)code;
+const pcre2_real_code *re = (const pcre2_real_code *)code;
 pcre2_callout_enumerate_block cb;
 PCRE2_SPTR cc;
 #ifdef SUPPORT_UNICODE
@@ -291,7 +292,7 @@ if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
 
 cb.version = 0;
-cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
+cc = (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code))
      + re->name_count * re->name_entry_size;
 
 while (TRUE)
@@ -383,8 +384,9 @@ while (TRUE)
 #endif
     break;
 
-#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
+#ifdef SUPPORT_WIDE_CHARS
     case OP_XCLASS:
+    case OP_ECLASS:
     cc += GET(cc, 1);
     break;
 #endif

+ 18 - 18
thirdparty/pcre2/src/pcre2_serialize.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2020 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -127,25 +127,25 @@ dst_bytes += TABLES_LENGTH;
 for (i = 0; i < number_of_codes; i++)
   {
   re = (const pcre2_real_code *)(codes[i]);
-  (void)memcpy(dst_bytes, (char *)re, re->blocksize);
-  
-  /* Certain fields in the compiled code block are re-set during 
-  deserialization. In order to ensure that the serialized data stream is always 
-  the same for the same pattern, set them to zero here. We can't assume the 
-  copy of the pattern is correctly aligned for accessing the fields as part of 
+  (void)memcpy(dst_bytes, (const char *)re, re->blocksize);
+
+  /* Certain fields in the compiled code block are re-set during
+  deserialization. In order to ensure that the serialized data stream is always
+  the same for the same pattern, set them to zero here. We can't assume the
+  copy of the pattern is correctly aligned for accessing the fields as part of
   a structure. Note the use of sizeof(void *) in the second of these, to
-  specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a 
-  pointer to uint8_t), gcc gives a warning because the first argument is also a 
-  pointer to uint8_t. Casting the first argument to (void *) can stop this, but 
+  specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a
+  pointer to uint8_t), gcc gives a warning because the first argument is also a
+  pointer to uint8_t. Casting the first argument to (void *) can stop this, but
   it didn't stop Coverity giving the same complaint. */
-  
-  (void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0, 
+
+  (void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0,
     sizeof(pcre2_memctl));
-  (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0, 
+  (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0,
     sizeof(void *));
   (void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
-    sizeof(void *));        
- 
+    sizeof(void *));
+
   dst_bytes += re->blocksize;
   }
 
@@ -232,10 +232,10 @@ for (i = 0; i < number_of_codes; i++)
   if (dst_re->magic_number != MAGIC_NUMBER ||
       dst_re->name_entry_size > MAX_NAME_SIZE + IMM2_SIZE + 1 ||
       dst_re->name_count > MAX_NAME_COUNT)
-    {   
-    memctl->free(dst_re, memctl->memory_data); 
+    {
+    memctl->free(dst_re, memctl->memory_data);
     return PCRE2_ERROR_BADSERIALIZEDDATA;
-    } 
+    }
 
   /* At the moment only one table is supported. */
 

+ 197 - 43
thirdparty/pcre2/src/pcre2_study.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2023 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -114,7 +114,7 @@ uint32_t once_fudge = 0;
 BOOL had_recurse = FALSE;
 BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
 PCRE2_SPTR nextbranch = code + GET(code, 1);
-PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
+PCRE2_SPTR cc = code + 1 + LINK_SIZE;
 recurse_check this_recurse;
 
 /* If this is a "could be empty" group, its minimum length is 0. */
@@ -136,12 +136,13 @@ passes 16-bits, reset to that value and skip the rest of the branch. */
 for (;;)
   {
   int d, min, recno;
-  PCRE2_UCHAR op, *cs, *ce;
+  PCRE2_UCHAR op;
+  PCRE2_SPTR cs, ce;
 
   if (branchlength >= UINT16_MAX)
     {
     branchlength = UINT16_MAX;
-    cc = (PCRE2_UCHAR *)nextbranch;
+    cc = nextbranch;
     }
 
   op = *cc;
@@ -249,6 +250,7 @@ for (;;)
     case OP_ASSERTBACK:
     case OP_ASSERTBACK_NOT:
     case OP_ASSERT_NA:
+    case OP_ASSERT_SCS:
     case OP_ASSERTBACK_NA:
     do cc += GET(cc, 1); while (*cc == OP_ALT);
     /* Fall through */
@@ -417,15 +419,14 @@ for (;;)
     case OP_NCLASS:
 #ifdef SUPPORT_WIDE_CHARS
     case OP_XCLASS:
+    case OP_ECLASS:
     /* The original code caused an unsigned overflow in 64 bit systems,
     so now we use a conditional statement. */
-    if (op == OP_XCLASS)
+    if (op == OP_XCLASS || op == OP_ECLASS)
       cc += GET(cc, 1);
     else
-      cc += PRIV(OP_lengths)[OP_CLASS];
-#else
-    cc += PRIV(OP_lengths)[OP_CLASS];
 #endif
+      cc += PRIV(OP_lengths)[OP_CLASS];
 
     switch (*cc)
       {
@@ -479,8 +480,8 @@ for (;;)
     if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
       {
       int count = GET2(cc, 1+IMM2_SIZE);
-      PCRE2_UCHAR *slot =
-        (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
+      PCRE2_SPTR slot =
+        (PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code)) +
           GET2(cc, 1) * re->name_entry_size;
 
       d = INT_MAX;
@@ -496,13 +497,12 @@ for (;;)
           dd = backref_cache[recno];
         else
           {
-          ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
+          ce = cs = PRIV(find_bracket)(startcode, utf, recno);
           if (cs == NULL) return -2;
           do ce += GET(ce, 1); while (*ce == OP_ALT);
 
           dd = 0;
-          if (!dupcapused ||
-              (PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
+          if (!dupcapused || PRIV(find_bracket)(ce, utf, recno) == NULL)
             {
             if (cc > cs && cc < ce)    /* Simple recursion */
               {
@@ -539,7 +539,7 @@ for (;;)
         }
       }
     else d = 0;
-    cc += 1 + 2*IMM2_SIZE;
+    cc += PRIV(OP_lengths)[*cc];
     goto REPEAT_BACK_REFERENCE;
 
     /* Single back reference by number. References by name are converted to by
@@ -557,12 +557,11 @@ for (;;)
 
       if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
         {
-        ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
+        ce = cs = PRIV(find_bracket)(startcode, utf, recno);
         if (cs == NULL) return -2;
         do ce += GET(ce, 1); while (*ce == OP_ALT);
 
-        if (!dupcapused ||
-            (PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
+        if (!dupcapused || PRIV(find_bracket)(ce, utf, recno) == NULL)
           {
           if (cc > cs && cc < ce)    /* Simple recursion */
             {
@@ -593,7 +592,7 @@ for (;;)
       backref_cache[0] = recno;
       }
 
-    cc += 1 + IMM2_SIZE;
+    cc += PRIV(OP_lengths)[*cc];
 
     /* Handle repeated back references */
 
@@ -643,7 +642,7 @@ for (;;)
     pattern contains multiple subpatterns with the same number. */
 
     case OP_RECURSE:
-    cs = ce = (PCRE2_UCHAR *)startcode + GET(cc, 1);
+    cs = ce = startcode + GET(cc, 1);
     recno = GET2(cs, 1+LINK_SIZE);
     if (recno == prev_recurse_recno)
       {
@@ -755,10 +754,13 @@ for (;;)
     new ones get added they are properly considered. */
 
     default:
+    PCRE2_DEBUG_UNREACHABLE();
     return -3;
     }
   }
-/* Control never gets here */
+
+PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
+return -3;                 /* Avoid compiler warnings */
 }
 
 
@@ -919,6 +921,138 @@ if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
 
 
 
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+/*************************************************
+*     Set starting bits for a character list.    *
+*************************************************/
+
+/* This function sets starting bits for a character list. It enumerates
+all characters and character ranges in the character list, and sets
+the starting bits accordingly.
+
+Each item value is offset by a per-list constant and passed through
+PRIV(ord2utf); only the first code unit of the result is used. A single
+character sets the bit for that code unit. A range sets the bits for every
+first code unit from that of the range start up to that of the range end.
+Bits are only ever set in start_bitmap, never cleared.
+
+Arguments:
+  code            pointer to the code; the first two code units hold the
+                  list type bits, followed by a GET() value that is used
+                  to locate the list data
+  start_bitmap    pointer to the starting bitmap
+  char_lists_end  pointer from which the list data is located: the data
+                  starts (GET(code, 0) << 1) bytes before this address
+
+Returns:         nothing
+*/
+static void
+study_char_list(PCRE2_SPTR code, uint8_t *start_bitmap,
+  const uint8_t *char_lists_end)
+{
+uint32_t type, list_ind;
+uint32_t char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
+uint32_t range_start = ~(uint32_t)0, range_end = 0; /* ~0: no range open */
+const uint8_t *next_char;
+PCRE2_UCHAR start_buffer[6], end_buffer[6];
+PCRE2_UCHAR start, end;
+
+/* Only needed in 8-bit mode at the moment. The type bits are assembled
+from the first two code units, high part first. */
+type = (uint32_t)(code[0] << 8) | code[1];
+code += 2;
+
+/* Align characters. The stored offset is doubled and counted backwards
+from char_lists_end to find the first list item. */
+next_char = char_lists_end - (GET(code, 0) << 1);
+type &= XCL_TYPE_MASK;
+list_ind = 0;
+
+if ((type & XCL_BEGIN_WITH_RANGE) != 0)
+  range_start = XCL_CHAR_LIST_LOW_16_START;
+
+while (type > 0)
+  {
+  uint32_t item_count = type & XCL_ITEM_COUNT_MASK;
+
+  if (item_count == XCL_ITEM_COUNT_MASK) /* count is stored in the data */
+    {
+    if (list_ind <= 1) /* lists 0 and 1 are read as 16-bit values */
+      {
+      item_count = *(const uint16_t*)next_char;
+      next_char += 2;
+      }
+    else
+      {
+      item_count = *(const uint32_t*)next_char;
+      next_char += 4;
+      }
+    }
+
+  while (item_count > 0)
+    {
+    if (list_ind <= 1)
+      {
+      range_end = *(const uint16_t*)next_char;
+      next_char += 2;
+      }
+    else
+      {
+      range_end = *(const uint32_t*)next_char;
+      next_char += 4;
+      }
+
+    if ((range_end & XCL_CHAR_END) != 0) /* item ends a char or a range */
+      {
+      range_end = char_list_add + (range_end >> XCL_CHAR_SHIFT);
+
+      PRIV(ord2utf)(range_end, end_buffer);
+      end = end_buffer[0];
+
+      if (range_start < range_end)
+        {
+        PRIV(ord2utf)(range_start, start_buffer);
+        for (start = start_buffer[0]; start <= end; start++)
+          start_bitmap[start / 8] |= (1u << (start & 7));
+        }
+      else
+        start_bitmap[end / 8] |= (1u << (end & 7));
+
+      range_start = ~(uint32_t)0;
+      }
+    else
+      range_start = char_list_add + (range_end >> XCL_CHAR_SHIFT); /* opens a range */
+
+    item_count--;
+    }
+
+  list_ind++; /* from here on, type and list_ind refer to the next list */
+  type >>= XCL_TYPE_BIT_LEN;
+
+  if (range_start == ~(uint32_t)0) /* the finished list left no open range */
+    {
+    if ((type & XCL_BEGIN_WITH_RANGE) != 0)
+      {
+      /* In 8 bit mode XCL_CHAR_LIST_HIGH_32_START is not possible. */
+      if (list_ind == 1) range_start = XCL_CHAR_LIST_HIGH_16_START;
+      else range_start = XCL_CHAR_LIST_LOW_32_START;
+      }
+    }
+  else if ((type & XCL_BEGIN_WITH_RANGE) == 0) /* close it at the list end */
+    {
+    PRIV(ord2utf)(range_start, start_buffer);
+
+    /* In 8 bit mode XCL_CHAR_LIST_LOW_32_END and
+    XCL_CHAR_LIST_HIGH_32_END are not possible. */
+    if (list_ind == 1) range_end = XCL_CHAR_LIST_LOW_16_END;
+    else range_end = XCL_CHAR_LIST_HIGH_16_END;
+
+    PRIV(ord2utf)(range_end, end_buffer);
+    end = end_buffer[0];
+
+    for (start = start_buffer[0]; start <= end; start++)
+      start_bitmap[start / 8] |= (1u << (start & 7));
+
+    range_start = ~(uint32_t)0;
+    }
+
+  /* In 8 bit mode XCL_CHAR_LIST_HIGH_32_ADD is not possible. */
+  if (list_ind == 1) char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
+  else char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
+  }
+}
+#endif
+
+
+
 /*************************************************
 *      Create bitmap of starting code units      *
 *************************************************/
@@ -980,7 +1114,7 @@ do
     {
     int rc;
     PCRE2_SPTR ncode;
-    uint8_t *classmap = NULL;
+    const uint8_t *classmap = NULL;
 #ifdef SUPPORT_WIDE_CHARS
     PCRE2_UCHAR xclassflags;
 #endif
@@ -1134,6 +1268,7 @@ do
           case OP_ASSERTBACK_NOT:
           case OP_ASSERT_NA:
           case OP_ASSERTBACK_NA:
+          case OP_ASSERT_SCS:
           ncode += GET(ncode, 1);
           while (*ncode == OP_ALT) ncode += GET(ncode, 1);
           ncode += 1 + LINK_SIZE;
@@ -1252,12 +1387,14 @@ do
       tcode += GET(tcode, 1 + 2*LINK_SIZE);
       break;
 
-      /* Skip over lookbehind and negative lookahead assertions */
+      /* Skip over lookbehind, negative lookahead, and scan substring
+      assertions */
 
       case OP_ASSERT_NOT:
       case OP_ASSERTBACK:
       case OP_ASSERTBACK_NOT:
       case OP_ASSERTBACK_NA:
+      case OP_ASSERT_SCS:
       do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
       tcode += 1 + LINK_SIZE;
       break;
@@ -1578,6 +1715,13 @@ do
       tcode += 2;
       break;
 
+      /* Set-based ECLASS: treat it the same as a "complex" XCLASS; give up. */
+
+#ifdef SUPPORT_WIDE_CHARS
+      case OP_ECLASS:
+      return SSB_FAIL;
+#endif
+
       /* Extended class: if there are any property checks, or if this is a
       negative XCLASS without a map, give up. If there are no property checks,
       there must be wide characters on the XCLASS list, because otherwise an
@@ -1596,7 +1740,7 @@ do
       map pointer if there is one, and fall through. */
 
       classmap = ((xclassflags & XCL_MAP) == 0)? NULL :
-        (uint8_t *)(tcode + 1 + LINK_SIZE + 1);
+        (const uint8_t *)(tcode + 1 + LINK_SIZE + 1);
 
       /* In UTF-8 mode, scan the character list and set bits for leading bytes,
       then jump to handle the map. */
@@ -1608,6 +1752,13 @@ do
         PCRE2_SPTR p = tcode + 1 + LINK_SIZE + 1 + ((classmap == NULL)? 0:32);
         tcode += GET(tcode, 1);
 
+        if (*p >= XCL_LIST)
+          {
+          study_char_list(p, re->start_bitmap,
+            ((const uint8_t *)re + re->code_start));
+          goto HANDLE_CLASSMAP;
+          }
+
         for (;;) switch (*p++)
           {
           case XCL_SINGLE:
@@ -1629,6 +1780,7 @@ do
           goto HANDLE_CLASSMAP;
 
           default:
+          PCRE2_DEBUG_UNREACHABLE();
           return SSB_UNKNOWN;   /* Internal error, should not occur */
           }
         }
@@ -1665,7 +1817,7 @@ do
       case OP_CLASS:
       if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else
         {
-        classmap = (uint8_t *)(++tcode);
+        classmap = (const uint8_t *)(++tcode);
         tcode += 32 / sizeof(PCRE2_UCHAR);
         }
 
@@ -1768,8 +1920,7 @@ BOOL ucp = (re->overall_options & PCRE2_UCP) != 0;
 
 /* Find start of compiled code */
 
-code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
-  re->name_entry_size * re->name_count;
+code = (PCRE2_UCHAR *)((uint8_t *)re + re->code_start);
 
 /* For a pattern that has a first code unit, or a multiline pattern that
 matches only at "line start", there is no point in seeking a list of starting
@@ -1779,7 +1930,11 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
   {
   int depth = 0;
   int rc = set_start_bits(re, code, utf, ucp, &depth);
-  if (rc == SSB_UNKNOWN) return 1;
+  if (rc == SSB_UNKNOWN)
+    {
+    PCRE2_DEBUG_UNREACHABLE();
+    return 1;
+    }
 
   /* If a list of starting code units was set up, scan the list to see if only
   one or two were listed. Having only one listed is rare because usually a
@@ -1852,25 +2007,22 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
         }
       }
 
-    /* Replace the start code unit bits with a first code unit, but only if it
-    is not the same as a required later code unit. This is because a search for
-    a required code unit starts after an explicit first code unit, but at a
-    code unit found from the bitmap. Patterns such as /a*a/ don't work
-    if both the start unit and required unit are the same. */
-
-    if (a >= 0 &&
-        (
-        (re->flags & PCRE2_LASTSET) == 0 ||
-          (
-          re->last_codeunit != (uint32_t)a &&
-          (b < 0 || re->last_codeunit != (uint32_t)b)
-          )
-        ))
-      {
+    /* Replace the start code unit bits with a first code unit. If it is the
+    same as a required later code unit, then clear the required later code
+    unit. This is because a search for a required code unit starts after an
+    explicit first code unit, but at a code unit found from the bitmap.
+    Patterns such as /a*a/ don't work if both the start unit and required
+    unit are the same. */
+
+    if (a >= 0) {
+      if ((re->flags & PCRE2_LASTSET) && (re->last_codeunit == (uint32_t)a || (b >= 0 && re->last_codeunit == (uint32_t)b))) {
+        re->flags &= ~(PCRE2_LASTSET | PCRE2_LASTCASELESS);
+        re->last_codeunit = 0;
+      }
       re->first_codeunit = a;
       flags = PCRE2_FIRSTSET;
       if (b >= 0) flags |= PCRE2_FIRSTCASELESS;
-      }
+    }
 
     DONE:
     re->flags |= flags;
@@ -1898,9 +2050,11 @@ if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 &&
     break;    /* Leave minlength unchanged (will be zero) */
 
     case -2:
+    PCRE2_DEBUG_UNREACHABLE();
     return 2; /* missing capturing bracket */
 
     case -3:
+    PCRE2_DEBUG_UNREACHABLE();
     return 3; /* unrecognized opcode */
 
     default:

File diff suppressed because it is too large
+ 851 - 164
thirdparty/pcre2/src/pcre2_substitute.c


+ 2 - 2
thirdparty/pcre2/src/pcre2_substring.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2023 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -486,7 +486,7 @@ pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
 uint16_t bot = 0;
 uint16_t top = code->name_count;
 uint16_t entrysize = code->name_entry_size;
-PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code));
+PCRE2_SPTR nametable = (PCRE2_SPTR)((const char *)code + sizeof(pcre2_real_code));
 
 while (top > bot)
   {

+ 6 - 6
thirdparty/pcre2/src/pcre2_tables.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2021 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -171,9 +171,9 @@ are implementing).
 6. Do not break after Prepend characters.
 
 7. Do not break within emoji modifier sequences or emoji zwj sequences. That
-   is, do not break between characters with the Extended_Pictographic property.
-   Extend and ZWJ characters are allowed between the characters; this cannot be
-   represented in this table, the code has to deal with it.
+   is, do not break between characters with the Extended_Pictographic property
+   if a ZWJ intervenes. Extend characters are allowed between the characters;
+   this cannot be represented in this table, the code has to deal with it.
 
 8. Do not break within emoji flag sequences. That is, do not break between
    regional indicator (RI) symbols if there are an odd number of RI characters
@@ -203,8 +203,8 @@ const uint32_t PRIV(ucp_gbtable)[] = {
    ESZ|(1u<<ucp_gbT),                                   /* 10 LVT */
    (1u<<ucp_gbRegional_Indicator),                      /* 11 Regional Indicator */
    ESZ,                                                 /* 12 Other */
-   ESZ,                                                 /* 13 ZWJ */
-   ESZ|(1u<<ucp_gbExtended_Pictographic)                /* 14 Extended Pictographic */
+   ESZ|(1u<<ucp_gbExtended_Pictographic),               /* 13 ZWJ */
+   ESZ                                                  /* 14 Extended Pictographic */
 };
 
 #undef ESZ

File diff suppressed because it is too large
+ 1879 - 1635
thirdparty/pcre2/src/pcre2_ucd.c


+ 37 - 25
thirdparty/pcre2/src/pcre2_ucp.h

@@ -132,13 +132,18 @@ enum {
   ucp_Hex_Digit,
   ucp_IDS_Binary_Operator,
   ucp_IDS_Trinary_Operator,
+  ucp_IDS_Unary_Operator,
+  ucp_ID_Compat_Math_Continue,
+  ucp_ID_Compat_Math_Start,
   ucp_ID_Continue,
   ucp_ID_Start,
   ucp_Ideographic,
+  ucp_InCB,
   ucp_Join_Control,
   ucp_Logical_Order_Exception,
   ucp_Lowercase,
   ucp_Math,
+  ucp_Modifier_Combining_Mark,
   ucp_Noncharacter_Code_Point,
   ucp_Pattern_Syntax,
   ucp_Pattern_White_Space,
@@ -219,6 +224,8 @@ enum {
   ucp_Latin,
   ucp_Greek,
   ucp_Cyrillic,
+  ucp_Armenian,
+  ucp_Hebrew,
   ucp_Arabic,
   ucp_Syriac,
   ucp_Thaana,
@@ -232,15 +239,21 @@ enum {
   ucp_Kannada,
   ucp_Malayalam,
   ucp_Sinhala,
+  ucp_Thai,
+  ucp_Tibetan,
   ucp_Myanmar,
   ucp_Georgian,
   ucp_Hangul,
+  ucp_Ethiopic,
+  ucp_Cherokee,
+  ucp_Runic,
   ucp_Mongolian,
   ucp_Hiragana,
   ucp_Katakana,
   ucp_Bopomofo,
   ucp_Han,
   ucp_Yi,
+  ucp_Gothic,
   ucp_Tagalog,
   ucp_Hanunoo,
   ucp_Buhid,
@@ -248,21 +261,33 @@ enum {
   ucp_Limbu,
   ucp_Tai_Le,
   ucp_Linear_B,
+  ucp_Shavian,
   ucp_Cypriot,
   ucp_Buginese,
   ucp_Coptic,
   ucp_Glagolitic,
+  ucp_Tifinagh,
   ucp_Syloti_Nagri,
   ucp_Phags_Pa,
   ucp_Nko,
   ucp_Kayah_Li,
+  ucp_Lycian,
+  ucp_Carian,
+  ucp_Lydian,
+  ucp_Avestan,
+  ucp_Samaritan,
+  ucp_Lisu,
   ucp_Javanese,
+  ucp_Old_Turkic,
   ucp_Kaithi,
   ucp_Mandaic,
   ucp_Chakma,
+  ucp_Meroitic_Hieroglyphs,
   ucp_Sharada,
   ucp_Takri,
+  ucp_Caucasian_Albanian,
   ucp_Duployan,
+  ucp_Elbasan,
   ucp_Grantha,
   ucp_Khojki,
   ucp_Linear_A,
@@ -274,7 +299,10 @@ enum {
   ucp_Khudawadi,
   ucp_Tirhuta,
   ucp_Multani,
+  ucp_Old_Hungarian,
   ucp_Adlam,
+  ucp_Osage,
+  ucp_Tangut,
   ucp_Masaram_Gondi,
   ucp_Dogra,
   ucp_Gunjala_Gondi,
@@ -284,31 +312,28 @@ enum {
   ucp_Yezidi,
   ucp_Cypro_Minoan,
   ucp_Old_Uyghur,
+  ucp_Toto,
+  ucp_Garay,
+  ucp_Gurung_Khema,
+  ucp_Ol_Onal,
+  ucp_Sunuwar,
+  ucp_Todhri,
+  ucp_Tulu_Tigalari,
 
   /* Scripts which have no characters in other scripts. */
   ucp_Unknown,
   ucp_Common,
-  ucp_Armenian,
-  ucp_Hebrew,
-  ucp_Thai,
   ucp_Lao,
-  ucp_Tibetan,
-  ucp_Ethiopic,
-  ucp_Cherokee,
   ucp_Canadian_Aboriginal,
   ucp_Ogham,
-  ucp_Runic,
   ucp_Khmer,
   ucp_Old_Italic,
-  ucp_Gothic,
   ucp_Deseret,
   ucp_Inherited,
   ucp_Ugaritic,
-  ucp_Shavian,
   ucp_Osmanya,
   ucp_Braille,
   ucp_New_Tai_Lue,
-  ucp_Tifinagh,
   ucp_Old_Persian,
   ucp_Kharoshthi,
   ucp_Balinese,
@@ -320,32 +345,22 @@ enum {
   ucp_Vai,
   ucp_Saurashtra,
   ucp_Rejang,
-  ucp_Lycian,
-  ucp_Carian,
-  ucp_Lydian,
   ucp_Cham,
   ucp_Tai_Tham,
   ucp_Tai_Viet,
-  ucp_Avestan,
   ucp_Egyptian_Hieroglyphs,
-  ucp_Samaritan,
-  ucp_Lisu,
   ucp_Bamum,
   ucp_Meetei_Mayek,
   ucp_Imperial_Aramaic,
   ucp_Old_South_Arabian,
   ucp_Inscriptional_Parthian,
   ucp_Inscriptional_Pahlavi,
-  ucp_Old_Turkic,
   ucp_Batak,
   ucp_Brahmi,
   ucp_Meroitic_Cursive,
-  ucp_Meroitic_Hieroglyphs,
   ucp_Miao,
   ucp_Sora_Sompeng,
-  ucp_Caucasian_Albanian,
   ucp_Bassa_Vah,
-  ucp_Elbasan,
   ucp_Pahawh_Hmong,
   ucp_Mende_Kikakui,
   ucp_Mro,
@@ -358,13 +373,10 @@ enum {
   ucp_Ahom,
   ucp_Anatolian_Hieroglyphs,
   ucp_Hatran,
-  ucp_Old_Hungarian,
   ucp_SignWriting,
   ucp_Bhaiksuki,
   ucp_Marchen,
   ucp_Newa,
-  ucp_Osage,
-  ucp_Tangut,
   ucp_Nushu,
   ucp_Soyombo,
   ucp_Zanabazar_Square,
@@ -378,10 +390,10 @@ enum {
   ucp_Dives_Akuru,
   ucp_Khitan_Small_Script,
   ucp_Tangsa,
-  ucp_Toto,
   ucp_Vithkuqi,
   ucp_Kawi,
   ucp_Nag_Mundari,
+  ucp_Kirat_Rai,
 
   /* This must be last */
   ucp_Script_Count
@@ -389,7 +401,7 @@ enum {
 
 /* Size of entries in ucd_script_sets[] */
 
-#define ucd_script_sets_item_size 3
+#define ucd_script_sets_item_size 4
 
 #endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
 

+ 420 - 357
thirdparty/pcre2/src/pcre2_ucptables.c

@@ -199,6 +199,8 @@ the "loose matching" rules that Unicode advises and Perl uses. */
 #define STRING_extendedpictographic0 STR_e STR_x STR_t STR_e STR_n STR_d STR_e STR_d STR_p STR_i STR_c STR_t STR_o STR_g STR_r STR_a STR_p STR_h STR_i STR_c "\0"
 #define STRING_extender0 STR_e STR_x STR_t STR_e STR_n STR_d STR_e STR_r "\0"
 #define STRING_extpict0 STR_e STR_x STR_t STR_p STR_i STR_c STR_t "\0"
+#define STRING_gara0 STR_g STR_a STR_r STR_a "\0"
+#define STRING_garay0 STR_g STR_a STR_r STR_a STR_y "\0"
 #define STRING_geor0 STR_g STR_e STR_o STR_r "\0"
 #define STRING_georgian0 STR_g STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
 #define STRING_glag0 STR_g STR_l STR_a STR_g "\0"
@@ -219,9 +221,11 @@ the "loose matching" rules that Unicode advises and Perl uses. */
 #define STRING_grlink0 STR_g STR_r STR_l STR_i STR_n STR_k "\0"
 #define STRING_gujarati0 STR_g STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
 #define STRING_gujr0 STR_g STR_u STR_j STR_r "\0"
+#define STRING_gukh0 STR_g STR_u STR_k STR_h "\0"
 #define STRING_gunjalagondi0 STR_g STR_u STR_n STR_j STR_a STR_l STR_a STR_g STR_o STR_n STR_d STR_i "\0"
 #define STRING_gurmukhi0 STR_g STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
 #define STRING_guru0 STR_g STR_u STR_r STR_u "\0"
+#define STRING_gurungkhema0 STR_g STR_u STR_r STR_u STR_n STR_g STR_k STR_h STR_e STR_m STR_a "\0"
 #define STRING_han0 STR_h STR_a STR_n "\0"
 #define STRING_hang0 STR_h STR_a STR_n STR_g "\0"
 #define STRING_hangul0 STR_h STR_a STR_n STR_g STR_u STR_l "\0"
@@ -242,6 +246,8 @@ the "loose matching" rules that Unicode advises and Perl uses. */
 #define STRING_hmnp0 STR_h STR_m STR_n STR_p "\0"
 #define STRING_hung0 STR_h STR_u STR_n STR_g "\0"
 #define STRING_idc0 STR_i STR_d STR_c "\0"
+#define STRING_idcompatmathcontinue0 STR_i STR_d STR_c STR_o STR_m STR_p STR_a STR_t STR_m STR_a STR_t STR_h STR_c STR_o STR_n STR_t STR_i STR_n STR_u STR_e "\0"
+#define STRING_idcompatmathstart0 STR_i STR_d STR_c STR_o STR_m STR_p STR_a STR_t STR_m STR_a STR_t STR_h STR_s STR_t STR_a STR_r STR_t "\0"
 #define STRING_idcontinue0 STR_i STR_d STR_c STR_o STR_n STR_t STR_i STR_n STR_u STR_e "\0"
 #define STRING_ideo0 STR_i STR_d STR_e STR_o "\0"
 #define STRING_ideographic0 STR_i STR_d STR_e STR_o STR_g STR_r STR_a STR_p STR_h STR_i STR_c "\0"
@@ -251,7 +257,10 @@ the "loose matching" rules that Unicode advises and Perl uses. */
 #define STRING_idst0 STR_i STR_d STR_s STR_t "\0"
 #define STRING_idstart0 STR_i STR_d STR_s STR_t STR_a STR_r STR_t "\0"
 #define STRING_idstrinaryoperator0 STR_i STR_d STR_s STR_t STR_r STR_i STR_n STR_a STR_r STR_y STR_o STR_p STR_e STR_r STR_a STR_t STR_o STR_r "\0"
+#define STRING_idsu0 STR_i STR_d STR_s STR_u "\0"
+#define STRING_idsunaryoperator0 STR_i STR_d STR_s STR_u STR_n STR_a STR_r STR_y STR_o STR_p STR_e STR_r STR_a STR_t STR_o STR_r "\0"
 #define STRING_imperialaramaic0 STR_i STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_a STR_r STR_a STR_m STR_a STR_i STR_c "\0"
+#define STRING_incb0 STR_i STR_n STR_c STR_b "\0"
 #define STRING_inherited0 STR_i STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0"
 #define STRING_inscriptionalpahlavi0 STR_i STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_p STR_a STR_h STR_l STR_a STR_v STR_i "\0"
 #define STRING_inscriptionalparthian0 STR_i STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_p STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0"
@@ -275,8 +284,10 @@ the "loose matching" rules that Unicode advises and Perl uses. */
 #define STRING_khoj0 STR_k STR_h STR_o STR_j "\0"
 #define STRING_khojki0 STR_k STR_h STR_o STR_j STR_k STR_i "\0"
 #define STRING_khudawadi0 STR_k STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
+#define STRING_kiratrai0 STR_k STR_i STR_r STR_a STR_t STR_r STR_a STR_i "\0"
 #define STRING_kits0 STR_k STR_i STR_t STR_s "\0"
 #define STRING_knda0 STR_k STR_n STR_d STR_a "\0"
+#define STRING_krai0 STR_k STR_r STR_a STR_i "\0"
 #define STRING_kthi0 STR_k STR_t STR_h STR_i "\0"
 #define STRING_l0 STR_l "\0"
 #define STRING_l_AMPERSAND0 STR_l STR_AMPERSAND "\0"
@@ -323,6 +334,7 @@ the "loose matching" rules that Unicode advises and Perl uses. */
 #define STRING_masaramgondi0 STR_m STR_a STR_s STR_a STR_r STR_a STR_m STR_g STR_o STR_n STR_d STR_i "\0"
 #define STRING_math0 STR_m STR_a STR_t STR_h "\0"
 #define STRING_mc0 STR_m STR_c "\0"
+#define STRING_mcm0 STR_m STR_c STR_m "\0"
 #define STRING_me0 STR_m STR_e "\0"
 #define STRING_medefaidrin0 STR_m STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0"
 #define STRING_medf0 STR_m STR_e STR_d STR_f "\0"
@@ -337,6 +349,7 @@ the "loose matching" rules that Unicode advises and Perl uses. */
 #define STRING_mlym0 STR_m STR_l STR_y STR_m "\0"
 #define STRING_mn0 STR_m STR_n "\0"
 #define STRING_modi0 STR_m STR_o STR_d STR_i "\0"
+#define STRING_modifiercombiningmark0 STR_m STR_o STR_d STR_i STR_f STR_i STR_e STR_r STR_c STR_o STR_m STR_b STR_i STR_n STR_i STR_n STR_g STR_m STR_a STR_r STR_k "\0"
 #define STRING_mong0 STR_m STR_o STR_n STR_g "\0"
 #define STRING_mongolian0 STR_m STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
 #define STRING_mro0 STR_m STR_r STR_o "\0"
@@ -379,6 +392,8 @@ the "loose matching" rules that Unicode advises and Perl uses. */
 #define STRING_oldsoutharabian0 STR_o STR_l STR_d STR_s STR_o STR_u STR_t STR_h STR_a STR_r STR_a STR_b STR_i STR_a STR_n "\0"
 #define STRING_oldturkic0 STR_o STR_l STR_d STR_t STR_u STR_r STR_k STR_i STR_c "\0"
 #define STRING_olduyghur0 STR_o STR_l STR_d STR_u STR_y STR_g STR_h STR_u STR_r "\0"
+#define STRING_olonal0 STR_o STR_l STR_o STR_n STR_a STR_l "\0"
+#define STRING_onao0 STR_o STR_n STR_a STR_o "\0"
 #define STRING_oriya0 STR_o STR_r STR_i STR_y STR_a "\0"
 #define STRING_orkh0 STR_o STR_r STR_k STR_h "\0"
 #define STRING_orya0 STR_o STR_r STR_y STR_a "\0"
@@ -463,6 +478,8 @@ the "loose matching" rules that Unicode advises and Perl uses. */
 #define STRING_sterm0 STR_s STR_t STR_e STR_r STR_m "\0"
 #define STRING_sund0 STR_s STR_u STR_n STR_d "\0"
 #define STRING_sundanese0 STR_s STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
+#define STRING_sunu0 STR_s STR_u STR_n STR_u "\0"
+#define STRING_sunuwar0 STR_s STR_u STR_n STR_u STR_w STR_a STR_r "\0"
 #define STRING_sylo0 STR_s STR_y STR_l STR_o "\0"
 #define STRING_sylotinagri0 STR_s STR_y STR_l STR_o STR_t STR_i STR_n STR_a STR_g STR_r STR_i "\0"
 #define STRING_syrc0 STR_s STR_y STR_r STR_c "\0"
@@ -498,7 +515,11 @@ the "loose matching" rules that Unicode advises and Perl uses. */
 #define STRING_tirh0 STR_t STR_i STR_r STR_h "\0"
 #define STRING_tirhuta0 STR_t STR_i STR_r STR_h STR_u STR_t STR_a "\0"
 #define STRING_tnsa0 STR_t STR_n STR_s STR_a "\0"
+#define STRING_todhri0 STR_t STR_o STR_d STR_h STR_r STR_i "\0"
+#define STRING_todr0 STR_t STR_o STR_d STR_r "\0"
 #define STRING_toto0 STR_t STR_o STR_t STR_o "\0"
+#define STRING_tulutigalari0 STR_t STR_u STR_l STR_u STR_t STR_i STR_g STR_a STR_l STR_a STR_r STR_i "\0"
+#define STRING_tutg0 STR_t STR_u STR_t STR_g "\0"
 #define STRING_ugar0 STR_u STR_g STR_a STR_r "\0"
 #define STRING_ugaritic0 STR_u STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
 #define STRING_uideo0 STR_u STR_i STR_d STR_e STR_o "\0"
@@ -690,6 +711,8 @@ const char PRIV(utt_names)[] =
   STRING_extendedpictographic0
   STRING_extender0
   STRING_extpict0
+  STRING_gara0
+  STRING_garay0
   STRING_geor0
   STRING_georgian0
   STRING_glag0
@@ -710,9 +733,11 @@ const char PRIV(utt_names)[] =
   STRING_grlink0
   STRING_gujarati0
   STRING_gujr0
+  STRING_gukh0
   STRING_gunjalagondi0
   STRING_gurmukhi0
   STRING_guru0
+  STRING_gurungkhema0
   STRING_han0
   STRING_hang0
   STRING_hangul0
@@ -733,6 +758,8 @@ const char PRIV(utt_names)[] =
   STRING_hmnp0
   STRING_hung0
   STRING_idc0
+  STRING_idcompatmathcontinue0
+  STRING_idcompatmathstart0
   STRING_idcontinue0
   STRING_ideo0
   STRING_ideographic0
@@ -742,7 +769,10 @@ const char PRIV(utt_names)[] =
   STRING_idst0
   STRING_idstart0
   STRING_idstrinaryoperator0
+  STRING_idsu0
+  STRING_idsunaryoperator0
   STRING_imperialaramaic0
+  STRING_incb0
   STRING_inherited0
   STRING_inscriptionalpahlavi0
   STRING_inscriptionalparthian0
@@ -766,8 +796,10 @@ const char PRIV(utt_names)[] =
   STRING_khoj0
   STRING_khojki0
   STRING_khudawadi0
+  STRING_kiratrai0
   STRING_kits0
   STRING_knda0
+  STRING_krai0
   STRING_kthi0
   STRING_l0
   STRING_l_AMPERSAND0
@@ -814,6 +846,7 @@ const char PRIV(utt_names)[] =
   STRING_masaramgondi0
   STRING_math0
   STRING_mc0
+  STRING_mcm0
   STRING_me0
   STRING_medefaidrin0
   STRING_medf0
@@ -828,6 +861,7 @@ const char PRIV(utt_names)[] =
   STRING_mlym0
   STRING_mn0
   STRING_modi0
+  STRING_modifiercombiningmark0
   STRING_mong0
   STRING_mongolian0
   STRING_mro0
@@ -870,6 +904,8 @@ const char PRIV(utt_names)[] =
   STRING_oldsoutharabian0
   STRING_oldturkic0
   STRING_olduyghur0
+  STRING_olonal0
+  STRING_onao0
   STRING_oriya0
   STRING_orkh0
   STRING_orya0
@@ -954,6 +990,8 @@ const char PRIV(utt_names)[] =
   STRING_sterm0
   STRING_sund0
   STRING_sundanese0
+  STRING_sunu0
+  STRING_sunuwar0
   STRING_sylo0
   STRING_sylotinagri0
   STRING_syrc0
@@ -989,7 +1027,11 @@ const char PRIV(utt_names)[] =
   STRING_tirh0
   STRING_tirhuta0
   STRING_tnsa0
+  STRING_todhri0
+  STRING_todr0
   STRING_toto0
+  STRING_tulutigalari0
+  STRING_tutg0
   STRING_ugar0
   STRING_ugaritic0
   STRING_uideo0
@@ -1037,7 +1079,7 @@ const char PRIV(utt_names)[] =
 const ucp_type_table PRIV(utt)[] = {
   {   0, PT_SCX, ucp_Adlam },
   {   6, PT_SCX, ucp_Adlam },
-  {  11, PT_SC, ucp_Caucasian_Albanian },
+  {  11, PT_SCX, ucp_Caucasian_Albanian },
   {  16, PT_BOOL, ucp_ASCII_Hex_Digit },
   {  21, PT_SC, ucp_Ahom },
   {  26, PT_BOOL, ucp_Alphabetic },
@@ -1046,13 +1088,13 @@ const ucp_type_table PRIV(utt)[] = {
   {  64, PT_ANY, 0 },
   {  68, PT_SCX, ucp_Arabic },
   {  73, PT_SCX, ucp_Arabic },
-  {  80, PT_SC, ucp_Armenian },
+  {  80, PT_SCX, ucp_Armenian },
   {  89, PT_SC, ucp_Imperial_Aramaic },
-  {  94, PT_SC, ucp_Armenian },
+  {  94, PT_SCX, ucp_Armenian },
   {  99, PT_BOOL, ucp_ASCII },
   { 105, PT_BOOL, ucp_ASCII_Hex_Digit },
-  { 119, PT_SC, ucp_Avestan },
-  { 127, PT_SC, ucp_Avestan },
+  { 119, PT_SCX, ucp_Avestan },
+  { 127, PT_SCX, ucp_Avestan },
   { 132, PT_SC, ucp_Balinese },
   { 137, PT_SC, ucp_Balinese },
   { 146, PT_SC, ucp_Bamum },
@@ -1106,11 +1148,11 @@ const ucp_type_table PRIV(utt)[] = {
   { 480, PT_SCX, ucp_Chakma },
   { 485, PT_SC, ucp_Canadian_Aboriginal },
   { 504, PT_SC, ucp_Canadian_Aboriginal },
-  { 509, PT_SC, ucp_Carian },
-  { 514, PT_SC, ucp_Carian },
+  { 509, PT_SCX, ucp_Carian },
+  { 514, PT_SCX, ucp_Carian },
   { 521, PT_BOOL, ucp_Cased },
   { 527, PT_BOOL, ucp_Case_Ignorable },
-  { 541, PT_SC, ucp_Caucasian_Albanian },
+  { 541, PT_SCX, ucp_Caucasian_Albanian },
   { 559, PT_PC, ucp_Cc },
   { 562, PT_PC, ucp_Cf },
   { 565, PT_SCX, ucp_Chakma },
@@ -1120,8 +1162,8 @@ const ucp_type_table PRIV(utt)[] = {
   { 621, PT_BOOL, ucp_Changes_When_Lowercased },
   { 643, PT_BOOL, ucp_Changes_When_Titlecased },
   { 665, PT_BOOL, ucp_Changes_When_Uppercased },
-  { 687, PT_SC, ucp_Cherokee },
-  { 692, PT_SC, ucp_Cherokee },
+  { 687, PT_SCX, ucp_Cherokee },
+  { 692, PT_SCX, ucp_Cherokee },
   { 701, PT_SC, ucp_Chorasmian },
   { 712, PT_SC, ucp_Chorasmian },
   { 717, PT_BOOL, ucp_Case_Ignorable },
@@ -1164,8 +1206,8 @@ const ucp_type_table PRIV(utt)[] = {
   { 963, PT_BOOL, ucp_Emoji_Component },
   { 969, PT_SC, ucp_Egyptian_Hieroglyphs },
   { 974, PT_SC, ucp_Egyptian_Hieroglyphs },
-  { 994, PT_SC, ucp_Elbasan },
-  { 999, PT_SC, ucp_Elbasan },
+  { 994, PT_SCX, ucp_Elbasan },
+  { 999, PT_SCX, ucp_Elbasan },
   { 1007, PT_SC, ucp_Elymaic },
   { 1012, PT_SC, ucp_Elymaic },
   { 1020, PT_BOOL, ucp_Emoji_Modifier },
@@ -1175,355 +1217,376 @@ const ucp_type_table PRIV(utt)[] = {
   { 1060, PT_BOOL, ucp_Emoji_Modifier_Base },
   { 1078, PT_BOOL, ucp_Emoji_Presentation },
   { 1096, PT_BOOL, ucp_Emoji_Presentation },
-  { 1102, PT_SC, ucp_Ethiopic },
-  { 1107, PT_SC, ucp_Ethiopic },
+  { 1102, PT_SCX, ucp_Ethiopic },
+  { 1107, PT_SCX, ucp_Ethiopic },
   { 1116, PT_BOOL, ucp_Extender },
   { 1120, PT_BOOL, ucp_Extended_Pictographic },
   { 1141, PT_BOOL, ucp_Extender },
   { 1150, PT_BOOL, ucp_Extended_Pictographic },
-  { 1158, PT_SCX, ucp_Georgian },
-  { 1163, PT_SCX, ucp_Georgian },
-  { 1172, PT_SCX, ucp_Glagolitic },
-  { 1177, PT_SCX, ucp_Glagolitic },
-  { 1188, PT_SCX, ucp_Gunjala_Gondi },
-  { 1193, PT_SCX, ucp_Masaram_Gondi },
-  { 1198, PT_SC, ucp_Gothic },
-  { 1203, PT_SC, ucp_Gothic },
-  { 1210, PT_SCX, ucp_Grantha },
-  { 1215, PT_SCX, ucp_Grantha },
-  { 1223, PT_BOOL, ucp_Grapheme_Base },
-  { 1236, PT_BOOL, ucp_Grapheme_Extend },
-  { 1251, PT_BOOL, ucp_Grapheme_Link },
-  { 1264, PT_BOOL, ucp_Grapheme_Base },
-  { 1271, PT_SCX, ucp_Greek },
-  { 1277, PT_SCX, ucp_Greek },
-  { 1282, PT_BOOL, ucp_Grapheme_Extend },
-  { 1288, PT_BOOL, ucp_Grapheme_Link },
-  { 1295, PT_SCX, ucp_Gujarati },
-  { 1304, PT_SCX, ucp_Gujarati },
-  { 1309, PT_SCX, ucp_Gunjala_Gondi },
-  { 1322, PT_SCX, ucp_Gurmukhi },
-  { 1331, PT_SCX, ucp_Gurmukhi },
-  { 1336, PT_SCX, ucp_Han },
-  { 1340, PT_SCX, ucp_Hangul },
-  { 1345, PT_SCX, ucp_Hangul },
-  { 1352, PT_SCX, ucp_Han },
-  { 1357, PT_SCX, ucp_Hanifi_Rohingya },
-  { 1372, PT_SCX, ucp_Hanunoo },
-  { 1377, PT_SCX, ucp_Hanunoo },
-  { 1385, PT_SC, ucp_Hatran },
-  { 1390, PT_SC, ucp_Hatran },
-  { 1397, PT_SC, ucp_Hebrew },
-  { 1402, PT_SC, ucp_Hebrew },
-  { 1409, PT_BOOL, ucp_Hex_Digit },
-  { 1413, PT_BOOL, ucp_Hex_Digit },
-  { 1422, PT_SCX, ucp_Hiragana },
-  { 1427, PT_SCX, ucp_Hiragana },
-  { 1436, PT_SC, ucp_Anatolian_Hieroglyphs },
-  { 1441, PT_SC, ucp_Pahawh_Hmong },
-  { 1446, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
-  { 1451, PT_SC, ucp_Old_Hungarian },
-  { 1456, PT_BOOL, ucp_ID_Continue },
-  { 1460, PT_BOOL, ucp_ID_Continue },
-  { 1471, PT_BOOL, ucp_Ideographic },
-  { 1476, PT_BOOL, ucp_Ideographic },
-  { 1488, PT_BOOL, ucp_ID_Start },
-  { 1492, PT_BOOL, ucp_IDS_Binary_Operator },
-  { 1497, PT_BOOL, ucp_IDS_Binary_Operator },
-  { 1515, PT_BOOL, ucp_IDS_Trinary_Operator },
-  { 1520, PT_BOOL, ucp_ID_Start },
-  { 1528, PT_BOOL, ucp_IDS_Trinary_Operator },
-  { 1547, PT_SC, ucp_Imperial_Aramaic },
-  { 1563, PT_SC, ucp_Inherited },
-  { 1573, PT_SC, ucp_Inscriptional_Pahlavi },
-  { 1594, PT_SC, ucp_Inscriptional_Parthian },
-  { 1616, PT_SC, ucp_Old_Italic },
-  { 1621, PT_SCX, ucp_Javanese },
-  { 1626, PT_SCX, ucp_Javanese },
-  { 1635, PT_BOOL, ucp_Join_Control },
-  { 1641, PT_BOOL, ucp_Join_Control },
-  { 1653, PT_SCX, ucp_Kaithi },
-  { 1660, PT_SCX, ucp_Kayah_Li },
-  { 1665, PT_SCX, ucp_Katakana },
-  { 1670, PT_SCX, ucp_Kannada },
-  { 1678, PT_SCX, ucp_Katakana },
-  { 1687, PT_SC, ucp_Kawi },
-  { 1692, PT_SCX, ucp_Kayah_Li },
-  { 1700, PT_SC, ucp_Kharoshthi },
-  { 1705, PT_SC, ucp_Kharoshthi },
-  { 1716, PT_SC, ucp_Khitan_Small_Script },
-  { 1734, PT_SC, ucp_Khmer },
-  { 1740, PT_SC, ucp_Khmer },
-  { 1745, PT_SCX, ucp_Khojki },
-  { 1750, PT_SCX, ucp_Khojki },
-  { 1757, PT_SCX, ucp_Khudawadi },
-  { 1767, PT_SC, ucp_Khitan_Small_Script },
-  { 1772, PT_SCX, ucp_Kannada },
-  { 1777, PT_SCX, ucp_Kaithi },
-  { 1782, PT_GC, ucp_L },
-  { 1784, PT_LAMP, 0 },
-  { 1787, PT_SC, ucp_Tai_Tham },
-  { 1792, PT_SC, ucp_Lao },
-  { 1796, PT_SC, ucp_Lao },
-  { 1801, PT_SCX, ucp_Latin },
-  { 1807, PT_SCX, ucp_Latin },
-  { 1812, PT_LAMP, 0 },
-  { 1815, PT_SC, ucp_Lepcha },
-  { 1820, PT_SC, ucp_Lepcha },
-  { 1827, PT_SCX, ucp_Limbu },
-  { 1832, PT_SCX, ucp_Limbu },
-  { 1838, PT_SCX, ucp_Linear_A },
-  { 1843, PT_SCX, ucp_Linear_B },
-  { 1848, PT_SCX, ucp_Linear_A },
-  { 1856, PT_SCX, ucp_Linear_B },
-  { 1864, PT_SC, ucp_Lisu },
-  { 1869, PT_PC, ucp_Ll },
-  { 1872, PT_PC, ucp_Lm },
-  { 1875, PT_PC, ucp_Lo },
-  { 1878, PT_BOOL, ucp_Logical_Order_Exception },
-  { 1882, PT_BOOL, ucp_Logical_Order_Exception },
-  { 1904, PT_BOOL, ucp_Lowercase },
-  { 1910, PT_BOOL, ucp_Lowercase },
-  { 1920, PT_PC, ucp_Lt },
-  { 1923, PT_PC, ucp_Lu },
-  { 1926, PT_SC, ucp_Lycian },
-  { 1931, PT_SC, ucp_Lycian },
-  { 1938, PT_SC, ucp_Lydian },
-  { 1943, PT_SC, ucp_Lydian },
-  { 1950, PT_GC, ucp_M },
-  { 1952, PT_SCX, ucp_Mahajani },
-  { 1961, PT_SCX, ucp_Mahajani },
-  { 1966, PT_SC, ucp_Makasar },
-  { 1971, PT_SC, ucp_Makasar },
-  { 1979, PT_SCX, ucp_Malayalam },
-  { 1989, PT_SCX, ucp_Mandaic },
-  { 1994, PT_SCX, ucp_Mandaic },
-  { 2002, PT_SCX, ucp_Manichaean },
-  { 2007, PT_SCX, ucp_Manichaean },
-  { 2018, PT_SC, ucp_Marchen },
-  { 2023, PT_SC, ucp_Marchen },
-  { 2031, PT_SCX, ucp_Masaram_Gondi },
-  { 2044, PT_BOOL, ucp_Math },
-  { 2049, PT_PC, ucp_Mc },
-  { 2052, PT_PC, ucp_Me },
-  { 2055, PT_SC, ucp_Medefaidrin },
-  { 2067, PT_SC, ucp_Medefaidrin },
-  { 2072, PT_SC, ucp_Meetei_Mayek },
-  { 2084, PT_SC, ucp_Mende_Kikakui },
-  { 2089, PT_SC, ucp_Mende_Kikakui },
-  { 2102, PT_SC, ucp_Meroitic_Cursive },
-  { 2107, PT_SC, ucp_Meroitic_Hieroglyphs },
-  { 2112, PT_SC, ucp_Meroitic_Cursive },
-  { 2128, PT_SC, ucp_Meroitic_Hieroglyphs },
-  { 2148, PT_SC, ucp_Miao },
-  { 2153, PT_SCX, ucp_Malayalam },
-  { 2158, PT_PC, ucp_Mn },
-  { 2161, PT_SCX, ucp_Modi },
-  { 2166, PT_SCX, ucp_Mongolian },
-  { 2171, PT_SCX, ucp_Mongolian },
-  { 2181, PT_SC, ucp_Mro },
-  { 2185, PT_SC, ucp_Mro },
-  { 2190, PT_SC, ucp_Meetei_Mayek },
-  { 2195, PT_SCX, ucp_Multani },
-  { 2200, PT_SCX, ucp_Multani },
-  { 2208, PT_SCX, ucp_Myanmar },
-  { 2216, PT_SCX, ucp_Myanmar },
-  { 2221, PT_GC, ucp_N },
-  { 2223, PT_SC, ucp_Nabataean },
-  { 2233, PT_SC, ucp_Nag_Mundari },
-  { 2238, PT_SC, ucp_Nag_Mundari },
-  { 2249, PT_SCX, ucp_Nandinagari },
-  { 2254, PT_SCX, ucp_Nandinagari },
-  { 2266, PT_SC, ucp_Old_North_Arabian },
-  { 2271, PT_SC, ucp_Nabataean },
-  { 2276, PT_BOOL, ucp_Noncharacter_Code_Point },
-  { 2282, PT_PC, ucp_Nd },
-  { 2285, PT_SC, ucp_Newa },
-  { 2290, PT_SC, ucp_New_Tai_Lue },
-  { 2300, PT_SCX, ucp_Nko },
-  { 2304, PT_SCX, ucp_Nko },
-  { 2309, PT_PC, ucp_Nl },
-  { 2312, PT_PC, ucp_No },
-  { 2315, PT_BOOL, ucp_Noncharacter_Code_Point },
-  { 2337, PT_SC, ucp_Nushu },
-  { 2342, PT_SC, ucp_Nushu },
-  { 2348, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
-  { 2369, PT_SC, ucp_Ogham },
-  { 2374, PT_SC, ucp_Ogham },
-  { 2380, PT_SC, ucp_Ol_Chiki },
-  { 2388, PT_SC, ucp_Ol_Chiki },
-  { 2393, PT_SC, ucp_Old_Hungarian },
-  { 2406, PT_SC, ucp_Old_Italic },
-  { 2416, PT_SC, ucp_Old_North_Arabian },
-  { 2432, PT_SCX, ucp_Old_Permic },
-  { 2442, PT_SC, ucp_Old_Persian },
-  { 2453, PT_SC, ucp_Old_Sogdian },
-  { 2464, PT_SC, ucp_Old_South_Arabian },
-  { 2480, PT_SC, ucp_Old_Turkic },
-  { 2490, PT_SCX, ucp_Old_Uyghur },
-  { 2500, PT_SCX, ucp_Oriya },
-  { 2506, PT_SC, ucp_Old_Turkic },
-  { 2511, PT_SCX, ucp_Oriya },
-  { 2516, PT_SC, ucp_Osage },
-  { 2522, PT_SC, ucp_Osage },
-  { 2527, PT_SC, ucp_Osmanya },
-  { 2532, PT_SC, ucp_Osmanya },
-  { 2540, PT_SCX, ucp_Old_Uyghur },
-  { 2545, PT_GC, ucp_P },
-  { 2547, PT_SC, ucp_Pahawh_Hmong },
-  { 2559, PT_SC, ucp_Palmyrene },
-  { 2564, PT_SC, ucp_Palmyrene },
-  { 2574, PT_BOOL, ucp_Pattern_Syntax },
-  { 2581, PT_BOOL, ucp_Pattern_Syntax },
-  { 2595, PT_BOOL, ucp_Pattern_White_Space },
-  { 2613, PT_BOOL, ucp_Pattern_White_Space },
-  { 2619, PT_SC, ucp_Pau_Cin_Hau },
-  { 2624, PT_SC, ucp_Pau_Cin_Hau },
-  { 2634, PT_PC, ucp_Pc },
-  { 2637, PT_BOOL, ucp_Prepended_Concatenation_Mark },
-  { 2641, PT_PC, ucp_Pd },
-  { 2644, PT_PC, ucp_Pe },
-  { 2647, PT_SCX, ucp_Old_Permic },
-  { 2652, PT_PC, ucp_Pf },
-  { 2655, PT_SCX, ucp_Phags_Pa },
-  { 2660, PT_SCX, ucp_Phags_Pa },
-  { 2668, PT_SC, ucp_Inscriptional_Pahlavi },
-  { 2673, PT_SCX, ucp_Psalter_Pahlavi },
-  { 2678, PT_SC, ucp_Phoenician },
-  { 2683, PT_SC, ucp_Phoenician },
-  { 2694, PT_PC, ucp_Pi },
-  { 2697, PT_SC, ucp_Miao },
-  { 2702, PT_PC, ucp_Po },
-  { 2705, PT_BOOL, ucp_Prepended_Concatenation_Mark },
-  { 2732, PT_SC, ucp_Inscriptional_Parthian },
-  { 2737, PT_PC, ucp_Ps },
-  { 2740, PT_SCX, ucp_Psalter_Pahlavi },
-  { 2755, PT_SCX, ucp_Coptic },
-  { 2760, PT_SC, ucp_Inherited },
-  { 2765, PT_BOOL, ucp_Quotation_Mark },
-  { 2771, PT_BOOL, ucp_Quotation_Mark },
-  { 2785, PT_BOOL, ucp_Radical },
-  { 2793, PT_BOOL, ucp_Regional_Indicator },
-  { 2811, PT_SC, ucp_Rejang },
-  { 2818, PT_BOOL, ucp_Regional_Indicator },
-  { 2821, PT_SC, ucp_Rejang },
-  { 2826, PT_SCX, ucp_Hanifi_Rohingya },
-  { 2831, PT_SC, ucp_Runic },
-  { 2837, PT_SC, ucp_Runic },
-  { 2842, PT_GC, ucp_S },
-  { 2844, PT_SC, ucp_Samaritan },
-  { 2854, PT_SC, ucp_Samaritan },
-  { 2859, PT_SC, ucp_Old_South_Arabian },
-  { 2864, PT_SC, ucp_Saurashtra },
-  { 2869, PT_SC, ucp_Saurashtra },
-  { 2880, PT_PC, ucp_Sc },
-  { 2883, PT_BOOL, ucp_Soft_Dotted },
-  { 2886, PT_BOOL, ucp_Sentence_Terminal },
-  { 2903, PT_SC, ucp_SignWriting },
-  { 2908, PT_SCX, ucp_Sharada },
-  { 2916, PT_SC, ucp_Shavian },
-  { 2924, PT_SC, ucp_Shavian },
-  { 2929, PT_SCX, ucp_Sharada },
-  { 2934, PT_SC, ucp_Siddham },
-  { 2939, PT_SC, ucp_Siddham },
-  { 2947, PT_SC, ucp_SignWriting },
-  { 2959, PT_SCX, ucp_Khudawadi },
-  { 2964, PT_SCX, ucp_Sinhala },
-  { 2969, PT_SCX, ucp_Sinhala },
-  { 2977, PT_PC, ucp_Sk },
-  { 2980, PT_PC, ucp_Sm },
-  { 2983, PT_PC, ucp_So },
-  { 2986, PT_BOOL, ucp_Soft_Dotted },
-  { 2997, PT_SCX, ucp_Sogdian },
-  { 3002, PT_SCX, ucp_Sogdian },
-  { 3010, PT_SC, ucp_Old_Sogdian },
-  { 3015, PT_SC, ucp_Sora_Sompeng },
-  { 3020, PT_SC, ucp_Sora_Sompeng },
-  { 3032, PT_SC, ucp_Soyombo },
-  { 3037, PT_SC, ucp_Soyombo },
-  { 3045, PT_BOOL, ucp_White_Space },
-  { 3051, PT_BOOL, ucp_Sentence_Terminal },
-  { 3057, PT_SC, ucp_Sundanese },
-  { 3062, PT_SC, ucp_Sundanese },
-  { 3072, PT_SCX, ucp_Syloti_Nagri },
-  { 3077, PT_SCX, ucp_Syloti_Nagri },
-  { 3089, PT_SCX, ucp_Syriac },
-  { 3094, PT_SCX, ucp_Syriac },
-  { 3101, PT_SCX, ucp_Tagalog },
-  { 3109, PT_SCX, ucp_Tagbanwa },
-  { 3114, PT_SCX, ucp_Tagbanwa },
-  { 3123, PT_SCX, ucp_Tai_Le },
-  { 3129, PT_SC, ucp_Tai_Tham },
-  { 3137, PT_SC, ucp_Tai_Viet },
-  { 3145, PT_SCX, ucp_Takri },
-  { 3150, PT_SCX, ucp_Takri },
-  { 3156, PT_SCX, ucp_Tai_Le },
-  { 3161, PT_SC, ucp_New_Tai_Lue },
-  { 3166, PT_SCX, ucp_Tamil },
-  { 3172, PT_SCX, ucp_Tamil },
-  { 3177, PT_SC, ucp_Tangut },
-  { 3182, PT_SC, ucp_Tangsa },
-  { 3189, PT_SC, ucp_Tangut },
-  { 3196, PT_SC, ucp_Tai_Viet },
-  { 3201, PT_SCX, ucp_Telugu },
-  { 3206, PT_SCX, ucp_Telugu },
-  { 3213, PT_BOOL, ucp_Terminal_Punctuation },
-  { 3218, PT_BOOL, ucp_Terminal_Punctuation },
-  { 3238, PT_SC, ucp_Tifinagh },
-  { 3243, PT_SCX, ucp_Tagalog },
-  { 3248, PT_SCX, ucp_Thaana },
-  { 3253, PT_SCX, ucp_Thaana },
-  { 3260, PT_SC, ucp_Thai },
-  { 3265, PT_SC, ucp_Tibetan },
-  { 3273, PT_SC, ucp_Tibetan },
-  { 3278, PT_SC, ucp_Tifinagh },
-  { 3287, PT_SCX, ucp_Tirhuta },
-  { 3292, PT_SCX, ucp_Tirhuta },
-  { 3300, PT_SC, ucp_Tangsa },
-  { 3305, PT_SC, ucp_Toto },
-  { 3310, PT_SC, ucp_Ugaritic },
-  { 3315, PT_SC, ucp_Ugaritic },
-  { 3324, PT_BOOL, ucp_Unified_Ideograph },
-  { 3330, PT_BOOL, ucp_Unified_Ideograph },
-  { 3347, PT_SC, ucp_Unknown },
-  { 3355, PT_BOOL, ucp_Uppercase },
-  { 3361, PT_BOOL, ucp_Uppercase },
-  { 3371, PT_SC, ucp_Vai },
-  { 3375, PT_SC, ucp_Vai },
-  { 3380, PT_BOOL, ucp_Variation_Selector },
-  { 3398, PT_SC, ucp_Vithkuqi },
-  { 3403, PT_SC, ucp_Vithkuqi },
-  { 3412, PT_BOOL, ucp_Variation_Selector },
-  { 3415, PT_SC, ucp_Wancho },
-  { 3422, PT_SC, ucp_Warang_Citi },
-  { 3427, PT_SC, ucp_Warang_Citi },
-  { 3438, PT_SC, ucp_Wancho },
-  { 3443, PT_BOOL, ucp_White_Space },
-  { 3454, PT_BOOL, ucp_White_Space },
-  { 3461, PT_ALNUM, 0 },
-  { 3465, PT_BOOL, ucp_XID_Continue },
-  { 3470, PT_BOOL, ucp_XID_Continue },
-  { 3482, PT_BOOL, ucp_XID_Start },
-  { 3487, PT_BOOL, ucp_XID_Start },
-  { 3496, PT_SC, ucp_Old_Persian },
-  { 3501, PT_PXSPACE, 0 },
-  { 3505, PT_SPACE, 0 },
-  { 3509, PT_SC, ucp_Cuneiform },
-  { 3514, PT_UCNC, 0 },
-  { 3518, PT_WORD, 0 },
-  { 3522, PT_SCX, ucp_Yezidi },
-  { 3527, PT_SCX, ucp_Yezidi },
-  { 3534, PT_SCX, ucp_Yi },
-  { 3537, PT_SCX, ucp_Yi },
-  { 3542, PT_GC, ucp_Z },
-  { 3544, PT_SC, ucp_Zanabazar_Square },
-  { 3560, PT_SC, ucp_Zanabazar_Square },
-  { 3565, PT_SC, ucp_Inherited },
-  { 3570, PT_PC, ucp_Zl },
-  { 3573, PT_PC, ucp_Zp },
-  { 3576, PT_PC, ucp_Zs },
-  { 3579, PT_SC, ucp_Common },
-  { 3584, PT_SC, ucp_Unknown }
+  { 1158, PT_SCX, ucp_Garay },
+  { 1163, PT_SCX, ucp_Garay },
+  { 1169, PT_SCX, ucp_Georgian },
+  { 1174, PT_SCX, ucp_Georgian },
+  { 1183, PT_SCX, ucp_Glagolitic },
+  { 1188, PT_SCX, ucp_Glagolitic },
+  { 1199, PT_SCX, ucp_Gunjala_Gondi },
+  { 1204, PT_SCX, ucp_Masaram_Gondi },
+  { 1209, PT_SCX, ucp_Gothic },
+  { 1214, PT_SCX, ucp_Gothic },
+  { 1221, PT_SCX, ucp_Grantha },
+  { 1226, PT_SCX, ucp_Grantha },
+  { 1234, PT_BOOL, ucp_Grapheme_Base },
+  { 1247, PT_BOOL, ucp_Grapheme_Extend },
+  { 1262, PT_BOOL, ucp_Grapheme_Link },
+  { 1275, PT_BOOL, ucp_Grapheme_Base },
+  { 1282, PT_SCX, ucp_Greek },
+  { 1288, PT_SCX, ucp_Greek },
+  { 1293, PT_BOOL, ucp_Grapheme_Extend },
+  { 1299, PT_BOOL, ucp_Grapheme_Link },
+  { 1306, PT_SCX, ucp_Gujarati },
+  { 1315, PT_SCX, ucp_Gujarati },
+  { 1320, PT_SCX, ucp_Gurung_Khema },
+  { 1325, PT_SCX, ucp_Gunjala_Gondi },
+  { 1338, PT_SCX, ucp_Gurmukhi },
+  { 1347, PT_SCX, ucp_Gurmukhi },
+  { 1352, PT_SCX, ucp_Gurung_Khema },
+  { 1364, PT_SCX, ucp_Han },
+  { 1368, PT_SCX, ucp_Hangul },
+  { 1373, PT_SCX, ucp_Hangul },
+  { 1380, PT_SCX, ucp_Han },
+  { 1385, PT_SCX, ucp_Hanifi_Rohingya },
+  { 1400, PT_SCX, ucp_Hanunoo },
+  { 1405, PT_SCX, ucp_Hanunoo },
+  { 1413, PT_SC, ucp_Hatran },
+  { 1418, PT_SC, ucp_Hatran },
+  { 1425, PT_SCX, ucp_Hebrew },
+  { 1430, PT_SCX, ucp_Hebrew },
+  { 1437, PT_BOOL, ucp_Hex_Digit },
+  { 1441, PT_BOOL, ucp_Hex_Digit },
+  { 1450, PT_SCX, ucp_Hiragana },
+  { 1455, PT_SCX, ucp_Hiragana },
+  { 1464, PT_SC, ucp_Anatolian_Hieroglyphs },
+  { 1469, PT_SC, ucp_Pahawh_Hmong },
+  { 1474, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
+  { 1479, PT_SCX, ucp_Old_Hungarian },
+  { 1484, PT_BOOL, ucp_ID_Continue },
+  { 1488, PT_BOOL, ucp_ID_Compat_Math_Continue },
+  { 1509, PT_BOOL, ucp_ID_Compat_Math_Start },
+  { 1527, PT_BOOL, ucp_ID_Continue },
+  { 1538, PT_BOOL, ucp_Ideographic },
+  { 1543, PT_BOOL, ucp_Ideographic },
+  { 1555, PT_BOOL, ucp_ID_Start },
+  { 1559, PT_BOOL, ucp_IDS_Binary_Operator },
+  { 1564, PT_BOOL, ucp_IDS_Binary_Operator },
+  { 1582, PT_BOOL, ucp_IDS_Trinary_Operator },
+  { 1587, PT_BOOL, ucp_ID_Start },
+  { 1595, PT_BOOL, ucp_IDS_Trinary_Operator },
+  { 1614, PT_BOOL, ucp_IDS_Unary_Operator },
+  { 1619, PT_BOOL, ucp_IDS_Unary_Operator },
+  { 1636, PT_SC, ucp_Imperial_Aramaic },
+  { 1652, PT_BOOL, ucp_InCB },
+  { 1657, PT_SC, ucp_Inherited },
+  { 1667, PT_SC, ucp_Inscriptional_Pahlavi },
+  { 1688, PT_SC, ucp_Inscriptional_Parthian },
+  { 1710, PT_SC, ucp_Old_Italic },
+  { 1715, PT_SCX, ucp_Javanese },
+  { 1720, PT_SCX, ucp_Javanese },
+  { 1729, PT_BOOL, ucp_Join_Control },
+  { 1735, PT_BOOL, ucp_Join_Control },
+  { 1747, PT_SCX, ucp_Kaithi },
+  { 1754, PT_SCX, ucp_Kayah_Li },
+  { 1759, PT_SCX, ucp_Katakana },
+  { 1764, PT_SCX, ucp_Kannada },
+  { 1772, PT_SCX, ucp_Katakana },
+  { 1781, PT_SC, ucp_Kawi },
+  { 1786, PT_SCX, ucp_Kayah_Li },
+  { 1794, PT_SC, ucp_Kharoshthi },
+  { 1799, PT_SC, ucp_Kharoshthi },
+  { 1810, PT_SC, ucp_Khitan_Small_Script },
+  { 1828, PT_SC, ucp_Khmer },
+  { 1834, PT_SC, ucp_Khmer },
+  { 1839, PT_SCX, ucp_Khojki },
+  { 1844, PT_SCX, ucp_Khojki },
+  { 1851, PT_SCX, ucp_Khudawadi },
+  { 1861, PT_SC, ucp_Kirat_Rai },
+  { 1870, PT_SC, ucp_Khitan_Small_Script },
+  { 1875, PT_SCX, ucp_Kannada },
+  { 1880, PT_SC, ucp_Kirat_Rai },
+  { 1885, PT_SCX, ucp_Kaithi },
+  { 1890, PT_GC, ucp_L },
+  { 1892, PT_LAMP, 0 },
+  { 1895, PT_SC, ucp_Tai_Tham },
+  { 1900, PT_SC, ucp_Lao },
+  { 1904, PT_SC, ucp_Lao },
+  { 1909, PT_SCX, ucp_Latin },
+  { 1915, PT_SCX, ucp_Latin },
+  { 1920, PT_LAMP, 0 },
+  { 1923, PT_SC, ucp_Lepcha },
+  { 1928, PT_SC, ucp_Lepcha },
+  { 1935, PT_SCX, ucp_Limbu },
+  { 1940, PT_SCX, ucp_Limbu },
+  { 1946, PT_SCX, ucp_Linear_A },
+  { 1951, PT_SCX, ucp_Linear_B },
+  { 1956, PT_SCX, ucp_Linear_A },
+  { 1964, PT_SCX, ucp_Linear_B },
+  { 1972, PT_SCX, ucp_Lisu },
+  { 1977, PT_PC, ucp_Ll },
+  { 1980, PT_PC, ucp_Lm },
+  { 1983, PT_PC, ucp_Lo },
+  { 1986, PT_BOOL, ucp_Logical_Order_Exception },
+  { 1990, PT_BOOL, ucp_Logical_Order_Exception },
+  { 2012, PT_BOOL, ucp_Lowercase },
+  { 2018, PT_BOOL, ucp_Lowercase },
+  { 2028, PT_PC, ucp_Lt },
+  { 2031, PT_PC, ucp_Lu },
+  { 2034, PT_SCX, ucp_Lycian },
+  { 2039, PT_SCX, ucp_Lycian },
+  { 2046, PT_SCX, ucp_Lydian },
+  { 2051, PT_SCX, ucp_Lydian },
+  { 2058, PT_GC, ucp_M },
+  { 2060, PT_SCX, ucp_Mahajani },
+  { 2069, PT_SCX, ucp_Mahajani },
+  { 2074, PT_SC, ucp_Makasar },
+  { 2079, PT_SC, ucp_Makasar },
+  { 2087, PT_SCX, ucp_Malayalam },
+  { 2097, PT_SCX, ucp_Mandaic },
+  { 2102, PT_SCX, ucp_Mandaic },
+  { 2110, PT_SCX, ucp_Manichaean },
+  { 2115, PT_SCX, ucp_Manichaean },
+  { 2126, PT_SC, ucp_Marchen },
+  { 2131, PT_SC, ucp_Marchen },
+  { 2139, PT_SCX, ucp_Masaram_Gondi },
+  { 2152, PT_BOOL, ucp_Math },
+  { 2157, PT_PC, ucp_Mc },
+  { 2160, PT_BOOL, ucp_Modifier_Combining_Mark },
+  { 2164, PT_PC, ucp_Me },
+  { 2167, PT_SC, ucp_Medefaidrin },
+  { 2179, PT_SC, ucp_Medefaidrin },
+  { 2184, PT_SC, ucp_Meetei_Mayek },
+  { 2196, PT_SC, ucp_Mende_Kikakui },
+  { 2201, PT_SC, ucp_Mende_Kikakui },
+  { 2214, PT_SC, ucp_Meroitic_Cursive },
+  { 2219, PT_SCX, ucp_Meroitic_Hieroglyphs },
+  { 2224, PT_SC, ucp_Meroitic_Cursive },
+  { 2240, PT_SCX, ucp_Meroitic_Hieroglyphs },
+  { 2260, PT_SC, ucp_Miao },
+  { 2265, PT_SCX, ucp_Malayalam },
+  { 2270, PT_PC, ucp_Mn },
+  { 2273, PT_SCX, ucp_Modi },
+  { 2278, PT_BOOL, ucp_Modifier_Combining_Mark },
+  { 2300, PT_SCX, ucp_Mongolian },
+  { 2305, PT_SCX, ucp_Mongolian },
+  { 2315, PT_SC, ucp_Mro },
+  { 2319, PT_SC, ucp_Mro },
+  { 2324, PT_SC, ucp_Meetei_Mayek },
+  { 2329, PT_SCX, ucp_Multani },
+  { 2334, PT_SCX, ucp_Multani },
+  { 2342, PT_SCX, ucp_Myanmar },
+  { 2350, PT_SCX, ucp_Myanmar },
+  { 2355, PT_GC, ucp_N },
+  { 2357, PT_SC, ucp_Nabataean },
+  { 2367, PT_SC, ucp_Nag_Mundari },
+  { 2372, PT_SC, ucp_Nag_Mundari },
+  { 2383, PT_SCX, ucp_Nandinagari },
+  { 2388, PT_SCX, ucp_Nandinagari },
+  { 2400, PT_SC, ucp_Old_North_Arabian },
+  { 2405, PT_SC, ucp_Nabataean },
+  { 2410, PT_BOOL, ucp_Noncharacter_Code_Point },
+  { 2416, PT_PC, ucp_Nd },
+  { 2419, PT_SC, ucp_Newa },
+  { 2424, PT_SC, ucp_New_Tai_Lue },
+  { 2434, PT_SCX, ucp_Nko },
+  { 2438, PT_SCX, ucp_Nko },
+  { 2443, PT_PC, ucp_Nl },
+  { 2446, PT_PC, ucp_No },
+  { 2449, PT_BOOL, ucp_Noncharacter_Code_Point },
+  { 2471, PT_SC, ucp_Nushu },
+  { 2476, PT_SC, ucp_Nushu },
+  { 2482, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
+  { 2503, PT_SC, ucp_Ogham },
+  { 2508, PT_SC, ucp_Ogham },
+  { 2514, PT_SC, ucp_Ol_Chiki },
+  { 2522, PT_SC, ucp_Ol_Chiki },
+  { 2527, PT_SCX, ucp_Old_Hungarian },
+  { 2540, PT_SC, ucp_Old_Italic },
+  { 2550, PT_SC, ucp_Old_North_Arabian },
+  { 2566, PT_SCX, ucp_Old_Permic },
+  { 2576, PT_SC, ucp_Old_Persian },
+  { 2587, PT_SC, ucp_Old_Sogdian },
+  { 2598, PT_SC, ucp_Old_South_Arabian },
+  { 2614, PT_SCX, ucp_Old_Turkic },
+  { 2624, PT_SCX, ucp_Old_Uyghur },
+  { 2634, PT_SCX, ucp_Ol_Onal },
+  { 2641, PT_SCX, ucp_Ol_Onal },
+  { 2646, PT_SCX, ucp_Oriya },
+  { 2652, PT_SCX, ucp_Old_Turkic },
+  { 2657, PT_SCX, ucp_Oriya },
+  { 2662, PT_SCX, ucp_Osage },
+  { 2668, PT_SCX, ucp_Osage },
+  { 2673, PT_SC, ucp_Osmanya },
+  { 2678, PT_SC, ucp_Osmanya },
+  { 2686, PT_SCX, ucp_Old_Uyghur },
+  { 2691, PT_GC, ucp_P },
+  { 2693, PT_SC, ucp_Pahawh_Hmong },
+  { 2705, PT_SC, ucp_Palmyrene },
+  { 2710, PT_SC, ucp_Palmyrene },
+  { 2720, PT_BOOL, ucp_Pattern_Syntax },
+  { 2727, PT_BOOL, ucp_Pattern_Syntax },
+  { 2741, PT_BOOL, ucp_Pattern_White_Space },
+  { 2759, PT_BOOL, ucp_Pattern_White_Space },
+  { 2765, PT_SC, ucp_Pau_Cin_Hau },
+  { 2770, PT_SC, ucp_Pau_Cin_Hau },
+  { 2780, PT_PC, ucp_Pc },
+  { 2783, PT_BOOL, ucp_Prepended_Concatenation_Mark },
+  { 2787, PT_PC, ucp_Pd },
+  { 2790, PT_PC, ucp_Pe },
+  { 2793, PT_SCX, ucp_Old_Permic },
+  { 2798, PT_PC, ucp_Pf },
+  { 2801, PT_SCX, ucp_Phags_Pa },
+  { 2806, PT_SCX, ucp_Phags_Pa },
+  { 2814, PT_SC, ucp_Inscriptional_Pahlavi },
+  { 2819, PT_SCX, ucp_Psalter_Pahlavi },
+  { 2824, PT_SC, ucp_Phoenician },
+  { 2829, PT_SC, ucp_Phoenician },
+  { 2840, PT_PC, ucp_Pi },
+  { 2843, PT_SC, ucp_Miao },
+  { 2848, PT_PC, ucp_Po },
+  { 2851, PT_BOOL, ucp_Prepended_Concatenation_Mark },
+  { 2878, PT_SC, ucp_Inscriptional_Parthian },
+  { 2883, PT_PC, ucp_Ps },
+  { 2886, PT_SCX, ucp_Psalter_Pahlavi },
+  { 2901, PT_SCX, ucp_Coptic },
+  { 2906, PT_SC, ucp_Inherited },
+  { 2911, PT_BOOL, ucp_Quotation_Mark },
+  { 2917, PT_BOOL, ucp_Quotation_Mark },
+  { 2931, PT_BOOL, ucp_Radical },
+  { 2939, PT_BOOL, ucp_Regional_Indicator },
+  { 2957, PT_SC, ucp_Rejang },
+  { 2964, PT_BOOL, ucp_Regional_Indicator },
+  { 2967, PT_SC, ucp_Rejang },
+  { 2972, PT_SCX, ucp_Hanifi_Rohingya },
+  { 2977, PT_SCX, ucp_Runic },
+  { 2983, PT_SCX, ucp_Runic },
+  { 2988, PT_GC, ucp_S },
+  { 2990, PT_SCX, ucp_Samaritan },
+  { 3000, PT_SCX, ucp_Samaritan },
+  { 3005, PT_SC, ucp_Old_South_Arabian },
+  { 3010, PT_SC, ucp_Saurashtra },
+  { 3015, PT_SC, ucp_Saurashtra },
+  { 3026, PT_PC, ucp_Sc },
+  { 3029, PT_BOOL, ucp_Soft_Dotted },
+  { 3032, PT_BOOL, ucp_Sentence_Terminal },
+  { 3049, PT_SC, ucp_SignWriting },
+  { 3054, PT_SCX, ucp_Sharada },
+  { 3062, PT_SCX, ucp_Shavian },
+  { 3070, PT_SCX, ucp_Shavian },
+  { 3075, PT_SCX, ucp_Sharada },
+  { 3080, PT_SC, ucp_Siddham },
+  { 3085, PT_SC, ucp_Siddham },
+  { 3093, PT_SC, ucp_SignWriting },
+  { 3105, PT_SCX, ucp_Khudawadi },
+  { 3110, PT_SCX, ucp_Sinhala },
+  { 3115, PT_SCX, ucp_Sinhala },
+  { 3123, PT_PC, ucp_Sk },
+  { 3126, PT_PC, ucp_Sm },
+  { 3129, PT_PC, ucp_So },
+  { 3132, PT_BOOL, ucp_Soft_Dotted },
+  { 3143, PT_SCX, ucp_Sogdian },
+  { 3148, PT_SCX, ucp_Sogdian },
+  { 3156, PT_SC, ucp_Old_Sogdian },
+  { 3161, PT_SC, ucp_Sora_Sompeng },
+  { 3166, PT_SC, ucp_Sora_Sompeng },
+  { 3178, PT_SC, ucp_Soyombo },
+  { 3183, PT_SC, ucp_Soyombo },
+  { 3191, PT_BOOL, ucp_White_Space },
+  { 3197, PT_BOOL, ucp_Sentence_Terminal },
+  { 3203, PT_SC, ucp_Sundanese },
+  { 3208, PT_SC, ucp_Sundanese },
+  { 3218, PT_SCX, ucp_Sunuwar },
+  { 3223, PT_SCX, ucp_Sunuwar },
+  { 3231, PT_SCX, ucp_Syloti_Nagri },
+  { 3236, PT_SCX, ucp_Syloti_Nagri },
+  { 3248, PT_SCX, ucp_Syriac },
+  { 3253, PT_SCX, ucp_Syriac },
+  { 3260, PT_SCX, ucp_Tagalog },
+  { 3268, PT_SCX, ucp_Tagbanwa },
+  { 3273, PT_SCX, ucp_Tagbanwa },
+  { 3282, PT_SCX, ucp_Tai_Le },
+  { 3288, PT_SC, ucp_Tai_Tham },
+  { 3296, PT_SC, ucp_Tai_Viet },
+  { 3304, PT_SCX, ucp_Takri },
+  { 3309, PT_SCX, ucp_Takri },
+  { 3315, PT_SCX, ucp_Tai_Le },
+  { 3320, PT_SC, ucp_New_Tai_Lue },
+  { 3325, PT_SCX, ucp_Tamil },
+  { 3331, PT_SCX, ucp_Tamil },
+  { 3336, PT_SCX, ucp_Tangut },
+  { 3341, PT_SC, ucp_Tangsa },
+  { 3348, PT_SCX, ucp_Tangut },
+  { 3355, PT_SC, ucp_Tai_Viet },
+  { 3360, PT_SCX, ucp_Telugu },
+  { 3365, PT_SCX, ucp_Telugu },
+  { 3372, PT_BOOL, ucp_Terminal_Punctuation },
+  { 3377, PT_BOOL, ucp_Terminal_Punctuation },
+  { 3397, PT_SCX, ucp_Tifinagh },
+  { 3402, PT_SCX, ucp_Tagalog },
+  { 3407, PT_SCX, ucp_Thaana },
+  { 3412, PT_SCX, ucp_Thaana },
+  { 3419, PT_SCX, ucp_Thai },
+  { 3424, PT_SCX, ucp_Tibetan },
+  { 3432, PT_SCX, ucp_Tibetan },
+  { 3437, PT_SCX, ucp_Tifinagh },
+  { 3446, PT_SCX, ucp_Tirhuta },
+  { 3451, PT_SCX, ucp_Tirhuta },
+  { 3459, PT_SC, ucp_Tangsa },
+  { 3464, PT_SCX, ucp_Todhri },
+  { 3471, PT_SCX, ucp_Todhri },
+  { 3476, PT_SCX, ucp_Toto },
+  { 3481, PT_SCX, ucp_Tulu_Tigalari },
+  { 3494, PT_SCX, ucp_Tulu_Tigalari },
+  { 3499, PT_SC, ucp_Ugaritic },
+  { 3504, PT_SC, ucp_Ugaritic },
+  { 3513, PT_BOOL, ucp_Unified_Ideograph },
+  { 3519, PT_BOOL, ucp_Unified_Ideograph },
+  { 3536, PT_SC, ucp_Unknown },
+  { 3544, PT_BOOL, ucp_Uppercase },
+  { 3550, PT_BOOL, ucp_Uppercase },
+  { 3560, PT_SC, ucp_Vai },
+  { 3564, PT_SC, ucp_Vai },
+  { 3569, PT_BOOL, ucp_Variation_Selector },
+  { 3587, PT_SC, ucp_Vithkuqi },
+  { 3592, PT_SC, ucp_Vithkuqi },
+  { 3601, PT_BOOL, ucp_Variation_Selector },
+  { 3604, PT_SC, ucp_Wancho },
+  { 3611, PT_SC, ucp_Warang_Citi },
+  { 3616, PT_SC, ucp_Warang_Citi },
+  { 3627, PT_SC, ucp_Wancho },
+  { 3632, PT_BOOL, ucp_White_Space },
+  { 3643, PT_BOOL, ucp_White_Space },
+  { 3650, PT_ALNUM, 0 },
+  { 3654, PT_BOOL, ucp_XID_Continue },
+  { 3659, PT_BOOL, ucp_XID_Continue },
+  { 3671, PT_BOOL, ucp_XID_Start },
+  { 3676, PT_BOOL, ucp_XID_Start },
+  { 3685, PT_SC, ucp_Old_Persian },
+  { 3690, PT_PXSPACE, 0 },
+  { 3694, PT_SPACE, 0 },
+  { 3698, PT_SC, ucp_Cuneiform },
+  { 3703, PT_UCNC, 0 },
+  { 3707, PT_WORD, 0 },
+  { 3711, PT_SCX, ucp_Yezidi },
+  { 3716, PT_SCX, ucp_Yezidi },
+  { 3723, PT_SCX, ucp_Yi },
+  { 3726, PT_SCX, ucp_Yi },
+  { 3731, PT_GC, ucp_Z },
+  { 3733, PT_SC, ucp_Zanabazar_Square },
+  { 3749, PT_SC, ucp_Zanabazar_Square },
+  { 3754, PT_SC, ucp_Inherited },
+  { 3759, PT_PC, ucp_Zl },
+  { 3762, PT_PC, ucp_Zp },
+  { 3765, PT_PC, ucp_Zs },
+  { 3768, PT_SC, ucp_Common },
+  { 3773, PT_SC, ucp_Unknown }
 };
 
 const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); /* number of entries in PRIV(utt) */

+ 132 - 0
thirdparty/pcre2/src/pcre2_util.h

@@ -0,0 +1,132 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE2 is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifndef PCRE2_UTIL_H_IDEMPOTENT_GUARD
+#define PCRE2_UTIL_H_IDEMPOTENT_GUARD
+
+/* Assertion macros */
+
+#ifdef PCRE2_DEBUG
+
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
+#include <assert.h>
+#endif /* HAVE_ASSERT_H && !NDEBUG */
+
+/* PCRE2_ASSERT(x) can be used to inject an assert() for conditions
+that the code doesn't support. It is a no-op in non-debug builds; in
+debug builds a failure reports the source location and aborts (the
+fallback form needs fprintf(), stderr and abort() from the includer).
+
+It is meant to work like assert(), and therefore the expression used
+should state the expected condition and must not have side effects:
+in non-debug builds the expression is not evaluated at all. */
+
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
+#define PCRE2_ASSERT(x) assert(x)
+#else /* no HAVE_ASSERT_H, or NDEBUG is defined */
+#define PCRE2_ASSERT(x) do                                            \
+{                                                                     \
+  if (!(x))                                                           \
+  {                                                                   \
+  fprintf(stderr, "Assertion failed at " __FILE__ ":%d\n", __LINE__); \
+  abort();                                                            \
+  }                                                                   \
+} while(0)
+#endif /* HAVE_ASSERT_H && !NDEBUG */
+
+/* PCRE2_UNREACHABLE() can be used to mark locations in the code that
+shouldn't be reached. In non-debug builds it is defined as a hint for
+the compiler to eliminate any code after it, so it is also useful for
+performance reasons, but it should be used with care because if it is
+ever reached it will trigger Undefined Behaviour and, if you are lucky,
+a crash. In debug builds it reports the location where it was triggered
+and crashes. One important point to consider when using this macro is
+that it is only implemented for a few compilers, and therefore can't
+be relied on to always be active either, so if it is followed by some
+code it is important to make sure that the whole thing is safe to
+use even if the macro is not there (e.g. make sure there is a `break`
+after it if used at the end of a `case`) and to test your code also
+with a configuration where the macro is a no-op. */
+
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
+#define PCRE2_UNREACHABLE()                                         \
+assert(((void)"Execution reached unexpected point", 0))
+#else /* no HAVE_ASSERT_H, or NDEBUG is defined */
+#define PCRE2_UNREACHABLE() do                                      \
+{                                                                   \
+fprintf(stderr, "Execution reached unexpected point at " __FILE__   \
+                ":%d\n", __LINE__);                                 \
+abort();                                                            \
+} while(0)
+#endif /* HAVE_ASSERT_H && !NDEBUG */
+
+/* PCRE2_DEBUG_UNREACHABLE() is a debug-only version of the previous
+macro. It is meant to be used in places where the code is handling
+an error situation in code that shouldn't be reached, but that has
+some sort of fallback code to normally handle the error. When in
+doubt you should use this instead of the previous macro. As in
+the previous case, it is a good idea to document as much as possible
+the reason and the actions that should be taken if it ever triggers. */
+
+#define PCRE2_DEBUG_UNREACHABLE() PCRE2_UNREACHABLE()
+
+#endif /* PCRE2_DEBUG */
+
+#ifndef PCRE2_DEBUG_UNREACHABLE /* not defined by the PCRE2_DEBUG section */
+#define PCRE2_DEBUG_UNREACHABLE() do {} while(0) /* expands to an empty statement */
+#endif /* !PCRE2_DEBUG_UNREACHABLE */
+
+#ifndef PCRE2_UNREACHABLE /* not defined by the PCRE2_DEBUG section */
+#ifdef HAVE_BUILTIN_UNREACHABLE
+#define PCRE2_UNREACHABLE() __builtin_unreachable() /* compiler hint; must never execute */
+#elif defined(HAVE_BUILTIN_ASSUME)
+#define PCRE2_UNREACHABLE() __assume(0) /* compiler hint; must never execute */
+#else /* neither builtin is available */
+#define PCRE2_UNREACHABLE() do {} while(0) /* empty statement: execution falls through */
+#endif /* HAVE_BUILTIN_UNREACHABLE / HAVE_BUILTIN_ASSUME */
+#endif /* !PCRE2_UNREACHABLE */
+
+#ifndef PCRE2_ASSERT /* not defined by the PCRE2_DEBUG section */
+#define PCRE2_ASSERT(x) do {} while(0) /* empty statement: x is never evaluated */
+#endif /* !PCRE2_ASSERT */
+
+#endif /* PCRE2_UTIL_H_IDEMPOTENT_GUARD */
+
+/* End of pcre2_util.h */

+ 316 - 79
thirdparty/pcre2/src/pcre2_xclass.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2023 University of Cambridge
+          New API code Copyright (c) 2016-2024 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -38,9 +38,9 @@ POSSIBILITY OF SUCH DAMAGE.
 -----------------------------------------------------------------------------
 */
 
-/* This module contains an internal function that is used to match an extended
-class. It is used by pcre2_auto_possessify() and by both pcre2_match() and
-pcre2_def_match(). */
+/* This module contains two internal functions that are used to match
+OP_XCLASS and OP_ECLASS. They are used by pcre2_auto_possessify() and by both
+pcre2_match() and pcre2_dfa_match(). */
 
 
 #ifdef HAVE_CONFIG_H
@@ -66,114 +66,75 @@ Returns:      TRUE if character matches, else FALSE
 */
 
 BOOL
-PRIV(xclass)(uint32_t c, PCRE2_SPTR data, BOOL utf)
+PRIV(xclass)(uint32_t c, PCRE2_SPTR data, const uint8_t *char_lists_end, BOOL utf)
 {
+/* Update PRIV(update_classbits) when this function is changed. */
 PCRE2_UCHAR t;
-BOOL negated = (*data & XCL_NOT) != 0;
+BOOL not_negated = (*data & XCL_NOT) == 0;
+uint32_t type, max_index, min_index, value;
+const uint8_t *next_char;
 
 #if PCRE2_CODE_UNIT_WIDTH == 8
 /* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
 utf = TRUE;
 #endif
 
-/* Code points < 256 are matched against a bitmap, if one is present. If not,
-we still carry on, because there may be ranges that start below 256 in the
-additional data. */
+/* Code points < 256 are matched against a bitmap, if one is present. */
 
-if (c < 256)
+if ((*data++ & XCL_MAP) != 0)
   {
-  if ((*data & XCL_HASPROP) == 0)
-    {
-    if ((*data & XCL_MAP) == 0) return negated;
-    return (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0;
-    }
-  if ((*data & XCL_MAP) != 0 &&
-    (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0)
-    return !negated; /* char found */
+  if (c < 256)
+    return (((const uint8_t *)data)[c/8] & (1u << (c&7))) != 0;
+  /* Skip bitmap. */
+  data += 32 / sizeof(PCRE2_UCHAR);
   }
 
-/* First skip the bit map if present. Then match against the list of Unicode
-properties or large chars or ranges that end with a large char. We won't ever
+/* Match against the list of Unicode properties. We won't ever
 encounter XCL_PROP or XCL_NOTPROP when UTF support is not compiled. */
-
-if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(PCRE2_UCHAR);
-
-while ((t = *data++) != XCL_END)
-  {
-  uint32_t x, y;
-  if (t == XCL_SINGLE)
-    {
-#ifdef SUPPORT_UNICODE
-    if (utf)
-      {
-      GETCHARINC(x, data); /* macro generates multiple statements */
-      }
-    else
-#endif
-    x = *data++;
-    if (c == x) return !negated;
-    }
-  else if (t == XCL_RANGE)
-    {
 #ifdef SUPPORT_UNICODE
-    if (utf)
-      {
-      GETCHARINC(x, data); /* macro generates multiple statements */
-      GETCHARINC(y, data); /* macro generates multiple statements */
-      }
-    else
-#endif
-      {
-      x = *data++;
-      y = *data++;
-      }
-    if (c >= x && c <= y) return !negated;
-    }
+if (*data == XCL_PROP || *data == XCL_NOTPROP)
+  {
+  /* The UCD record is the same for all properties. */
+  const ucd_record *prop = GET_UCD(c);
 
-#ifdef SUPPORT_UNICODE
-  else  /* XCL_PROP & XCL_NOTPROP */
+  do
     {
     int chartype;
-    const ucd_record *prop = GET_UCD(c);
-    BOOL isprop = t == XCL_PROP;
+    BOOL isprop = (*data++) == XCL_PROP;
     BOOL ok;
 
     switch(*data)
       {
-      case PT_ANY:
-      if (isprop) return !negated;
-      break;
-
       case PT_LAMP:
       chartype = prop->chartype;
       if ((chartype == ucp_Lu || chartype == ucp_Ll ||
-           chartype == ucp_Lt) == isprop) return !negated;
+           chartype == ucp_Lt) == isprop) return not_negated;
       break;
 
       case PT_GC:
       if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
-        return !negated;
+        return not_negated;
       break;
 
       case PT_PC:
-      if ((data[1] == prop->chartype) == isprop) return !negated;
+      if ((data[1] == prop->chartype) == isprop) return not_negated;
       break;
 
       case PT_SC:
-      if ((data[1] == prop->script) == isprop) return !negated;
+      if ((data[1] == prop->script) == isprop) return not_negated;
       break;
 
       case PT_SCX:
       ok = (data[1] == prop->script ||
             MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0);
-      if (ok == isprop) return !negated;
+      if (ok == isprop) return not_negated;
       break;
 
       case PT_ALNUM:
       chartype = prop->chartype;
       if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
            PRIV(ucp_gentype)[chartype] == ucp_N) == isprop)
-        return !negated;
+        return not_negated;
       break;
 
       /* Perl space used to exclude VT, but from Perl 5.18 it is included,
@@ -186,12 +147,12 @@ while ((t = *data++) != XCL_END)
         {
         HSPACE_CASES:
         VSPACE_CASES:
-        if (isprop) return !negated;
+        if (isprop) return not_negated;
         break;
 
         default:
         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
-          return !negated;
+          return not_negated;
         break;
         }
       break;
@@ -201,7 +162,7 @@ while ((t = *data++) != XCL_END)
       if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
            PRIV(ucp_gentype)[chartype] == ucp_N ||
            chartype == ucp_Mn || chartype == ucp_Pc) == isprop)
-        return !negated;
+        return not_negated;
       break;
 
       case PT_UCNC:
@@ -209,24 +170,24 @@ while ((t = *data++) != XCL_END)
         {
         if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
              c == CHAR_GRAVE_ACCENT) == isprop)
-          return !negated;
+          return not_negated;
         }
       else
         {
         if ((c < 0xd800 || c > 0xdfff) == isprop)
-          return !negated;
+          return not_negated;
         }
       break;
 
       case PT_BIDICL:
       if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop)
-        return !negated;
+        return not_negated;
       break;
 
       case PT_BOOL:
       ok = MAPBIT(PRIV(ucd_boolprop_sets) +
         UCD_BPROPS_PROP(prop), data[1]) != 0;
-      if (ok == isprop) return !negated;
+      if (ok == isprop) return not_negated;
       break;
 
       /* The following three properties can occur only in an XCLASS, as there
@@ -248,7 +209,7 @@ while ((t = *data++) != XCL_END)
               (chartype == ucp_Cf &&
                 c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
          )) == isprop)
-        return !negated;
+        return not_negated;
       break;
 
       /* Printable character: same as graphic, with the addition of Zs, i.e.
@@ -262,7 +223,7 @@ while ((t = *data++) != XCL_END)
               (chartype == ucp_Cf &&
                 c != 0x061c && (c < 0x2066 || c > 0x2069))
          )) == isprop)
-        return !negated;
+        return not_negated;
       break;
 
       /* Punctuation: all Unicode punctuation, plus ASCII characters that
@@ -273,7 +234,7 @@ while ((t = *data++) != XCL_END)
       chartype = prop->chartype;
       if ((PRIV(ucp_gentype)[chartype] == ucp_P ||
             (c < 128 && PRIV(ucp_gentype)[chartype] == ucp_S)) == isprop)
-        return !negated;
+        return not_negated;
       break;
 
       /* Perl has two sets of hex digits */
@@ -285,24 +246,300 @@ while ((t = *data++) != XCL_END)
            (c >= 0xff10 && c <= 0xff19) ||  /* Fullwidth digits */
            (c >= 0xff21 && c <= 0xff26) ||  /* Fullwidth letters */
            (c >= 0xff41 && c <= 0xff46)) == isprop)
-        return !negated;
+        return not_negated;
       break;
 
       /* This should never occur, but compilers may mutter if there is no
       default. */
 
       default:
+      PCRE2_DEBUG_UNREACHABLE();
       return FALSE;
       }
 
     data += 2;
     }
+  while (*data == XCL_PROP || *data == XCL_NOTPROP);
+  }
 #else
   (void)utf;  /* Avoid compiler warning */
 #endif  /* SUPPORT_UNICODE */
+
+/* Match against large chars or ranges that end with a large char. */
+if (*data < XCL_LIST)
+  {
+  while ((t = *data++) != XCL_END)
+    {
+    uint32_t x, y;
+
+#ifdef SUPPORT_UNICODE
+    if (utf)
+      {
+      GETCHARINC(x, data); /* macro generates multiple statements */
+      }
+    else
+#endif
+      x = *data++;
+
+    if (t == XCL_SINGLE)
+      {
+      /* Since character ranges follow the properties, and they are
+      sorted, early return is possible for all characters <= x. */
+      if (c <= x) return (c == x) ? not_negated : !not_negated;
+      continue;
+      }
+
+    PCRE2_ASSERT(t == XCL_RANGE);
+#ifdef SUPPORT_UNICODE
+    if (utf)
+      {
+      GETCHARINC(y, data); /* macro generates multiple statements */
+      }
+    else
+#endif
+      y = *data++;
+
+    /* Since character ranges follow the properties, and they are
+    sorted, early return is possible for all characters <= y. */
+    if (c <= y) return (c >= x) ? not_negated : !not_negated;
+    }
+
+  return !not_negated;   /* char did not match */
+  }
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+type = (uint32_t)(data[0] << 8) | data[1];
+data += 2;
+#else
+type = data[0];
+data++;
+#endif  /* CODE_UNIT_WIDTH */
+
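+/* Locate the start of the character list. Its offset is stored in units
+of two bytes and is counted backwards from char_lists_end. */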
+next_char = char_lists_end - (GET(data, 0) << 1);
+type &= XCL_TYPE_MASK;
+
+/* Alignment check. */
+PCRE2_ASSERT(((uintptr_t)next_char & 0x1) == 0);
+
+if (c >= XCL_CHAR_LIST_HIGH_16_START)
+  {
+  max_index = type & XCL_ITEM_COUNT_MASK;
+  if (max_index == XCL_ITEM_COUNT_MASK)
+    {
+    max_index = *(const uint16_t*)next_char;
+    PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
+    next_char += 2;
+    }
+
+  next_char += max_index << 1;
+  type >>= XCL_TYPE_BIT_LEN;
   }
 
-return negated;   /* char did not match */
+if (c < XCL_CHAR_LIST_LOW_32_START)
+  {
+  max_index = type & XCL_ITEM_COUNT_MASK;
+
+  c = (uint16_t)((c << XCL_CHAR_SHIFT) | XCL_CHAR_END);
+
+  if (max_index == XCL_ITEM_COUNT_MASK)
+    {
+    max_index = *(const uint16_t*)next_char;
+    PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
+    next_char += 2;
+    }
+
+  if (max_index == 0 || c < *(const uint16_t*)next_char)
+    return ((type & XCL_BEGIN_WITH_RANGE) != 0) == not_negated;
+
+  min_index = 0;
+  value = ((const uint16_t*)next_char)[--max_index];
+  if (c >= value)
+    return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
+
+  max_index--;
+
+  /* Binary search of a range. */
+  while (TRUE)
+    {
+    uint32_t mid_index = (min_index + max_index) >> 1;
+    value = ((const uint16_t*)next_char)[mid_index];
+
+    if (c < value)
+      max_index = mid_index - 1;
+    else if (((const uint16_t*)next_char)[mid_index + 1] <= c)
+      min_index = mid_index + 1;
+    else
+      return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
+    }
+  }
+
+/* Skip the 16 bit ranges. */
+max_index = type & XCL_ITEM_COUNT_MASK;
+if (max_index == XCL_ITEM_COUNT_MASK)
+  {
+  max_index = *(const uint16_t*)next_char;
+  PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
+  next_char += 2;
+  }
+
+next_char += (max_index << 1);
+type >>= XCL_TYPE_BIT_LEN;
+
+/* Alignment check. */
+PCRE2_ASSERT(((uintptr_t)next_char & 0x3) == 0);
+
+max_index = type & XCL_ITEM_COUNT_MASK;
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+if (c >= XCL_CHAR_LIST_HIGH_32_START)
+  {
+  if (max_index == XCL_ITEM_COUNT_MASK)
+    {
+    max_index = *(const uint32_t*)next_char;
+    PCRE2_ASSERT(max_index >= XCL_ITEM_COUNT_MASK);
+    next_char += 4;
+    }
+
+  next_char += max_index << 2;
+  type >>= XCL_TYPE_BIT_LEN;
+  max_index = type & XCL_ITEM_COUNT_MASK;
+  }
+#endif
+
+c = (uint32_t)((c << XCL_CHAR_SHIFT) | XCL_CHAR_END);
+
+if (max_index == XCL_ITEM_COUNT_MASK)
+  {
+  max_index = *(const uint32_t*)next_char;
+  next_char += 4;
+  }
+
+if (max_index == 0 || c < *(const uint32_t*)next_char)
+  return ((type & XCL_BEGIN_WITH_RANGE) != 0) == not_negated;
+
+min_index = 0;
+value = ((const uint32_t*)next_char)[--max_index];
+if (c >= value)
+  return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
+
+max_index--;
+
+/* Binary search of a range. */
+while (TRUE)
+  {
+  uint32_t mid_index = (min_index + max_index) >> 1;
+  value = ((const uint32_t*)next_char)[mid_index];
+
+  if (c < value)
+    max_index = mid_index - 1;
+  else if (((const uint32_t*)next_char)[mid_index + 1] <= c)
+    min_index = mid_index + 1;
+  else
+    return (value == c || (value & XCL_CHAR_END) == 0) == not_negated;
+  }
+}
+
+
+
+/*************************************************
+*       Match character against an ECLASS        *
+*************************************************/
+
+/* This function is called to match a character against an extended class
+used for describing characters using boolean operations on sets.
+
+Arguments:
+  c               the character
+  data_start      points to the start of the ECLASS data
+  data_end        points one past the last code unit of the ECLASS data
+  char_lists_end  passed unchanged to PRIV(xclass)() for each nested XCLASS
+  utf             TRUE if in UTF mode
+
+Returns:      TRUE if character matches, else FALSE
+*/
+
+BOOL
+PRIV(eclass)(uint32_t c, PCRE2_SPTR data_start, PCRE2_SPTR data_end,
+  const uint8_t *char_lists_end, BOOL utf)
+{
+PCRE2_SPTR ptr = data_start;
+PCRE2_UCHAR flags;
+uint32_t stack = 0;
+int stack_depth = 0;
+
+PCRE2_ASSERT(data_start < data_end);
+flags = *ptr++;
+PCRE2_ASSERT((flags & ECL_MAP) == 0 ||
+             (data_end - ptr) >= 32 / (int)sizeof(PCRE2_UCHAR));
+
+/* Code points < 256 are matched against a bitmap, if one is present.
+Otherwise all codepoints are checked later. */
+
+if ((flags & ECL_MAP) != 0)
+  {
+  if (c < 256)
+    return (((const uint8_t *)ptr)[c/8] & (1u << (c&7))) != 0;
+
+  /* Skip the bitmap. */
+  ptr += 32 / sizeof(PCRE2_UCHAR);
+  }
+
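+/* Loop until we reach the end of the ECLASS, evaluating it with a stack of
+bits held in "stack": each ECL_XCLASS pushes its match result as the lowest
+bit, ECL_AND/ECL_OR/ECL_XOR replace the two lowest bits with their
+combination, and ECL_NOT inverts the lowest bit. */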
+while (ptr < data_end)
+  {
+  switch (*ptr)
+    {
+    case ECL_AND:
+    ++ptr;
+    stack = (stack >> 1) & (stack | ~(uint32_t)1u);
+    PCRE2_ASSERT(stack_depth >= 2);
+    --stack_depth;
+    break;
+
+    case ECL_OR:
+    ++ptr;
+    stack = (stack >> 1) | (stack & (uint32_t)1u);
+    PCRE2_ASSERT(stack_depth >= 2);
+    --stack_depth;
+    break;
+
+    case ECL_XOR:
+    ++ptr;
+    stack = (stack >> 1) ^ (stack & (uint32_t)1u);
+    PCRE2_ASSERT(stack_depth >= 2);
+    --stack_depth;
+    break;
+
+    case ECL_NOT:
+    ++ptr;
+    stack ^= (uint32_t)1u;
+    PCRE2_ASSERT(stack_depth >= 1);
+    break;
+
+    case ECL_XCLASS:
+      {
+      uint32_t matched = PRIV(xclass)(c, ptr + 1 + LINK_SIZE, char_lists_end, utf);
+
+      ptr += GET(ptr, 1);
+      stack = (stack << 1) | matched;
+      ++stack_depth;
+      break;
+      }
+
+    /* This should never occur, but compilers may mutter if there is no
+    default. */
+
+    default:
+    PCRE2_DEBUG_UNREACHABLE();
+    return FALSE;
+    }
+  }
+
+PCRE2_ASSERT(stack_depth == 1);
+(void)stack_depth;  /* Ignore unused variable, if assertions are disabled. */
+
+/* The final bit left on the stack now holds the match result. */
+return (stack & 1u) != 0;
 }
 
 /* End of pcre2_xclass.c */

Some files were not shown because too many files changed in this diff