Browse Source

Updated to PCRE 10.43 (#28)

Options are now configured per search. Default options are used if none are provided.
Added SetDefaultOptions method.
Options can be ignored in favour of options provided in the pattern.
Brucey 1 year ago
parent
commit
a6c442ed5d
100 changed files with 7227 additions and 4539 deletions
  1. 24 22
      regex.mod/common.bmx
  2. 4 1
      regex.mod/pcre/132html
  3. 3 3
      regex.mod/pcre/AUTHORS
  4. 180 83
      regex.mod/pcre/CMakeLists.txt
  5. 442 7
      regex.mod/pcre/ChangeLog
  6. 70 47
      regex.mod/pcre/CheckMan
  7. 3 3
      regex.mod/pcre/LICENCE
  8. 96 20
      regex.mod/pcre/Makefile.am
  9. 417 122
      regex.mod/pcre/Makefile.in
  10. 103 0
      regex.mod/pcre/NEWS
  11. 41 30
      regex.mod/pcre/NON-AUTOTOOLS-BUILD
  12. 22 1
      regex.mod/pcre/PrepareRelease
  13. 77 35
      regex.mod/pcre/README
  14. 282 36
      regex.mod/pcre/RunGrepTest
  15. 699 699
      regex.mod/pcre/RunGrepTest.bat
  16. 57 10
      regex.mod/pcre/RunTest
  17. 6 3
      regex.mod/pcre/RunTest.bat
  18. 19 19
      regex.mod/pcre/aclocal.m4
  19. 7 8
      regex.mod/pcre/ar-lib
  20. 4 3
      regex.mod/pcre/cmake/pcre2-config.cmake.in
  21. 3 1
      regex.mod/pcre/compile
  22. 200 159
      regex.mod/pcre/config.guess
  23. 95 20
      regex.mod/pcre/configure.ac
  24. 41 30
      regex.mod/pcre/depcomp
  25. 77 35
      regex.mod/pcre/doc/html/README.txt
  26. 3 0
      regex.mod/pcre/doc/html/index.html
  27. 21 9
      regex.mod/pcre/doc/html/pcre2_compile.html
  28. 1 1
      regex.mod/pcre/doc/html/pcre2_general_context_create.html
  29. 40 0
      regex.mod/pcre/doc/html/pcre2_get_match_data_heapframes_size.html
  30. 12 2
      regex.mod/pcre/doc/html/pcre2_jit_match.html
  31. 4 3
      regex.mod/pcre/doc/html/pcre2_jit_stack_create.html
  32. 2 0
      regex.mod/pcre/doc/html/pcre2_match.html
  33. 1 1
      regex.mod/pcre/doc/html/pcre2_match_data_create_from_pattern.html
  34. 5 3
      regex.mod/pcre/doc/html/pcre2_match_data_free.html
  35. 1 1
      regex.mod/pcre/doc/html/pcre2_serialize_decode.html
  36. 8 2
      regex.mod/pcre/doc/html/pcre2_set_compile_extra_options.html
  37. 42 0
      regex.mod/pcre/doc/html/pcre2_set_max_varlookbehind.html
  38. 1 1
      regex.mod/pcre/doc/html/pcre2_set_recursion_memory_management.html
  39. 17 17
      regex.mod/pcre/doc/html/pcre2_substitute.html
  40. 1 1
      regex.mod/pcre/doc/html/pcre2_substring_list_free.html
  41. 301 148
      regex.mod/pcre/doc/html/pcre2api.html
  42. 71 44
      regex.mod/pcre/doc/html/pcre2build.html
  43. 9 9
      regex.mod/pcre/doc/html/pcre2callout.html
  44. 84 65
      regex.mod/pcre/doc/html/pcre2compat.html
  45. 1 1
      regex.mod/pcre/doc/html/pcre2convert.html
  46. 3 2
      regex.mod/pcre/doc/html/pcre2demo.html
  47. 191 128
      regex.mod/pcre/doc/html/pcre2grep.html
  48. 67 46
      regex.mod/pcre/doc/html/pcre2jit.html
  49. 14 4
      regex.mod/pcre/doc/html/pcre2limits.html
  50. 3 3
      regex.mod/pcre/doc/html/pcre2matching.html
  51. 1 1
      regex.mod/pcre/doc/html/pcre2partial.html
  52. 327 342
      regex.mod/pcre/doc/html/pcre2pattern.html
  53. 28 9
      regex.mod/pcre/doc/html/pcre2perform.html
  54. 34 11
      regex.mod/pcre/doc/html/pcre2posix.html
  55. 1 1
      regex.mod/pcre/doc/html/pcre2sample.html
  56. 7 8
      regex.mod/pcre/doc/html/pcre2serialize.html
  57. 158 229
      regex.mod/pcre/doc/html/pcre2syntax.html
  58. 135 64
      regex.mod/pcre/doc/html/pcre2test.html
  59. 45 18
      regex.mod/pcre/doc/html/pcre2unicode.html
  60. 3 0
      regex.mod/pcre/doc/index.html.src
  61. 13 9
      regex.mod/pcre/doc/pcre2-config.txt
  62. 294 267
      regex.mod/pcre/doc/pcre2.txt
  63. 21 10
      regex.mod/pcre/doc/pcre2_compile.3
  64. 1 1
      regex.mod/pcre/doc/pcre2_general_context_create.3
  65. 28 0
      regex.mod/pcre/doc/pcre2_get_match_data_heapframes_size.3
  66. 11 3
      regex.mod/pcre/doc/pcre2_jit_match.3
  67. 4 3
      regex.mod/pcre/doc/pcre2_jit_stack_create.3
  68. 3 1
      regex.mod/pcre/doc/pcre2_match.3
  69. 1 1
      regex.mod/pcre/doc/pcre2_match_data_create_from_pattern.3
  70. 6 4
      regex.mod/pcre/doc/pcre2_match_data_free.3
  71. 1 1
      regex.mod/pcre/doc/pcre2_serialize_decode.3
  72. 15 3
      regex.mod/pcre/doc/pcre2_set_compile_extra_options.3
  73. 30 0
      regex.mod/pcre/doc/pcre2_set_max_varlookbehind.3
  74. 1 1
      regex.mod/pcre/doc/pcre2_set_recursion_memory_management.3
  75. 29 19
      regex.mod/pcre/doc/pcre2_substitute.3
  76. 2 2
      regex.mod/pcre/doc/pcre2_substring_list_free.3
  77. 267 116
      regex.mod/pcre/doc/pcre2api.3
  78. 41 15
      regex.mod/pcre/doc/pcre2build.3
  79. 10 10
      regex.mod/pcre/doc/pcre2callout.3
  80. 80 63
      regex.mod/pcre/doc/pcre2compat.3
  81. 1 1
      regex.mod/pcre/doc/pcre2convert.3
  82. 11 0
      regex.mod/pcre/doc/pcre2demo.3
  83. 181 128
      regex.mod/pcre/doc/pcre2grep.1
  84. 362 305
      regex.mod/pcre/doc/pcre2grep.txt
  85. 76 45
      regex.mod/pcre/doc/pcre2jit.3
  86. 14 5
      regex.mod/pcre/doc/pcre2limits.3
  87. 4 4
      regex.mod/pcre/doc/pcre2matching.3
  88. 1 1
      regex.mod/pcre/doc/pcre2partial.3
  89. 329 345
      regex.mod/pcre/doc/pcre2pattern.3
  90. 26 10
      regex.mod/pcre/doc/pcre2perform.3
  91. 31 12
      regex.mod/pcre/doc/pcre2posix.3
  92. 1 1
      regex.mod/pcre/doc/pcre2sample.3
  93. 7 8
      regex.mod/pcre/doc/pcre2serialize.3
  94. 105 176
      regex.mod/pcre/doc/pcre2syntax.3
  95. 132 65
      regex.mod/pcre/doc/pcre2test.1
  96. 268 224
      regex.mod/pcre/doc/pcre2test.txt
  97. 42 19
      regex.mod/pcre/doc/pcre2unicode.3
  98. 1 1
      regex.mod/pcre/install-sh
  99. 96 28
      regex.mod/pcre/ltmain.sh
  100. 67 61
      regex.mod/pcre/m4/libtool.m4

+ 24 - 22
regex.mod/common.bmx

@@ -1,4 +1,4 @@
-' Copyright (c) 2007-2021 Bruce A Henderson
+' Copyright (c) 2007-2024 Bruce A Henderson
 ' All rights reserved.
 '
 ' Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,7 @@ Import "src/*.h"
 
 Import "pcre/src/pcre2_auto_possess.c"
 Import "pcre/src/pcre2_chartables.c"
+Import "pcre/src/pcre2_chkdint.c"
 Import "pcre/src/pcre2_compile.c"
 Import "pcre/src/pcre2_config.c"
 Import "pcre/src/pcre2_context.c"
@@ -71,6 +72,7 @@ Const PCRE2_CONFIG_VERSION:Int =                11
 Const PCRE2_CONFIG_HEAPLIMIT:Int =              12
 Const PCRE2_CONFIG_NEVER_BACKSLASH_C:Int =      13
 Const PCRE2_CONFIG_COMPILED_WIDTHS:Int =        14
+Const PCRE2_CONFIG_TABLES_LENGTH:Int =          15
 
 ' Exec-time and get/set-time error codes
 ' Error codes: no match and partial match are "expected" errors.
@@ -151,6 +153,9 @@ Const PCRE2_ERROR_TOOMANYREPLACE:Int =    -61
 Const PCRE2_ERROR_BADSERIALIZEDDATA:Int = -62
 Const PCRE2_ERROR_HEAPLIMIT:Int =         -63
 Const PCRE2_ERROR_CONVERT_SYNTAX:Int =    -64
+Const PCRE2_ERROR_INTERNAL_DUPMATCH:Int = -65
+Const PCRE2_ERROR_DFA_UINVALID_UTF:Int =  -66
+Const PCRE2_ERROR_INVALIDOFFSET:Int =     -67
 
 ' Error codes for pcre2_compile(). Some of these are also used by
 ' pcre2_pattern_convert()
@@ -213,7 +218,7 @@ Const PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE:Int =      155
 Const PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE:Int =       156
 Const PCRE2_ERROR_BACKSLASH_G_SYNTAX:Int =             157
 Const PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING:Int = 158
-Const PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED:Int =      159
+Const PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED:Int =      159 ' Error 159 is obsolete and should now never occur
 Const PCRE2_ERROR_VERB_UNKNOWN:Int =                   160
 Const PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG:Int =      161
 Const PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED:Int =       162
@@ -247,6 +252,14 @@ Const PCRE2_ERROR_INTERNAL_BAD_CODE:Int =              189
 Const PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP:Int =      190
 Const PCRE2_ERROR_NO_SURROGATES_IN_UTF16:Int =         191
 Const PCRE2_ERROR_BAD_LITERAL_OPTIONS:Int =            192
+Const PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE:Int =      193
+Const PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS:Int =      194
+Const PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN:Int =        195
+Const PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE:Int =       196
+Const PCRE2_ERROR_TOO_MANY_CAPTURES:Int =              197
+Const PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED:Int = 198
+Const PCRE2_ERROR_BACKSLASH_K_IN_LOOKAROUND:Int =      199
+
 
 ' The following option bits can be passed to pcre2_compile(), pcre2_match(),
 ' or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
@@ -291,6 +304,7 @@ Const PCRE2_ALT_VERBNAMES:Int =       $00400000  ' C
 Const PCRE2_USE_OFFSET_LIMIT:Int =    $00800000  '   J M D 
 Const PCRE2_EXTENDED_MORE:Int =       $01000000  ' C       
 Const PCRE2_LITERAL:Int =             $02000000  ' C       
+Const PCRE2_MATCH_INVALID_UTF:Int =   $04000000  '   J M D 
 
 ' These are for pcre2_jit_compile(). 
 
@@ -369,27 +383,15 @@ Extern
 
 	Function pcre2_config_16:Int(what:Int, where_:Int Ptr)
 
-?ptr64
-	Function pcre2_compile_16:Byte Ptr(pattern:Short Ptr, patternLength:Long, options:Int, errorcodeptr:Int Ptr, ..
-		erroffset:Long Ptr, contextptr:Byte Ptr)
-	Function pcre2_match_16:Int(pattern:Byte Ptr, subject:Byte Ptr, subjectLength:Long, startOffset:Long, ..
-		options:Int, matchPtr:Byte Ptr, context:Byte Ptr)
-	Function pcre2_substring_get_bynumber_16:Int(matchPtr:Byte Ptr, stringnumber:Int, ..
-		stringptr:Short Ptr Ptr, stringlength:Long Ptr)
-	Function pcre2_substring_get_byname_16:Int(matchPtr:Byte Ptr, name:Short Ptr, stringptr:Short Ptr Ptr, stringlength:Long Ptr)
-	Function pcre2_get_error_message_16:Int(errorcode:Int, buffer:Short Ptr, length:Long)
-	Function pcre2_get_ovector_pointer_16:Long Ptr(matchPtr:Byte Ptr)
-?Not ptr64
-	Function pcre2_compile_16:Byte Ptr(pattern:Short Ptr, patternLength:Int, options:Int, errorcodeptr:Int Ptr, ..
-		erroffset:Int Ptr, contextptr:Byte Ptr)
-	Function pcre2_match_16:Int(pattern:Byte Ptr, subject:Byte Ptr, subjectLength:Int, startOffset:Int, ..
+	Function pcre2_compile_16:Byte Ptr(pattern:Short Ptr, patternLength:Size_T, options:Int, errorcodeptr:Int Ptr, ..
+		erroffset:Size_T Ptr, contextptr:Byte Ptr)
+	Function pcre2_match_16:Int(pattern:Byte Ptr, subject:Byte Ptr, subjectLength:Size_T, startOffset:Size_T, ..
 		options:Int, matchPtr:Byte Ptr, context:Byte Ptr)
 	Function pcre2_substring_get_bynumber_16:Int(matchPtr:Byte Ptr, stringnumber:Int, ..
-		stringptr:Short Ptr Ptr, stringlength:Int Ptr)
-	Function pcre2_substring_get_byname_16:Int(matchPtr:Byte Ptr, name:Short Ptr, stringptr:Short Ptr Ptr, stringlength:Int Ptr)
-	Function pcre2_get_error_message_16:Int(errorcode:Int, buffer:Short Ptr, length:Int)
-	Function pcre2_get_ovector_pointer_16:Int Ptr(matchPtr:Byte Ptr)
-?
+		stringptr:Short Ptr Ptr, stringlength:Size_T Ptr)
+	Function pcre2_substring_get_byname_16:Int(matchPtr:Byte Ptr, name:Short Ptr, stringptr:Short Ptr Ptr, stringlength:Size_T Ptr)
+	Function pcre2_get_error_message_16:Int(errorcode:Int, buffer:Short Ptr, length:Size_T)
+	Function pcre2_get_ovector_pointer_16:Size_T Ptr(matchPtr:Byte Ptr)
 
 	Function pcre2_substring_free_16(strinptr:Short Ptr)
 	
@@ -398,7 +400,7 @@ Extern
 	
 	Function pcre2_pattern_info_16:Int(pcre:Byte Ptr, what:Int, where_:Int Ptr)
 	
-	Function pcre2_get_ovector_count_16:Int(matchPtr:Byte Ptr)
+	Function pcre2_get_ovector_count_16:UInt(matchPtr:Byte Ptr)
 	
 	Function pcre2_jit_compile_16:Int(pcre:Byte Ptr, options:Int)
 	

+ 4 - 1
regex.mod/pcre/132html

@@ -94,7 +94,7 @@ while (<STDIN>)
       die "*** Processing abandoned\n";
       }
 
-    # Instead of .br, relevent "literal" sections are enclosed in .nf/.fi.
+    # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.
 
     elsif (/^\.nf/)
       {
@@ -180,6 +180,9 @@ while (<STDIN>)
       $wrotetext = 1;
       }
 
+    # Remove the "AUTOMATICALLY GENERATED" warning from pcre2demo.3
+    elsif (/^\.\\"AUTOMATICALLY GENERATED/) { next; }
+
     # A comment that starts "HREF" takes the next line as a name that
     # is turned into a hyperlink, using the text given, which might be
     # in a special font. If it ends in () or (digits) or punctuation, they

+ 3 - 3
regex.mod/pcre/AUTHORS

@@ -8,7 +8,7 @@ Email domain:     gmail.com
 Retired from University of Cambridge Computing Service,
 Cambridge, England.
 
-Copyright (c) 1997-2021 University of Cambridge
+Copyright (c) 1997-2024 University of Cambridge
 All rights reserved
 
 
@@ -19,7 +19,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2010-2021 Zoltan Herczeg
+Copyright(c) 2010-2024 Zoltan Herczeg
 All rights reserved.
 
 
@@ -30,7 +30,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2009-2021 Zoltan Herczeg
+Copyright(c) 2009-2024 Zoltan Herczeg
 All rights reserved.
 
 ####

+ 180 - 83
regex.mod/pcre/CMakeLists.txt

@@ -99,17 +99,29 @@
 #            build in one go.
 # 2021-08-28 PH increased minimum version
 # 2021-08-28 PH added test for realpath()
+# 2022-12-10 PH added support for pcre2posix_test
+# 2023-01-15 Carlo added C99 as the minimum required
+# 2023-08-06 PH added support for setting variable length lookbehind maximum
 
-PROJECT(PCRE2 C)
+# Increased minimum to 3.5 to work around deprecated backward compatibility
+# since 3.27.
+cmake_minimum_required(VERSION 3.5 FATAL_ERROR)
+project(PCRE2 C)
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_C_STANDARD_REQUIRED TRUE)
 
-# Increased minimum to 2.8.5 to support GNUInstallDirs.
-# Increased minimum to 3.0.0 because older than 2.8.12 is deprecated.
-CMAKE_MINIMUM_REQUIRED(VERSION 3.0.0)
+set(CMAKE_C_VISIBILITY_PRESET hidden)
+cmake_policy(SET CMP0063 NEW)
 
 # Set policy CMP0026 to avoid warnings for the use of LOCATION in
 # GET_TARGET_PROPERTY. This should no longer be required.
 # CMAKE_POLICY(SET CMP0026 OLD)
 
+# With a recent cmake, you can provide a rootdir to look for
+# non-standard installed library dependencies, but to do so, the policy
+# needs to be set to new (by uncommenting the following)
+# CMAKE_POLICY(SET CMP0074 NEW)
+
 # For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH
 # on the command line.
 # SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
@@ -142,16 +154,43 @@ CHECK_INCLUDE_FILE(windows.h    HAVE_WINDOWS_H)
 CHECK_SYMBOL_EXISTS(bcopy         "strings.h"  HAVE_BCOPY)
 CHECK_SYMBOL_EXISTS(memfd_create  "sys/mman.h" HAVE_MEMFD_CREATE)
 CHECK_SYMBOL_EXISTS(memmove       "string.h"   HAVE_MEMMOVE)
-CHECK_SYMBOL_EXISTS(realpath      "stdlib.h"   HAVE_REALPATH)
 CHECK_SYMBOL_EXISTS(secure_getenv "stdlib.h"   HAVE_SECURE_GETENV)
 CHECK_SYMBOL_EXISTS(strerror      "string.h"   HAVE_STRERROR)
 
+CHECK_C_SOURCE_COMPILES(
+  "#include <stdlib.h>
+   #include <limits.h>
+   int main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[c], buf); return 0; }"
+  HAVE_REALPATH
+)
+
 set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
 set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror")
+
 CHECK_C_SOURCE_COMPILES(
-  "int main() { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
+  "#include <stddef.h>
+   int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; }"
+  HAVE_BUILTIN_MUL_OVERFLOW
+)
+
+CHECK_C_SOURCE_COMPILES(
+  "int main(void) { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
   HAVE_ATTRIBUTE_UNINITIALIZED
 )
+
+CHECK_C_SOURCE_COMPILES([=[
+  extern __attribute__ ((visibility ("default"))) int f(void);
+  int main(void) { return f(); }
+  int f(void) { return 42; }
+  ]=] HAVE_VISIBILITY
+)
+
+if (HAVE_VISIBILITY)
+  set(PCRE2_EXPORT [=[__attribute__ ((visibility ("default")))]=])
+else()
+  set(PCRE2_EXPORT)
+endif()
+
 set(CMAKE_REQUIRED_FLAGS ${ORIG_CMAKE_REQUIRED_FLAGS})
 
 # Check whether Intel CET is enabled, and if so, adjust compiler flags. This
@@ -170,8 +209,6 @@ IF (INTEL_CET_ENABLED)
   SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
 ENDIF(INTEL_CET_ENABLED)
 
-
-
 # User-configurable options
 #
 # Note: CMakeSetup displays these in alphabetical order, regardless of
@@ -208,6 +245,9 @@ SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING
 SET(PCRE2_HEAP_LIMIT "20000000" CACHE STRING
     "Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details.")
 
+SET(PCRE2_MAX_VARLOOKBEHIND "255" CACHE STRING
+    "Default limit on variable lookbehinds.")
+
 SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING
     "Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
 
@@ -300,9 +340,19 @@ ENDIF(PCRE2_SUPPORT_LIBZ)
 IF(EDITLINE_FOUND)
   OPTION (PCRE2_SUPPORT_LIBEDIT  "Enable support for linking pcre2test with libedit." OFF)
 ENDIF(EDITLINE_FOUND)
-IF(PCRE2_SUPPORT_LIBEDIT)
-  INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR})
-ENDIF(PCRE2_SUPPORT_LIBEDIT)
+IF(EDITLINE_FOUND)
+  IF(PCRE2_SUPPORT_LIBEDIT)
+    INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR})
+  ENDIF(PCRE2_SUPPORT_LIBEDIT)
+ELSE(EDITLINE_FOUND)
+  IF(PCRE2_SUPPORT_LIBEDIT)
+    MESSAGE(FATAL_ERROR
+      " libedit not found, set EDITLINE_INCLUDE_DIR to a compatible header\n"
+      " or set Editline_ROOT to a full libedit installed tree, as needed\n"
+      " Might need to enable policy CMP0074 in CMakeLists.txt"
+    )
+  ENDIF(PCRE2_SUPPORT_LIBEDIT)
+ENDIF(EDITLINE_FOUND)
 
 # readline lib
 IF(READLINE_FOUND)
@@ -340,7 +390,12 @@ IF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
 ENDIF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8)
 
 IF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)
-        MESSAGE(FATAL_ERROR "Only one of libreadline or libeditline can be specified")
+        IF(READLINE_FOUND)
+                MESSAGE(FATAL_ERROR
+                  " Only one of the readline compatible libraries can be enabled.\n"
+                  " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF"
+                )
+        ENDIF(READLINE_FOUND)
 ENDIF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT)
 
 IF(PCRE2_SUPPORT_BSR_ANYCRLF)
@@ -356,7 +411,13 @@ IF(PCRE2_SUPPORT_UNICODE)
 ENDIF(PCRE2_SUPPORT_UNICODE)
 
 IF(PCRE2_SUPPORT_JIT)
-        SET(SUPPORT_JIT 1)
+	SET(SUPPORT_JIT 1)
+	IF(UNIX)
+		FIND_PACKAGE(Threads REQUIRED)
+		IF(CMAKE_USE_PTHREADS_INIT)
+			SET(REQUIRE_PTHREAD 1)
+		ENDIF(CMAKE_USE_PTHREADS_INIT)
+	ENDIF(UNIX)
 ENDIF(PCRE2_SUPPORT_JIT)
 
 IF(PCRE2_SUPPORT_JIT_SEALLOC)
@@ -510,46 +571,6 @@ IF(WIN32)
   SET(CMAKE_DEBUG_POSTFIX "d")
 ENDIF(WIN32)
 
-# Generate pkg-config files
-
-SET(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
-SET(prefix ${CMAKE_INSTALL_PREFIX})
-
-SET(exec_prefix "\${prefix}")
-SET(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
-SET(includedir "\${prefix}/include")
-IF(WIN32 AND (CMAKE_BUILD_TYPE MATCHES Debug))
-  SET(LIB_POSTFIX ${CMAKE_DEBUG_POSTFIX})
-ENDIF()
-CONFIGURE_FILE(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY)
-SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc")
-
-IF(PCRE2_BUILD_PCRE2_8)
-  CONFIGURE_FILE(libpcre2-8.pc.in libpcre2-8.pc @ONLY)
-  SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-8.pc")
-  SET(enable_pcre2_8 "yes")
-ELSE()
-  SET(enable_pcre2_8 "no")
-ENDIF()
-
-IF(PCRE2_BUILD_PCRE2_16)
-  CONFIGURE_FILE(libpcre2-16.pc.in libpcre2-16.pc @ONLY)
-  SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-16.pc")
-  SET(enable_pcre2_16 "yes")
-ELSE()
-  SET(enable_pcre2_16 "no")
-ENDIF()
-
-IF(PCRE2_BUILD_PCRE2_32)
-  CONFIGURE_FILE(libpcre2-32.pc.in libpcre2-32.pc @ONLY)
-  SET(pkg_config_files ${pkg_config_files} "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-32.pc")
-  SET(enable_pcre2_32 "yes")
-ELSE()
-  SET(enable_pcre2_32 "no")
-ENDIF()
-
-CONFIGURE_FILE(pcre2-config.in pcre2-config @ONLY)
-
 # Character table generation
 
 OPTION(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
@@ -575,6 +596,7 @@ SET(PCRE2_HEADERS ${PROJECT_BINARY_DIR}/pcre2.h)
 SET(PCRE2_SOURCES
   src/pcre2_auto_possess.c
   ${PROJECT_BINARY_DIR}/pcre2_chartables.c
+  src/pcre2_chkdint.c
   src/pcre2_compile.c
   src/pcre2_config.c
   src/pcre2_context.c
@@ -626,6 +648,8 @@ IF(MINGW AND BUILD_SHARED_LIBS)
 ENDIF(MINGW AND BUILD_SHARED_LIBS)
 
 IF(MSVC AND BUILD_SHARED_LIBS)
+  SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-posix.pdb ${dll_pdb_files})
+  SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-posixd.pdb ${dll_pdb_debug_files})
   IF (EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
     SET(PCRE2_SOURCES ${PCRE2_SOURCES} pcre2.rc)
   ENDIF(EXISTS ${PROJECT_SOURCE_DIR}/pcre2.rc)
@@ -657,7 +681,7 @@ ENDIF(MSVC)
 
 SET(CMAKE_INCLUDE_CURRENT_DIR 1)
 
-SET(targets)
+set(targets)
 
 # 8-bit library
 
@@ -671,6 +695,10 @@ IF(PCRE2_BUILD_PCRE2_8)
       VERSION ${LIBPCRE2_8_VERSION}
       SOVERSION ${LIBPCRE2_8_SOVERSION})
     TARGET_COMPILE_DEFINITIONS(pcre2-8-static PUBLIC PCRE2_STATIC)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-8-static PUBLIC ${PROJECT_BINARY_DIR})
+    IF(REQUIRE_PTHREAD)
+        TARGET_LINK_LIBRARIES(pcre2-8-static Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
     SET(targets ${targets} pcre2-8-static)
     ADD_LIBRARY(pcre2-posix-static STATIC ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
     SET_TARGET_PROPERTIES(pcre2-posix-static PROPERTIES
@@ -680,8 +708,8 @@ IF(PCRE2_BUILD_PCRE2_8)
       VERSION ${LIBPCRE2_POSIX_VERSION}
       SOVERSION ${LIBPCRE2_POSIX_SOVERSION})
     TARGET_LINK_LIBRARIES(pcre2-posix-static pcre2-8-static)
-    TARGET_COMPILE_DEFINITIONS(pcre2-posix-static PUBLIC PCRE2_STATIC)
-    SET(targets ${targets} pcre2-posix-static)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-posix-static PUBLIC ${PROJECT_SOURCE_DIR}/src)
+    set(targets ${targets} pcre2-posix-static)
 
     IF(MSVC)
       SET_TARGET_PROPERTIES(pcre2-8-static PROPERTIES OUTPUT_NAME pcre2-8-static)
@@ -697,6 +725,7 @@ IF(PCRE2_BUILD_PCRE2_8)
 
   IF(BUILD_SHARED_LIBS)
     ADD_LIBRARY(pcre2-8-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
+    TARGET_INCLUDE_DIRECTORIES(pcre2-8-shared PUBLIC ${PROJECT_BINARY_DIR})
     SET_TARGET_PROPERTIES(pcre2-8-shared PROPERTIES
       COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
       MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_8_MACHO_COMPATIBILITY_VERSION}"
@@ -704,8 +733,13 @@ IF(PCRE2_BUILD_PCRE2_8)
       VERSION ${LIBPCRE2_8_VERSION}
       SOVERSION ${LIBPCRE2_8_SOVERSION}
       OUTPUT_NAME pcre2-8)
-    SET(targets ${targets} pcre2-8-shared)
+    IF(REQUIRE_PTHREAD)
+        TARGET_LINK_LIBRARIES(pcre2-8-shared Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    set(targets ${targets} pcre2-8-shared)
+
     ADD_LIBRARY(pcre2-posix-shared SHARED ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
+    TARGET_INCLUDE_DIRECTORIES(pcre2-posix-shared PUBLIC ${PROJECT_SOURCE_DIR}/src)
     SET_TARGET_PROPERTIES(pcre2-posix-shared PROPERTIES
       COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8
       MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_POSIX_MACHO_COMPATIBILITY_VERSION}"
@@ -713,8 +747,12 @@ IF(PCRE2_BUILD_PCRE2_8)
       VERSION ${LIBPCRE2_POSIX_VERSION}
       SOVERSION ${LIBPCRE2_POSIX_SOVERSION}
       OUTPUT_NAME pcre2-posix)
+    set(PCRE2POSIX_CFLAG "-DPCRE2POSIX_SHARED")
+    TARGET_COMPILE_DEFINITIONS(pcre2-posix-shared PUBLIC ${PCRE2POSIX_CFLAG})
     TARGET_LINK_LIBRARIES(pcre2-posix-shared pcre2-8-shared)
     SET(targets ${targets} pcre2-posix-shared)
+    SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-8.pdb ${dll_pdb_files})
+    SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-8d.pdb ${dll_pdb_debug_files})
 
     IF(MINGW)
       IF(NON_STANDARD_LIB_PREFIX)
@@ -740,14 +778,18 @@ ENDIF(PCRE2_BUILD_PCRE2_8)
 IF(PCRE2_BUILD_PCRE2_16)
   IF(BUILD_STATIC_LIBS)
     ADD_LIBRARY(pcre2-16-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-    SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES
+    TARGET_INCLUDE_DIRECTORIES(pcre2-16-static PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES UNITY_BUILD OFF
       COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
       MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
       MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
       VERSION ${LIBPCRE2_16_VERSION}
       SOVERSION ${LIBPCRE2_16_SOVERSION})
     TARGET_COMPILE_DEFINITIONS(pcre2-16-static PUBLIC PCRE2_STATIC)
-    SET(targets ${targets} pcre2-16-static)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-16-static Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    set(targets ${targets} pcre2-16-static)
 
     IF(MSVC)
       SET_TARGET_PROPERTIES(pcre2-16-static PROPERTIES OUTPUT_NAME pcre2-16-static)
@@ -761,14 +803,20 @@ IF(PCRE2_BUILD_PCRE2_16)
 
   IF(BUILD_SHARED_LIBS)
     ADD_LIBRARY(pcre2-16-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-    SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES
+    TARGET_INCLUDE_DIRECTORIES(pcre2-16-shared PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-16-shared PROPERTIES UNITY_BUILD OFF
       COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=16
       MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
       MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
       VERSION ${LIBPCRE2_16_VERSION}
       SOVERSION ${LIBPCRE2_16_SOVERSION}
       OUTPUT_NAME pcre2-16)
-    SET(targets ${targets} pcre2-16-shared)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-16-shared Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    set(targets ${targets} pcre2-16-shared)
+    SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-16.pdb ${dll_pdb_files})
+    SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-16d.pdb ${dll_pdb_debug_files})
 
     IF(MINGW)
       IF(NON_STANDARD_LIB_PREFIX)
@@ -792,14 +840,18 @@ ENDIF(PCRE2_BUILD_PCRE2_16)
 IF(PCRE2_BUILD_PCRE2_32)
   IF(BUILD_STATIC_LIBS)
     ADD_LIBRARY(pcre2-32-static STATIC ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-    SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES
+    TARGET_INCLUDE_DIRECTORIES(pcre2-32-static PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES UNITY_BUILD OFF
       COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
       MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
       MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
       VERSION ${LIBPCRE2_32_VERSION}
       SOVERSION ${LIBPCRE2_32_SOVERSION})
     TARGET_COMPILE_DEFINITIONS(pcre2-32-static PUBLIC PCRE2_STATIC)
-    SET(targets ${targets} pcre2-32-static)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-32-static Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    set(targets ${targets} pcre2-32-static)
 
     IF(MSVC)
       SET_TARGET_PROPERTIES(pcre2-32-static PROPERTIES OUTPUT_NAME pcre2-32-static)
@@ -813,14 +865,20 @@ IF(PCRE2_BUILD_PCRE2_32)
 
   IF(BUILD_SHARED_LIBS)
     ADD_LIBRARY(pcre2-32-shared SHARED ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
-    SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES
+    TARGET_INCLUDE_DIRECTORIES(pcre2-32-shared PUBLIC ${PROJECT_BINARY_DIR})
+    SET_TARGET_PROPERTIES(pcre2-32-shared PROPERTIES UNITY_BUILD OFF
       COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32
       MACHO_COMPATIBILITY_VERSION "${LIBPCRE2_32_MACHO_COMPATIBILITY_VERSION}"
       MACHO_CURRENT_VERSION "${LIBPCRE2_32_MACHO_CURRENT_VERSION}"
       VERSION ${LIBPCRE2_32_VERSION}
       SOVERSION ${LIBPCRE2_32_SOVERSION}
       OUTPUT_NAME pcre2-32)
-    SET(targets ${targets} pcre2-32-shared)
+    IF(REQUIRE_PTHREAD)
+      TARGET_LINK_LIBRARIES(pcre2-32-shared Threads::Threads)
+    ENDIF(REQUIRE_PTHREAD)
+    set(targets ${targets} pcre2-32-shared)
+    SET(dll_pdb_files ${PROJECT_BINARY_DIR}/pcre2-32.pdb ${dll_pdb_files})
+    SET(dll_pdb_debug_files ${PROJECT_BINARY_DIR}/pcre2-32d.pdb ${dll_pdb_debug_files})
 
     IF(MINGW)
       IF(NON_STANDARD_LIB_PREFIX)
@@ -839,13 +897,53 @@ IF(PCRE2_BUILD_PCRE2_32)
   ENDIF(BUILD_STATIC_LIBS)
 ENDIF(PCRE2_BUILD_PCRE2_32)
 
+# Generate pkg-config files
+
+set(PACKAGE_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
+set(prefix ${CMAKE_INSTALL_PREFIX})
+
+set(exec_prefix "\${prefix}")
+set(libdir "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
+set(includedir "\${prefix}/include")
+if(WIN32 AND (CMAKE_BUILD_TYPE MATCHES Debug))
+  set(LIB_POSTFIX ${CMAKE_DEBUG_POSTFIX})
+endif()
+
+if(PCRE2_BUILD_PCRE2_8)
+  configure_file(libpcre2-posix.pc.in libpcre2-posix.pc @ONLY)
+  list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-posix.pc")
+  configure_file(libpcre2-8.pc.in libpcre2-8.pc @ONLY)
+  list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-8.pc")
+  set(enable_pcre2_8 "yes")
+else()
+  set(enable_pcre2_8 "no")
+endif()
+
+if(PCRE2_BUILD_PCRE2_16)
+  configure_file(libpcre2-16.pc.in libpcre2-16.pc @ONLY)
+  list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-16.pc")
+  set(enable_pcre2_16 "yes")
+else()
+  set(enable_pcre2_16 "no")
+endif()
+
+if(PCRE2_BUILD_PCRE2_32)
+  configure_file(libpcre2-32.pc.in libpcre2-32.pc @ONLY)
+  list(APPEND pkg_config_files "${CMAKE_CURRENT_BINARY_DIR}/libpcre2-32.pc")
+  set(enable_pcre2_32 "yes")
+else()
+  set(enable_pcre2_32 "no")
+endif()
+
+configure_file(pcre2-config.in pcre2-config @ONLY NEWLINE_STYLE LF)
+
 # Executables
 
 IF(PCRE2_BUILD_PCRE2GREP)
   ADD_EXECUTABLE(pcre2grep src/pcre2grep.c)
   SET_PROPERTY(TARGET pcre2grep
     PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
-  SET(targets ${targets} pcre2grep)
+  set(targets ${targets} pcre2grep)
   TARGET_LINK_LIBRARIES(pcre2grep pcre2-posix ${PCRE2GREP_LIBS})
 ENDIF(PCRE2_BUILD_PCRE2GREP)
 
@@ -864,7 +962,7 @@ IF(PCRE2_BUILD_TESTS)
   ENDIF(MSVC)
 
   ADD_EXECUTABLE(pcre2test ${PCRE2TEST_SOURCES})
-  SET(targets ${targets} pcre2test)
+  set(targets ${targets} pcre2test)
   IF(PCRE2_BUILD_PCRE2_8)
     LIST(APPEND PCRE2TEST_LIBS pcre2-posix pcre2-8)
   ENDIF(PCRE2_BUILD_PCRE2_8)
@@ -876,10 +974,14 @@ IF(PCRE2_BUILD_TESTS)
   ENDIF(PCRE2_BUILD_PCRE2_32)
   TARGET_LINK_LIBRARIES(pcre2test ${PCRE2TEST_LIBS} ${PCRE2TEST_LINKER_FLAGS})
 
+  IF(PCRE2_BUILD_PCRE2_8)
+    ADD_EXECUTABLE(pcre2posix_test src/pcre2posix_test.c)
+    TARGET_LINK_LIBRARIES(pcre2posix_test pcre2-posix pcre2-8)
+  ENDIF(PCRE2_BUILD_PCRE2_8)
+
   IF(PCRE2_SUPPORT_JIT)
     ADD_EXECUTABLE(pcre2_jit_test src/pcre2_jit_test.c)
-    SET(targets ${targets} pcre2_jit_test)
-    SET(PCRE2_JIT_TEST_LIBS )
+    SET(PCRE2_JIT_TEST_LIBS)
     IF(PCRE2_BUILD_PCRE2_8)
       LIST(APPEND PCRE2_JIT_TEST_LIBS pcre2-8)
     ENDIF(PCRE2_BUILD_PCRE2_8)
@@ -927,6 +1029,7 @@ MESSAGE(\" \")
   FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_test.sh
   "#! /bin/sh
 # This is a generated file.
+srcdir=${PROJECT_SOURCE_DIR}
 . ${PROJECT_SOURCE_DIR}/RunTest
 if test \"$?\" != \"0\"; then exit 1; fi
 # End
@@ -940,6 +1043,7 @@ if test \"$?\" != \"0\"; then exit 1; fi
     FILE(WRITE ${PROJECT_BINARY_DIR}/pcre2_grep_test.sh
     "#! /bin/sh
 # This is a generated file.
+srcdir=${PROJECT_SOURCE_DIR}
 . ${PROJECT_SOURCE_DIR}/RunGrepTest
 if test \"$?\" != \"0\"; then exit 1; fi
 # End
@@ -991,6 +1095,10 @@ echo RunTest.bat tests successfully completed
     ADD_TEST(pcre2_jit_test pcre2_jit_test)
   ENDIF(PCRE2_SUPPORT_JIT)
 
+  IF(PCRE2_BUILD_PCRE2_8)
+    ADD_TEST(pcre2posix_test pcre2posix_test)
+  ENDIF(PCRE2_BUILD_PCRE2_8)
+
 ENDIF(PCRE2_BUILD_TESTS)
 
 # Installation
@@ -1022,25 +1130,13 @@ FILE(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html)
 FILE(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1)
 FILE(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3)
 
-FOREACH(man ${man3})
-        GET_FILENAME_COMPONENT(man_tmp ${man} NAME)
-        SET(man3_new ${man3} ${man})
-ENDFOREACH(man ${man3})
-SET(man3 ${man3_new})
-
 INSTALL(FILES ${man1} DESTINATION man/man1)
 INSTALL(FILES ${man3} DESTINATION man/man3)
 INSTALL(FILES ${html} DESTINATION share/doc/pcre2/html)
 
 IF(MSVC AND INSTALL_MSVC_PDB)
-    INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2.pdb
-                  ${PROJECT_BINARY_DIR}/pcre2posix.pdb
-            DESTINATION bin
-            CONFIGURATIONS RelWithDebInfo)
-    INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2d.pdb
-                  ${PROJECT_BINARY_DIR}/pcre2posixd.pdb
-            DESTINATION bin
-            CONFIGURATIONS Debug)
+ INSTALL(FILES ${dll_pdb_files} DESTINATION bin CONFIGURATIONS RelWithDebInfo)
+ INSTALL(FILES ${dll_pdb_debug_files} DESTINATION bin CONFIGURATIONS Debug)
 ENDIF(MSVC AND INSTALL_MSVC_PDB)
 
 # Help, only for nice output
@@ -1080,6 +1176,7 @@ IF(PCRE2_SHOW_REPORT)
   MESSAGE(STATUS "  EBCDIC coding with NL=0x25 ...... : ${PCRE2_EBCDIC_NL25}")
   MESSAGE(STATUS "  Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
   MESSAGE(STATUS "  Internal link size .............. : ${PCRE2_LINK_SIZE}")
+  MESSAGE(STATUS "  Maximum variable lookbehind ..... : ${PCRE2_MAX_VARLOOKBEHIND}")
   MESSAGE(STATUS "  Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}")
   MESSAGE(STATUS "  Heap limit ...................... : ${PCRE2_HEAP_LIMIT}")
   MESSAGE(STATUS "  Match limit ..................... : ${PCRE2_MATCH_LIMIT}")

+ 442 - 7
regex.mod/pcre/ChangeLog

@@ -1,6 +1,441 @@
 Change Log for PCRE2
 --------------------
 
+Before the move to GitHub, this was the only record of changes to PCRE2. Now
+there is often more detail in the pull requests.
+
+
+Version 10.43 16-February-2024
+------------------------------
+
+1. The test program added by change 2 of 10.42 didn't work when the default
+newline setting didn't include \n as a newline. One test needed (*LF) to ensure
+that it worked.
+
+2. Added the new freestanding POSIX test program to the ManyConfigTests script
+in the maint directory (overlooked in 2 below). Also improved the selection
+facilities in that script, and added a test with JIT in a non-source directory,
+fixing an oversight that would have made such a test fail before.
+
+3. Added pcre2_get_match_data_heapframes_size() and related pcre2test flags
+to allow for finer control of the heap used when pcre2_match() without JIT is
+used and the match_data might be reused. This began as PR #191, but has had
+further refinement and documentation edits.
+
+4. Applied PR #181, which tidies some casts in pcre2_valid_utf.c.
+
+5. Applied PR #184, which avoids overflow issues with the heap limit
+(introduced in 10.41/9).
+
+6. Applied PR #192, which changes the timing units for pcre2test from
+milliseconds to microseconds. This is more useful for modern CPUs.
+
+7. Applied PR #193, which makes the requirement for C99 explicit in
+configure.ac and CMakeLists.txt.
+
+8. Fixed a bug in pcre2test when a ridiculously large string repeat required a
+stupid amount of memory. It now gives a clean realloc() failure error.
+
+9. Updates to restrict the interaction between ASCII and non-ASCII characters
+for caseless matching and items like \d:
+
+   (a) Added PCRE2_EXTRA_CASELESS_RESTRICT to lock out mixing of ASCII and
+       non-ASCII when matching caselessly. This is also /r in pcre2test and
+       (?r) within patterns.
+
+   (b) Added PCRE2_EXTRA_ASCII_{BSD,BSS,BSW,POSIX} and corresponding (?aD) etc
+       in patterns and /a in pcre2test.
+
+   (c) Corresponding updates to pcre2test.
+
+10. Unicode has been updated to 15.0.0.
+
+11. The Python scripts and ucptest.c in maint have been updated: (a) a minor
+change needed for 9(a) above; (b) fix bugs in ucptest.
+
+12. Integer overflow testing is now centralized in a new function.
+
+13. Made PCRE2_UCP the default in UTF mode in pcre2grep, and added new options
+--case-restrict and --no-ucp.
+
+14. In the debugging printint module (which is normally only linked into
+pcre2test), avoid the use of a variable called "not" because that's deprecated
+in C and forbidden in C++. Also rewrite some code to avoid a goto into a block
+that bypassed its initialization (though it didn't actually matter).
+
+15. More minor code adjustments to avoid using reserved C++ words as variable
+names ("new" and "typename") and another jump that bypassed an (irrelevant)
+initialization.
+
+16. Merged a pull request that removed pcre2_ucptables.c from the list of files
+to compile in NON-AUTOTOOLS-BUILD because it is #included in pcre2_tables.c.
+Also adjusted the BUILD.bazel and build.zig files, which had the same issue. At
+the same time, fixed a typo in the Bazel file.
+
+17. Add PCRE2_EXTRA_ASCII_DIGIT to allow [:digit:] to be kept in sync with \d
+even in UCP mode.
+
+18. Fix an invalid match of ascii word classes when invalid utf is enabled.
+
+19. Add a --posix-digit to pcre2grep for compatibility with GNU grep, and
+other tools that prefer the POSIX compatible unicode definition for \d.
+
+20. Report the bit width of the library in use by pcre2test for usability.
+
+21. A pathological pattern conversion test could result in a string longer than
+the available input buffer. Cause such a test to fail.
+
+22. Add a check that forces a compiler error if PCRE2_CODE_UNIT_WIDTH is not 8,
+16, or 32 when compiling any of the library modules.
+
+23. Update pcre2_compile() to treat a NULL pattern with zero length as an empty
+string.
+
+24. Add support for limited-length variable-length lookbehind assertions, with
+default maximum length 255 characters (same as Perl) but with a function to
+adjust the limit.
+
+25. Applied pull request #262, which updates the zig configuration, and #278
+which fixes a bug with out-of-source-tree CMake build testing.
+
+26. Add support for LoongArch to JIT.
+
+27. Fixed a bug in pcre2_match() in the code for handling the vector of
+backtracking frames on the heap, which caused a heap overflow if *LIMIT_HEAP
+restricted an attempt to extend to less than the frame size. Generally tidy up
+the code for extending the heap frames vector. This fixes GitHub issue #275.
+
+28. Update pcre2_fuzzsupport.c to avoid clang sanitize complaint about shifting
+left by 16 when there are non-zeros in the top 16 bits.
+
+29. Perl 5.34.0 changed the meaning of (for example) {,3} which previously was
+not treated as a quantifier. Now it is interpreted as {0,3} and PCRE2 has
+changed to match. Note that {,} is still not a quantifier.
+
+30. Perl allows spaces and/or horizontal tabs after { or before } in all items
+that use braces, and also before or after the comma in quantifiers. PCRE2 now
+does the same, except for \u{...}, which is recognized only when
+PCRE2_EXTRA_ALT_BSUX is set. This is an ECMAScript, non-Perl compatible,
+extension, so PCRE2 follows ECMAScript rather than Perl.
+
+31. Applied pull request #300 by Carlo, which fixes #261. The bug was that
+pcre2_match() was not fully resetting all captures that had been set within a
+(possibly recursive) subroutine call such as (?3).
+
+32. Changed the meaning of \w (and its synonyms) in UCP mode to match Perl. It
+now matches characters whose general categories are L or N or whose particular
+categories are Mn (non-spacing mark) or Pc (connector punctuation). The latter
+includes underscore.
+
+33. Changed the meaning of [:xdigit:] in UCP mode to match Perl. It now also
+matches the "fullwidth" versions of the hex digits. Just like it is done for
+[:digit:], PCRE2_EXTRA_ASCII_DIGIT can be used to keep this class ASCII only
+without affecting other POSIX classes.
+
+34. GitHub PR305 fixes a potential integer overflow in pcre2_dfa_match().
+
+35. Updated handling of \b and \B in UCP mode to match the changes to \w in 32
+above because \b and \B are defined in terms of \w.
+
+36. Within a pattern (?aT) and (?-aT) set and reset the PCRE2_EXTRA_ASCII_DIGIT
+option, and (?aP) also sets (?aT) so that (?-aP) disables all ASCII
+restrictions on POSIX classes.
+
+37. If PCRE2_FIRSTLINE was set on an anchored pattern, pcre2_match() and
+pcre2_dfa_match() misbehaved. PCRE2_FIRSTLINE is now ignored for anchored
+patterns.
+
+38. Add a test for ridiculous ovector offset values to the substring extraction
+functions.
+
+39. Make OP_REVERSE use IMM2_SIZE for its data instead of LINK_SIZE, for
+consistency with OP_VREVERSE.
+
+40. In some legacy environments with a pre C99 snprintf, pcre2_regerror could
+return an incorrect value when the provided buffer was too small.
+
+41. Applied pull request #342 which adds sanity checks for ctype functions and
+locks out any accidental sign-extension.
+
+42. In the 32-bit library, in non-UTF mode, a quantifier that followed a
+literal character with a value greater than or equal to 0x80000000u caused
+undefined behaviour.
+
+43. \z was misbehaving when matching fragments inside invalid UTF strings.
+
+44. Implement --group-separator and --no-group-separator for pcre2grep.
+
+45. Fix \X matching in 32 bit mode without UTF in JIT.
+
+46. Fix backref iterators when PCRE2_MATCH_UNSET_BACKREF is set in JIT.
+
+47. Refactor the handling of whole-pattern recursion (?0) in pcre2_match() so
+that its end is handled similarly to other recursions. This has altered the
+behaviour of   /|(?0)./endanchored   which was previously not right.
+
+48. Improved the test for looping recursion by checking the last referenced
+character as well as the current character. This allows some patterns that
+previously triggered the check to run to completion instead of giving the loop
+error.
+
+49. In 32-bit mode, the compiler looped for the pattern /[\x{ffffffff}]/ when
+PCRE2_CASELESS and PCRE2_UCP (but not PCRE2_UTF) were set. Fixed by not trying
+to look for other cases for characters above the Unicode range.
+
+50. In caseless 32-bit mode with UCP (but not UTF) set, the character
+0xffffffff incorrectly matched any character that has more than one other case,
+in particular k and s.
+
+51. Fix accept and endanchored interaction in JIT.
+
+52. Fix backreferences with unset backref and non-greedy iterators in JIT.
+
+53. Improve the logic that checks for a list of starting code units -- positive
+lookahead assertions are now ignored if the immediately following item is one
+that sets a mandatory starting character. For example, /a?(?=bc|)d/ used to set
+all of a, b, and d as possible starting code units; now it sets only a and d.
+
+54. Fix incorrect class character matches in JIT.
+
+55. In pcre2test, ensure pcre2_jit_match() is used when jitfast is used with
+substitution testing.
+
+56. Insert omitted setting of subject length in match data at the end of
+pcre2_jit_match().
+
+57. Implemented PCRE2_DISABLE_RECURSELOOP_CHECK for pcre2_match() to enable
+some apparently looping recursions to run to completion and therefore match the
+JIT behaviour. With this set, real loops will eventually get caught by match or
+heap limits or run out of resource.
+
+58. AC did a lot of work on pcre2_fuzzsupport.c to extend it to 16-bit and
+32-bit libraries and to compare JIT and non-JIT matching.
+
+
+Version 10.42 11-December-2022
+------------------------------
+
+1. Change 19 of 10.41 wasn't quite right; it put the definition of a default,
+empty value for PCRE2_CALL_CONVENTION in src/pcre2posix.c instead of
+src/pcre2posix.h, which meant that programs that included pcre2posix.h but not
+pcre2.h failed to compile.
+
+2. To catch similar issues to the above in future, a new small test program
+that includes pcre2posix.h but not pcre2.h has been added to the test suite.
+
+3. When the -S option of pcre2test was used to set a stack size greater than
+the allowed maximum, the error message displayed the hard limit incorrectly.
+This was pointed out on GitHub pull request #171, but the suggested patch
+didn't cope with all cases. Some further modification was required.
+
+4. Supplying an ovector count of more than 65535 to pcre2_match_data_create()
+caused a crash because the field in the match data block is only 16 bits. A
+maximum of 65535 is now silently applied.
+
+5. Merged @carenas patch #175 which fixes #86 - segfault on aarch64 (ARM).
+
+6. The prototype for pcre2_substring_list_free() specified its argument as
+PCRE2_SPTR * which is a const data type, whereas the yield from
+pcre2_substring_list() is not const. This caused compiler warnings. I have
+changed the argument of pcre2_substring_list_free() to be PCRE2_UCHAR ** to
+remove this anomaly. This might cause new warnings in existing code where a
+cast has been used to avoid previous ones.
+
+
+Version 10.41 06-December-2022
+------------------------------
+
+1. Add fflush() before and after a fork callout in pcre2grep to get its output
+to be the same on all systems. (There were previously ordering differences in
+Alpine Linux).
+
+2. Merged patch from @carenas (GitHub #110) for pthreads support in CMake.
+
+3. SSF scorecards grumbled about possible overflow in an expression in
+pcre2test. It never would have overflowed in practice, but some casts have been
+added and at the same time there's been some tidying of fprints that output
+size_t values.
+
+4. PR #94 showed up an unused enum in pcre2_convert.c, which is now removed.
+
+5. Minor code re-arrangement to remove gcc warning about realloc() in
+pcre2test.
+
+6. Change a number of int variables that hold buffer and line lengths in
+pcre2grep to PCRE2_SIZE (aka size_t).
+
+7. Added an #ifdef to cut out a call to PRIV(jit_free) when JIT is not
+supported (even though that function would do nothing in that case) at the
+request of a user who doesn't even want to link with pcre_jit_compile.o. Also
+tidied up an untidy #ifdef arrangement in pcre2test.
+
+8. Fixed an issue in the backtracking optimization of character repeats in
+JIT. Furthermore optimize star repetitions, not just plus repetitions.
+
+9. Removed the use of an initial backtracking frames vector on the system stack
+in pcre2_match() so that it now always uses the heap. (In a multi-thread
+environment with very small stacks there had been an issue.) This also is
+tidier for JIT matching, which didn't need that vector. The heap vector is now
+remembered in the match data block and re-used if that block itself is re-used.
+It is freed with the match data block.
+
+10. Adjusted the find_limits code in pcre2test to work with change 9 above.
+
+11. Added find_limits_noheap to pcre2test, because the heap limits are now
+different in different environments and so cannot be included in the standard
+tests.
+
+12. Created a test for pcre2_match() heap processing that is not part of the
+tests run by 'make check', but can be run manually. The current output is from
+a 64-bit system.
+
+13. Implemented -Z aka --null in pcre2grep.
+
+14. A minor change to pcre2test and the addition of several new pcre2grep tests
+have improved LCOV coverage statistics. At the same time, code in pcre2grep and
+elsewhere that can never be obeyed in normal testing has been excluded from
+coverage.
+
+15. Fixed a bug in pcre2grep that could cause an extra newline to be written
+after output generated by --output.
+
+16. If a file has a .bz2 extension but is not in fact compressed, pcre2grep
+should process it as a plain text file. A bug stopped this happening; now fixed
+and added to the tests.
+
+17. When pcre2grep was running not in UTF mode, if a string specified by
+--output or obtained from a callout in a pattern contained a character (byte)
+greater than 127, it was incorrectly output in UTF-8 format.
+
+18. Added some casts after warnings from Clang sanitize.
+
+19. Merged patch from cbouc (GitHub #139): 4 function prototypes were missing
+PCRE2_CALL_CONVENTION in src/pcre2posix.h. All function prototypes returning
+pointers had out of place PCRE2_CALL_CONVENTION in src/pcre2.h.*. These
+produced errors when building for Windows with #define PCRE2_CALL_CONVENTION
+__stdcall.
+
+20. A negative repeat value in a pcre2test subject line was not being
+diagnosed, leading to infinite looping.
+
+21. Updated RunGrepTest to discard the warning that Bash now gives when setting
+LC_CTYPE to a bad value (because older versions didn't).
+
+22. Updated pcre2grep so that it behaves like GNU grep when matching more than
+one pattern and a later pattern matches at an earlier point in the subject when
+the matched substrings are being identified by colour or by offsets.
+
+23. Updated the PrepareRelease script so that the man page that it makes for
+the pcre2demo demonstration program is more standard and does not cause errors
+when processed by lexgrog or mandb -c (GitHub issue #160).
+
+24. The JIT compiler was updated.
+
+
+Version 10.40 15-April-2022
+---------------------------
+
+1. Merged patch from @carenas (GitHub #35, 7db87842) to fix pcre2grep incorrect
+handling of multiple passes.
+
+2. Merged patch from @carenas (GitHub #36, dae47509) to fix portability issue
+in pcre2grep with buffered fseek(stdin).
+
+3. Merged patch from @carenas (GitHub #37, acc520924) to fix tests when -S is
+not supported.
+
+4. Revert an unintended change in JIT repeat detection.
+
+5. Merged patch from @carenas (GitHub #52, b037bfa1) to fix build on GNU Hurd.
+
+6. Merged documentation and comments patches from @carenas (GitHub #47).
+
+7. Merged patch from @carenas (GitHub #49) to remove obsolete JFriedl test code
+from pcre2grep.
+
+8. Merged patch from @carenas (GitHub #48) to fix CMake install issue #46.
+
+9. Merged patch from @carenas (GitHub #53) fixing NULL checks in matching and
+substituting.
+
+10. Add null_subject and null_replacement modifiers to pcre2test.
+
+11. Add check for NULL subject to POSIX regexec() function.
+
+12. Add check for NULL replacement to pcre2_substitute().
+
+13. For the subject arguments of pcre2_match(), pcre2_dfa_match(), and
+pcre2_substitute(), and the replacement argument of the latter, if the pointer
+is NULL and the length is zero, treat as an empty string. Apparently a number
+of applications treat NULL/0 in this way.
+
+14. Added support for Bidi_Class and a number of binary Unicode properties,
+including Bidi_Control.
+
+15. Fix some minor issues raised by clang sanitize.
+
+16. Very minor code speed up for maximizing character property matches.
+
+17. A number of changes to script matching for \p and \P:
+
+    (a) Script extensions for a character are now coded as a bitmap instead of
+        a list of script numbers, which should be faster and does not need a
+        loop.
+
+    (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms
+        sc and scx).
+
+    (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being
+        the same as \p{scx:scriptname} because this change happened in Perl at
+        release 5.26.
+
+    (d) The standard Unicode 4-letter abbreviations for script names are now
+        recognized.
+
+    (e) In accordance with Unicode and Perl's "loose matching" rules, spaces,
+        hyphens, and underscores are ignored in property names, which are then
+        matched independent of case.
+
+18. The Python scripts in the maint directory have been refactored. There are
+now three scripts that generate pcre2_ucd.c, pcre2_ucp.h, and pcre2_ucptables.c
+(which is #included by pcre2_tables.c). The data lists that used to be
+duplicated are now held in a single common Python module.
+
+19. On CHERI, and thus Arm's Morello prototype, pointers are represented as
+hardware capabilities, which consist of both an integer address and additional
+metadata, meaning they are twice the size of the platform's size_t type, i.e.
+16 bytes on a 64-bit system. The ovector member of heapframe happens to only be
+8 byte aligned, and so computing frame_size ended up with a multiple of 8 but
+not 16. Whilst the first frame was always suitably aligned, this then
+misaligned the frame that follows, resulting in an alignment fault when storing
+a pointer to Fecode at the start of match. Patch to fix this issue by Jessica
+Clarke PR#72.
+
+20. Added -LP and -LS listing options to pcre2test.
+
+21. A user discovered that the library names in CMakeLists.txt for MSVC
+debugger (PDB) files were incorrect - perhaps never tried for PCRE2?
+
+22. An item such as [Aa] is optimized into a caseless single character match.
+When this was quantified (e.g. [Aa]{2}) and was also the last literal item in a
+pattern, the optimizing "must be present for a match" character check was not
+being flagged as caseless, causing some matches that should have succeeded to
+fail.
+
+23. Fixed a unicode property matching issue in JIT. The character was not
+fully read in caseless matching.
+
+24. Fixed an issue affecting recursions in JIT caused by duplicated data
+transfers.
+
+25. Merged patch from @carenas (GitHub #96) which fixes some problems with
+pcre2test and readline/readedit:
+
+  * Use the right header for libedit in FreeBSD with autoconf
+  * Really allow libedit with cmake
+  * Avoid using readline headers with libedit
+
+
 Version 10.39 29-October-2021
 -----------------------------
 
@@ -14,10 +449,10 @@ Version 10.39 29-October-2021
   honoured if chosen.
 
   ptrdiff_t is signed, so use a signed type instead, and make sure
-  that an appropiate width is chosen if pointers are 64bit wide and
+  that an appropriate width is chosen if pointers are 64bit wide and
   long is not (ex: Windows 64bit).
 
-  IMHO removing the cast (and therefore the positibilty of truncation)
+  IMHO removing the cast (and therefore the possibility of truncation)
   make the code cleaner and the fallback is likely portable enough
   with all 64-bit POSIX systems doing LP64 except for Windows.
 
@@ -68,7 +503,7 @@ Version 10.38 01-October-2021
 -----------------------------
 
 1. Fix invalid single character repetition issues in JIT when the repetition
-is inside a capturing bracket and the bracket is preceeded by character
+is inside a capturing bracket and the bracket is preceded by character
 literals.
 
 2. Installed revised CMake configuration files provided by Jan-Willem Blokland.
@@ -308,7 +743,7 @@ now correctly backtracked, so this unnecessary restriction has been removed.
 
 7. Added PCRE2_SUBSTITUTE_MATCHED.
 
-8. Added (?* and (?<* as synonms for (*napla: and (*naplb: to match another
+8. Added (?* and (?<* as synonyms for (*napla: and (*naplb: to match another
 regex engine. The Perl regex folks are aware of this usage and have made a note
 about it.
 
@@ -739,7 +1174,7 @@ Patch by Guillem Jover.
 warnings were reported.
 
 38. Using the clang compiler with sanitizing options causes runtime complaints
-about truncation for statments such as x = ~x when x is an 8-bit value; it
+about truncation for statements such as x = ~x when x is an 8-bit value; it
 seems to compute ~x as a 32-bit value. Changing such statements to x = 255 ^ x
 gets rid of the warnings. There were also two missing casts in pcre2test.
 
@@ -747,7 +1182,7 @@ gets rid of the warnings. There were also two missing casts in pcre2test.
 Version 10.32 10-September-2018
 -------------------------------
 
-1. When matching using the the REG_STARTEND feature of the POSIX API with a
+1. When matching using the REG_STARTEND feature of the POSIX API with a
 non-zero starting offset, unset capturing groups with lower numbers than a
 group that did capture something were not being correctly returned as "unset"
 (that is, with offset values of -1).
@@ -922,7 +1357,7 @@ assumed empty second branch cannot be anchored. Demonstrated by test patterns
 such as /(?(1)^())b/ or /(?(?=^))b/.
 
 40. A repeated conditional subpattern that could match an empty string was
-always assumed to be unanchored. Now it it checked just like any other
+always assumed to be unanchored. Now it is checked just like any other
 repeated conditional subpattern, and can be found to be anchored if the minimum
 quantifier is one or more. I can't see much use for a repeated anchored
 pattern, but the behaviour is now consistent.

+ 70 - 47
regex.mod/pcre/CheckMan

@@ -1,4 +1,4 @@
-Technical Notes about PCRE2
+Technical notes about PCRE2
 ---------------------------
 
 These are very rough technical notes that record potentially useful information
@@ -8,8 +8,8 @@ library is referred to as PCRE1 below. For information about testing PCRE2, see
 the pcre2test documentation and the comment at the head of the RunTest file.
 
 PCRE1 releases were up to 8.3x when PCRE2 was developed, and later bug fix
-releases remain in the 8.xx series. PCRE2 releases started at 10.00 to avoid
-confusion with PCRE1.
+releases carried on the 8.xx series, up to the final 8.45 release. PCRE2
+releases started at 10.00 to avoid confusion with PCRE1.
 
 
 Historical note 1
@@ -38,8 +38,8 @@ Historical note 2
 By contrast, the code originally written by Henry Spencer (which was
 subsequently heavily modified for Perl) compiles the expression twice: once in
 a dummy mode in order to find out how much store will be needed, and then for
-real. (The Perl version probably doesn't do this any more; I'm talking about
-the original library.) The execution function operates by backtracking and
+real. (The Perl version may or may not still do this; I'm talking about the
+original library.) The execution function operates by backtracking and
 maximizing (or, optionally, minimizing, in Perl) the amount of the subject that
 matches individual wild portions of the pattern. This is an "NFA algorithm" in
 Friedl's terminology.
@@ -151,8 +151,8 @@ of code units in the item itself. The exception is the aforementioned large
 advance to check for such values. When auto-callouts are enabled, the generous
 assumption is made that there will be a callout for each pattern code unit
 (which of course is only actually true if all code units are literals) plus one
-at the end. There is a default parsed pattern vector on the system stack, but
-if this is not big enough, heap memory is used.
+at the end. A default parsed pattern vector is defined on the system stack, to
+minimize memory handling, but if this is not big enough, heap memory is used.
 
 As before, the actual compiling function is run twice, the first time to
 determine the amount of memory needed for the final compiled pattern. It
@@ -187,7 +187,7 @@ META_CLASS_EMPTY      [] empty class - only with PCRE2_ALLOW_EMPTY_CLASS
 META_CLASS_EMPTY_NOT  [^] negative empty class - ditto
 META_CLASS_END        ] end of non-empty class
 META_CLASS_NOT        [^ start non-empty negative class
-META_COMMIT           (*COMMIT)
+META_COMMIT           (*COMMIT) - no argument (see below for with argument)
 META_COND_ASSERT      (?(?assertion)
 META_DOLLAR           $ metacharacter
 META_DOT              . metacharacter
@@ -201,18 +201,18 @@ META_NOCAPTURE        (?: no capture parens
 META_PLUS             +
 META_PLUS_PLUS        ++
 META_PLUS_QUERY       +?
-META_PRUNE            (*PRUNE) - no argument
+META_PRUNE            (*PRUNE) - no argument (see below for with argument)
 META_QUERY            ?
 META_QUERY_PLUS       ?+
 META_QUERY_QUERY      ??
 META_RANGE_ESCAPED    hyphen in class range with at least one escape
 META_RANGE_LITERAL    hyphen in class range defined literally
-META_SKIP             (*SKIP) - no argument
-META_THEN             (*THEN) - no argument
+META_SKIP             (*SKIP) - no argument (see below for with argument)
+META_THEN             (*THEN) - no argument (see below for with argument)
 
 The two RANGE values occur only in character classes. They are positioned
 between two literals that define the start and end of the range. In an EBCDIC
-evironment it is necessary to know whether either of the range values was
+environment it is necessary to know whether either of the range values was
 specified as an escape. In an ASCII/Unicode environment the distinction is not
 relevant.
 
@@ -226,20 +226,20 @@ META_ESCAPE           non-literal escape sequence
 META_RECURSE          recursion call
 
 If the data for META_ALT is non-zero, it is inside a lookbehind, and the data
-is the length of its branch, for which OP_REVERSE must be generated.
+is the maximum length of its branch (see META_LOOKBEHIND below for more
+detail).
 
 META_BACKREF, META_CAPTURE, and META_RECURSE have the capture group number as
-their data in the lower 16 bits of the element.
+their data in the lower 16 bits of the element. META_RECURSE is followed by an
+offset, for use in error messages.
 
 META_BACKREF is followed by an offset if the back reference group number is 10
-or more. The offsets of the first ocurrences of references to groups whose
+or more. The offsets of the first occurrences of references to groups whose
 numbers are less than 10 are put in cb->small_ref_offset[] (only the first
 occurrence is useful). On 64-bit systems this avoids using more than two parsed
 pattern elements for items such as \3. The offset is used when an error occurs
 because the reference is to a non-existent group.
 
-META_RECURSE is always followed by an offset, for use in error messages.
-
 META_ESCAPE has an ESC_xxx value as its data. For ESC_P and ESC_p, the next
 element contains the 16-bit type and data property values, packed together.
 ESC_g and ESC_k are used only for named references - numerical ones are turned
@@ -249,7 +249,6 @@ by a length and an offset into the pattern to specify the name.
 The following have one data item that follows in the next vector element:
 
 META_BIGVALUE         Next is a literal >= META_END
-META_OPTIONS          (?i) and friends (data is new option bits)
 META_POSIX            POSIX class item (data identifies the class)
 META_POSIX_NEG        negative POSIX class item (ditto)
 
@@ -283,22 +282,34 @@ The following is followed just by an offset, for use in error messages:
 
 META_COND_DEFINE      (?(DEFINE)
 
-The following are also followed just by an offset, but also the lower 16 bits
-of the main word contain the length of the first branch of the lookbehind
-group; this is used when generating OP_REVERSE for that branch.
+The following are at first also followed just by an offset for use in error
+messages. After the lengths of the branches of a lookbehind group have been
+checked the error offset is no longer needed. The lower 16 bits of the main
+word are now set to the maximum length of the first branch of the lookbehind
+group, and the second word is set to the minimum matching length for a
+variable-length lookbehind group, or to LOOKBEHIND_MAX for a group whose
+branches are all of fixed length. These values are used when generating
+OP_REVERSE or OP_VREVERSE for the first branch. The minimum value is also used
+for any subsequent branches because there is only room for one value (the
+branch maximum length) in a META_ALT item.
 
 META_LOOKBEHIND       (?<=      start of lookbehind
 META_LOOKBEHIND_NA    (*naplb:  start of non-atomic lookbehind
 META_LOOKBEHINDNOT    (?<!      start of negative lookbehind
 
-The following are followed by two elements, the minimum and maximum. Repeat
-values are limited to 65535 (MAX_REPEAT). A maximum value of "unlimited" is
-represented by UNLIMITED_REPEAT, which is bigger than MAX_REPEAT:
+The following are followed by two elements, the minimum and maximum. The
+maximum value is limited to 65535 (MAX_REPEAT_COUNT). A maximum value of
+"unlimited" is represented by REPEAT_UNLIMITED, which is bigger than it:
 
 META_MINMAX           {n,m}  repeat
 META_MINMAX_PLUS      {n,m}+ repeat
 META_MINMAX_QUERY     {n,m}? repeat
 
+This one is followed by two elements, giving the new option settings for the
+main and extra options, respectively.
+
+META_OPTIONS          (?i) and friends
+
 This one is followed by three elements. The first is 0 for '>' and 1 for '>=';
 the next two are the major and minor numbers:
 
@@ -347,11 +358,11 @@ support is not available for this kind of matching.
 Changeable options
 ------------------
 
-The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL, and
-others) may be changed in the middle of patterns by items such as (?i). Their
-processing is handled entirely at compile time by generating different opcodes
-for the different settings. The runtime functions do not need to keep track of
-an option's state.
+The /i, /m, or /s options (PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL) and
+some others may be changed in the middle of patterns by items such as (?i).
+Their processing is handled entirely at compile time by generating different
+opcodes for the different settings. The runtime functions do not need to keep
+track of an option's state.
 
 PCRE2_DUPNAMES, PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE
 are tracked and processed during the parsing pre-pass. The others are handled
@@ -370,9 +381,9 @@ data that follows it.
 In many cases listed below, LINK_SIZE data values are specified for offsets
 within the compiled pattern. LINK_SIZE always specifies a number of bytes. The
 default value for LINK_SIZE is 2, except for the 32-bit library, where it can
-only be 4. The 8-bit library can be compiled to used 3-byte or 4-byte values,
+only be 4. The 8-bit library can be compiled to use 3-byte or 4-byte values,
 and the 16-bit library can be compiled to use 4-byte values, though this
-impairs performance. Specifing a LINK_SIZE larger than 2 for these libraries is
+impairs performance. Specifying a LINK_SIZE larger than 2 for these libraries is
 necessary only when patterns whose compiled length is greater than 65535 code
 units are going to be processed. When a LINK_SIZE value uses more than one code
 unit, the most significant unit is first.
@@ -437,7 +448,7 @@ Backtracking control verbs
 --------------------------
 
 Verbs with no arguments generate opcodes with no following data (as listed
-in the section above). 
+in the section above).
 
 (*MARK:NAME) generates OP_MARK followed by the mark name, preceded by a
 length in one code unit, and followed by a binary zero. The name length is
@@ -468,8 +479,8 @@ Caseless matching (positive or negative) of characters that have more than two
 case-equivalent code points (which is possible only in UTF mode) is handled by
 compiling a Unicode property item (see below), with the pseudo-property
 PT_CLIST. The value of this property is an offset in a vector called
-"ucd_caseless_sets" which identifies the start of a short list of equivalent
-characters, terminated by the value NOTACHAR (0xffffffff).
+"ucd_caseless_sets" which identifies the start of a short list of case
+equivalent characters, terminated by the value NOTACHAR (0xffffffff).
 
 
 Repeating single characters
@@ -546,8 +557,9 @@ Each is followed by two code units that encode the desired property as a type
 and a value. The types are a set of #defines of the form PT_xxx, and the values
 are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file.
 The value is relevant only for PT_GC (General Category), PT_PC (Particular
-Category), PT_SC (Script), and the pseudo-property PT_CLIST, which is used to
-identify a list of case-equivalent characters when there are three or more.
+Category), PT_SC (Script), PT_BIDICL (Bidi Class), PT_BOOL (Boolean property),
+and the pseudo-property PT_CLIST, which is used to identify a list of
+case-equivalent characters when there are three or more (see above).
 
 Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by
 three code units: OP_PROP or OP_NOTPROP, and then the desired property type and
@@ -665,9 +677,9 @@ a count that immediately follows the offset.
 There are several opcodes that mark the end of a subpattern group. OP_KET is
 used for subpatterns that do not repeat indefinitely, OP_KETRMIN and
 OP_KETRMAX are used for indefinite repetitions, minimally or maximally
-respectively, and OP_KETRPOS for possessive repetitions (see below for more 
+respectively, and OP_KETRPOS for possessive repetitions (see below for more
 details). All four are followed by a LINK_SIZE value giving (as a positive
-number) the offset back to the matching bracket opcode.
+number) the offset back to the matching opening bracket opcode.
 
 If a subpattern is quantified such that it is permitted to match zero times, it
 is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
@@ -718,14 +730,25 @@ Assertions
 
 Forward assertions are also just like other subpatterns, but starting with one
 of the opcodes OP_ASSERT, OP_ASSERT_NA (non-atomic assertion), or
-OP_ASSERT_NOT. Backward assertions use the opcodes OP_ASSERTBACK, 
-OP_ASSERTBACK_NA, and OP_ASSERTBACK_NOT, and the first opcode inside the
-assertion is OP_REVERSE, followed by a count of the number of characters to
-move back the pointer in the subject string. In ASCII or UTF-32 mode, the count
-is also the number of code units, but in UTF-8/16 mode each character may
-occupy more than one code unit. A separate count is present in each alternative
-of a lookbehind assertion, allowing each branch to have a different (but fixed)
-length.
+OP_ASSERT_NOT.
+
+Backward assertions use the opcodes OP_ASSERTBACK, OP_ASSERTBACK_NA, and
+OP_ASSERTBACK_NOT. If all the branches of a backward assertion are of fixed
+length (not necessarily the same), the first opcode inside each branch is
+OP_REVERSE, followed by an IMM2_SIZE count of the number of characters to move
+back the pointer in the subject string, thus allowing each branch to have a
+different (but fixed) length.
+
+Variable-length backward assertions whose maximum matching length is limited
+are also supported. For such assertions, the first opcode inside each branch is
+OP_VREVERSE, followed by the minimum and maximum lengths for that branch,
+unless these happen to be equal, in which case OP_REVERSE is used. These
+IMM2_SIZE values occupy two code units each in 8-bit mode, and 1 code unit in
+16/32 bit modes.
+
+In ASCII or UTF-32 mode, the character counts in OP_REVERSE and OP_VREVERSE are
+also the number of code units, but in UTF-8/16 mode each character may occupy
+more than one code unit.
 
 
 Conditional subpatterns
@@ -827,4 +850,4 @@ not a real opcode, but is used to check at compile time that tables indexed by
 opcode are the correct length, in order to catch updating errors.
 
 Philip Hazel
-12 July 2019
+November 2023

+ 3 - 3
regex.mod/pcre/LICENCE

@@ -26,7 +26,7 @@ Email domain:     gmail.com
 Retired from University of Cambridge Computing Service,
 Cambridge, England.
 
-Copyright (c) 1997-2021 University of Cambridge
+Copyright (c) 1997-2024 University of Cambridge
 All rights reserved.
 
 
@@ -37,7 +37,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2010-2021 Zoltan Herczeg
+Copyright(c) 2010-2024 Zoltan Herczeg
 All rights reserved.
 
 
@@ -48,7 +48,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2009-2021 Zoltan Herczeg
+Copyright(c) 2009-2024 Zoltan Herczeg
 All rights reserved.
 
 

+ 96 - 20
regex.mod/pcre/Makefile.am

@@ -46,6 +46,7 @@ dist_html_DATA = \
   doc/html/pcre2_general_context_free.html \
   doc/html/pcre2_get_error_message.html \
   doc/html/pcre2_get_mark.html \
+  doc/html/pcre2_get_match_data_heapframes_size.html \
   doc/html/pcre2_get_match_data_size.html \
   doc/html/pcre2_get_ovector_count.html \
   doc/html/pcre2_get_ovector_pointer.html \
@@ -82,6 +83,7 @@ dist_html_DATA = \
   doc/html/pcre2_set_heap_limit.html \
   doc/html/pcre2_set_match_limit.html \
   doc/html/pcre2_set_max_pattern_length.html \
+  doc/html/pcre2_set_max_varlookbehind.html \
   doc/html/pcre2_set_offset_limit.html \
   doc/html/pcre2_set_newline.html \
   doc/html/pcre2_set_parens_nest_limit.html \
@@ -142,6 +144,7 @@ dist_man_MANS = \
   doc/pcre2_general_context_free.3 \
   doc/pcre2_get_error_message.3 \
   doc/pcre2_get_mark.3 \
+  doc/pcre2_get_match_data_heapframes_size.3 \
   doc/pcre2_get_match_data_size.3 \
   doc/pcre2_get_ovector_count.3 \
   doc/pcre2_get_ovector_pointer.3 \
@@ -178,6 +181,7 @@ dist_man_MANS = \
   doc/pcre2_set_heap_limit.3 \
   doc/pcre2_set_match_limit.3 \
   doc/pcre2_set_max_pattern_length.3 \
+  doc/pcre2_set_max_varlookbehind.3 \
   doc/pcre2_set_offset_limit.3 \
   doc/pcre2_set_newline.3 \
   doc/pcre2_set_parens_nest_limit.3 \
@@ -290,8 +294,7 @@ src/pcre2.h.generic: src/pcre2.h.in configure.ac
 # config.status out of the way while doing the default configuration. The
 # resulting config.h is munged by perl to put #ifdefs round any #defines for
 # macros with values, and to #undef all boolean macros such as HAVE_xxx and
-# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings. Make
-# sure that PCRE2_EXP_DEFN is unset (in case it has visibility settings).
+# SUPPORT_xxx. We also get rid of any gcc-specific visibility settings.
 
 src/config.h.generic: configure.ac
 	rm -rf $@ _generic
@@ -302,9 +305,7 @@ src/config.h.generic: configure.ac
 	test -f _generic/src/config.h
 	perl -n \
 	  -e 'BEGIN{$$blank=0;}' \
-	  -e 'if(/PCRE2_EXP_DEFN/){print"/* #undef PCRE2_EXP_DEFN */\n";$$blank=0;next;}' \
-	  -e 'if(/to make a symbol visible/){next;}' \
-	  -e 'if(/__attribute__ \(\(visibility/){next;}' \
+	  -e 'if(/(.+?)\s*__attribute__ \(\(visibility/){print"$$1\n";$$blank=0;next;}' \
 	  -e 'if(/LT_OBJDIR/){print"/* This is ignored unless you are using libtool. */\n";}' \
 	  -e 'if(/^#define\s((?:HAVE|SUPPORT|STDC)_\w+)/){print"/* #undef $$1 */\n";$$blank=0;next;}' \
 	  -e 'if(/^#define\s(?!PACKAGE|VERSION)(\w+)/){print"#ifndef $$1\n$$_#endif\n";$$blank=0;next;}' \
@@ -351,6 +352,7 @@ NODIST_SOURCES = src/pcre2_chartables.c
 
 COMMON_SOURCES = \
   src/pcre2_auto_possess.c \
+  src/pcre2_chkdint.c \
   src/pcre2_compile.c \
   src/pcre2_config.c \
   src/pcre2_context.c \
@@ -382,6 +384,10 @@ COMMON_SOURCES = \
   src/pcre2_valid_utf.c \
   src/pcre2_xclass.c
 
+# The pcre2_ucptables.c file is #included by pcre2_tables.c
+
+EXTRA_DIST += src/pcre2_ucptables.c
+
 if WITH_PCRE2_8
 lib_LTLIBRARIES += libpcre2-8.la
 libpcre2_8_la_SOURCES = \
@@ -435,28 +441,37 @@ CLEANFILES += src/pcre2_chartables.c
 
 EXTRA_DIST += \
   src/sljit/sljitConfig.h \
+  src/sljit/sljitConfigCPU.h \
   src/sljit/sljitConfigInternal.h \
-  src/sljit/sljitExecAllocator.c \
   src/sljit/sljitLir.c \
   src/sljit/sljitLir.h \
   src/sljit/sljitNativeARM_32.c \
   src/sljit/sljitNativeARM_64.c \
   src/sljit/sljitNativeARM_T2_32.c \
+  src/sljit/sljitNativeLOONGARCH_64.c \
   src/sljit/sljitNativeMIPS_32.c \
   src/sljit/sljitNativeMIPS_64.c \
   src/sljit/sljitNativeMIPS_common.c \
   src/sljit/sljitNativePPC_32.c \
   src/sljit/sljitNativePPC_64.c \
   src/sljit/sljitNativePPC_common.c \
+  src/sljit/sljitNativeRISCV_32.c \
+  src/sljit/sljitNativeRISCV_64.c \
+  src/sljit/sljitNativeRISCV_common.c \
   src/sljit/sljitNativeS390X.c \
-  src/sljit/sljitNativeSPARC_32.c \
-  src/sljit/sljitNativeSPARC_common.c \
   src/sljit/sljitNativeX86_32.c \
   src/sljit/sljitNativeX86_64.c \
   src/sljit/sljitNativeX86_common.c \
-  src/sljit/sljitProtExecAllocator.c \
   src/sljit/sljitUtils.c \
-  src/sljit/sljitWXExecAllocator.c
+  src/sljit/allocator_src/sljitExecAllocatorApple.c \
+  src/sljit/allocator_src/sljitExecAllocatorCore.c \
+  src/sljit/allocator_src/sljitExecAllocatorFreeBSD.c \
+  src/sljit/allocator_src/sljitExecAllocatorPosix.c \
+  src/sljit/allocator_src/sljitExecAllocatorWindows.c \
+  src/sljit/allocator_src/sljitProtExecAllocatorNetBSD.c \
+  src/sljit/allocator_src/sljitProtExecAllocatorPosix.c \
+  src/sljit/allocator_src/sljitWXExecAllocatorPosix.c \
+  src/sljit/allocator_src/sljitWXExecAllocatorWindows.c
 
 # Some of the JIT sources are also in separate files that are #included.
 
@@ -504,7 +519,7 @@ if WITH_PCRE2_8
 lib_LTLIBRARIES += libpcre2-posix.la
 libpcre2_posix_la_SOURCES = src/pcre2posix.c
 libpcre2_posix_la_CFLAGS = \
-  -DPCRE2_CODE_UNIT_WIDTH=8 \
+  -DPCRE2_CODE_UNIT_WIDTH=8 @PCRE2POSIX_CFLAG@ \
   $(VISIBILITY_CFLAGS) $(AM_CFLAGS)
 libpcre2_posix_la_LDFLAGS = $(EXTRA_LIBPCRE2_POSIX_LDFLAGS)
 libpcre2_posix_la_LIBADD = libpcre2-8.la
@@ -525,30 +540,77 @@ if WITH_GCOV
 pcre2grep_CFLAGS += $(GCOV_CFLAGS)
 pcre2grep_LDADD += $(GCOV_LIBS)
 endif # WITH_GCOV
+endif # WITH_PCRE2_8
 
 ## If fuzzer support is enabled, build a non-distributed library containing the
 ## fuzzing function. Also build the standalone checking binary from the same
 ## source but using -DSTANDALONE.
 
 if WITH_FUZZ_SUPPORT
-noinst_LIBRARIES = .libs/libpcre2-fuzzsupport.a
+noinst_LIBRARIES =
+if WITH_PCRE2_8
+noinst_LIBRARIES += .libs/libpcre2-fuzzsupport.a
 _libs_libpcre2_fuzzsupport_a_SOURCES = src/pcre2_fuzzsupport.c
 _libs_libpcre2_fuzzsupport_a_CFLAGS = $(AM_CFLAGS)
 _libs_libpcre2_fuzzsupport_a_LIBADD =
 
-noinst_PROGRAMS += pcre2fuzzcheck
-pcre2fuzzcheck_SOURCES = src/pcre2_fuzzsupport.c
-pcre2fuzzcheck_CFLAGS = -DSTANDALONE $(AM_CFLAGS)
-pcre2fuzzcheck_LDADD = libpcre2-8.la
+noinst_PROGRAMS += pcre2fuzzcheck-8
+pcre2fuzzcheck_8_SOURCES = src/pcre2_fuzzsupport.c
+pcre2fuzzcheck_8_CFLAGS = -DSTANDALONE $(AM_CFLAGS)
+pcre2fuzzcheck_8_LDADD = libpcre2-8.la
 if WITH_GCOV
-pcre2fuzzcheck_CFLAGS += $(GCOV_CFLAGS)
-pcre2fuzzcheck_LDADD += $(GCOV_LIBS)
+pcre2fuzzcheck_8_CFLAGS += $(GCOV_CFLAGS)
+pcre2fuzzcheck_8_LDADD += $(GCOV_LIBS)
 endif # WITH_GCOV
-endif # WITH FUZZ_SUPPORT
 endif # WITH_PCRE2_8
 
+if WITH_PCRE2_16
+noinst_LIBRARIES += .libs/libpcre2-fuzzsupport-16.a
+_libs_libpcre2_fuzzsupport_16_a_SOURCES = src/pcre2_fuzzsupport.c
+_libs_libpcre2_fuzzsupport_16_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16
+_libs_libpcre2_fuzzsupport_16_a_LIBADD =
+
+noinst_PROGRAMS += pcre2fuzzcheck-16
+pcre2fuzzcheck_16_SOURCES = src/pcre2_fuzzsupport.c
+pcre2fuzzcheck_16_CFLAGS = -DSTANDALONE $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=16
+pcre2fuzzcheck_16_LDADD = libpcre2-16.la
+if WITH_GCOV
+pcre2fuzzcheck_16_CFLAGS += $(GCOV_CFLAGS)
+pcre2fuzzcheck_16_LDADD += $(GCOV_LIBS)
+endif # WITH_GCOV
+endif # WITH_PCRE2_16
+
+if WITH_PCRE2_32
+noinst_LIBRARIES += .libs/libpcre2-fuzzsupport-32.a
+_libs_libpcre2_fuzzsupport_32_a_SOURCES = src/pcre2_fuzzsupport.c
+_libs_libpcre2_fuzzsupport_32_a_CFLAGS = $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32
+_libs_libpcre2_fuzzsupport_32_a_LIBADD =
+
+noinst_PROGRAMS += pcre2fuzzcheck-32
+pcre2fuzzcheck_32_SOURCES = src/pcre2_fuzzsupport.c
+pcre2fuzzcheck_32_CFLAGS = -DSTANDALONE $(AM_CFLAGS) -DPCRE2_CODE_UNIT_WIDTH=32
+pcre2fuzzcheck_32_LDADD = libpcre2-32.la
+if WITH_GCOV
+pcre2fuzzcheck_32_CFLAGS += $(GCOV_CFLAGS)
+pcre2fuzzcheck_32_LDADD += $(GCOV_LIBS)
+endif # WITH_GCOV
+endif # WITH_PCRE2_32
+
+endif # WITH_FUZZ_SUPPORT
+
 ## -------- Testing ----------
 
+## If the 8-bit library is enabled, build the POSIX wrapper test program and
+## arrange for it to run.
+
+if WITH_PCRE2_8
+TESTS += pcre2posix_test
+noinst_PROGRAMS += pcre2posix_test
+pcre2posix_test_SOURCES = src/pcre2posix_test.c
+pcre2posix_test_CFLAGS = $(AM_CFLAGS) @PCRE2POSIX_CFLAG@
+pcre2posix_test_LDADD = libpcre2-posix.la libpcre2-8.la
+endif # WITH_PCRE2_8
+
 ## If JIT support is enabled, arrange for the JIT test program to run.
 
 if WITH_JIT
@@ -627,15 +689,23 @@ EXTRA_DIST += \
   testdata/grepinput \
   testdata/grepinput3 \
   testdata/grepinput8 \
+  testdata/grepinputC.bz2 \
+  testdata/grepinputC.gz \
   testdata/grepinputM \
   testdata/grepinputv \
   testdata/grepinputx \
   testdata/greplist \
+  testdata/grepnot.bz2 \
   testdata/grepoutput \
   testdata/grepoutput8 \
   testdata/grepoutputC \
   testdata/grepoutputCN \
+  testdata/grepoutputCNU \
+  testdata/grepoutputCU \
+  testdata/grepoutputCbz2 \
+  testdata/grepoutputCgz \
   testdata/grepoutputN \
+  testdata/grepoutputUN \
   testdata/greppatN4 \
   testdata/testbtables \
   testdata/testinput1 \
@@ -663,7 +733,9 @@ EXTRA_DIST += \
   testdata/testinput23 \
   testdata/testinput24 \
   testdata/testinput25 \
+  testdata/testinput26 \
   testdata/testinputEBC \
+  testdata/testinputheap \
   testdata/testoutput1 \
   testdata/testoutput2 \
   testdata/testoutput3 \
@@ -705,7 +777,11 @@ EXTRA_DIST += \
   testdata/testoutput23 \
   testdata/testoutput24 \
   testdata/testoutput25 \
+  testdata/testoutput26 \
   testdata/testoutputEBC \
+  testdata/testoutputheap-16 \
+  testdata/testoutputheap-32 \
+  testdata/testoutputheap-8 \
   testdata/valgrind-jit.supp \
   testdata/wintestinput3 \
   testdata/wintestoutput3 \
@@ -732,7 +808,7 @@ CLEANFILES += \
 ## ------------ End of testing -------------
 
 
-# PCRE2 demonstration program. Not built automatcally. The point is that the
+# PCRE2 demonstration program. Not built automatically. The point is that the
 # users should build it themselves. So just distribute the source.
 
 EXTRA_DIST += src/pcre2demo.c

File diff suppressed because it is too large
+ 417 - 122
regex.mod/pcre/Makefile.in


+ 103 - 0
regex.mod/pcre/NEWS

@@ -2,6 +2,109 @@ News about PCRE2 releases
 -------------------------
 
 
+Version 10.43 16-February-2024
+------------------------------
+
+There are quite a lot of changes in this release (see ChangeLog and git log for
+a list). Those that are not bugfixes or code tidies are:
+
+* The JIT code no longer supports ARMv5 architecture.
+
+* A new function pcre2_get_match_data_heapframes_size() for finer heap control.
+
+* New option flags to restrict the interaction between ASCII and non-ASCII
+  characters for caseless matching and \d and friends. There are also new
+  pattern constructs to control these flags from within a pattern.
+
+* Upgrade to Unicode 15.0.0.
+
+* Treat a NULL pattern with zero length as an empty string.
+
+* Added support for limited-length variable-length lookbehind assertions, with
+  a default maximum length of 255 characters (same as Perl) but with a function
+  to adjust the limit.
+
+* Support for LoongArch in JIT.
+
+* Perl changed the meaning of (for example) {,3}, which was not previously
+  recognized as a quantifier. Now it means {0,3} and PCRE2 has also changed.
+  Note that {,} is still not a quantifier.
+
+* Following Perl, allow spaces and tabs after { and before } in all Perl-
+  compatible items that use braces, and also around commas in quantifiers. The
+  one exception in PCRE2 is \u{...}, which is from ECMAScript, not Perl, and
+  PCRE2 follows ECMAScript usage.
+
+* Changed the meaning of \w and its synonyms and derivatives (\b and \B) in UCP
+  mode to follow Perl. It now matches characters whose general categories are L
+  or N or whose particular categories are Mn (non-spacing mark) or Pc
+  (connector punctuation).
+
+* Changed the default meaning of [:xdigit:] in UCP mode to follow Perl. It now
+  matches the "fullwidth" versions of hex digits. PCRE2_EXTRA_ASCII_DIGIT can
+  be used to keep it ASCII only.
+
+* Make PCRE2_UCP the default in UTF mode in pcre2grep and add --no-ucp,
+  --case-restrict and --posix-digit.
+
+* Add --group-separator and --no-group-separator to pcre2grep.
+
+
+Version 10.42 11-December-2022
+------------------------------
+
+This is an unexpectedly early release to fix a problem that was introduced in
+10.41. ChangeLog number 19 (GitHub #139) added the default definition of
+PCRE2_CALL_CONVENTION to pcre2posix.c instead of pcre2posix.h, which meant that
+programs including pcre2posix.h but not pcre2.h couldn't compile. A new test
+that checks this case has been added.
+
+A couple of other minor issues are also fixed, and a patch for an intermittent
+JIT fault is also included. See ChangeLog and the Git log.
+
+
+Version 10.41 06-December-2022
+------------------------------
+
+This is another mainly bug-fixing and code-tidying release. There is one
+significant upgrade to pcre2grep: it now behaves like GNU grep when matching
+more than one pattern and a later pattern matches at an earlier point in the
+subject when the matched substrings are being identified by colour or by
+offsets.
+
+
+Version 10.40 15-April-2022
+---------------------------
+
+This is mostly a bug-fixing and code-tidying release. However, there are some
+extensions to Unicode property handling:
+
+* Added support for Bidi_Class and a number of binary Unicode properties,
+including Bidi_Control.
+
+* A number of changes to script matching for \p and \P:
+
+  (a) Script extensions for a character are now coded as a bitmap instead of
+      a list of script numbers, which should be faster and does not need a
+      loop.
+
+  (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms
+      sc and scx).
+
+  (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being
+      the same as \p{scx:scriptname} because this change happened in Perl at
+      release 5.26.
+
+  (d) The standard Unicode 4-letter abbreviations for script names are now
+      recognized.
+
+  (e) In accordance with Unicode and Perl's "loose matching" rules, spaces,
+      hyphens, and underscores are ignored in property names, which are then
+      matched independent of case.
+
+As always, see ChangeLog for a list of all changes (also the Git log).
+
+
 Version 10.39 29-October-2021
 -----------------------------
 

+ 41 - 30
regex.mod/pcre/NON-AUTOTOOLS-BUILD

@@ -4,7 +4,7 @@ Building PCRE2 without using autotools
 This document contains the following sections:
 
   General
-  Generic instructions for the PCRE2 C library
+  Generic instructions for the PCRE2 C libraries
   Stack size in Windows environments
   Linking programs in Windows environments
   Calling conventions in Windows environments
@@ -17,9 +17,9 @@ This document contains the following sections:
 
 GENERAL
 
-The basic PCRE2 library consists entirely of code written in Standard C, and so
-should compile successfully on any system that has a Standard C compiler and
-library.
+The source of the PCRE2 libraries consists entirely of code written in Standard
+C, and so should compile successfully on any system that has a Standard C
+compiler and library.
 
 The PCRE2 distribution includes a "configure" file for use by the
 configure/make (autotools) build system, as found in many Unix-like
@@ -36,21 +36,25 @@ provided for those who build PCRE2 without using "configure" or CMake. If you
 use "configure" or CMake, the .generic versions are not used.
 
 
-GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY
+GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARIES
 
-The following are generic instructions for building the PCRE2 C library "by
-hand". If you are going to use CMake, this section does not apply to you; you
-can skip ahead to the CMake section. Note that the settings concerned with
-8-bit, 16-bit, and 32-bit code units relate to the type of data string that
-PCRE2 processes. They are NOT referring to the underlying operating system bit
-width. You do not have to do anything special to compile in a 64-bit
-environment, for example.
+There are three possible PCRE2 libraries, each handling data with a specific
+code unit width: 8, 16, or 32 bits. You can build any combination of them. The
+following are generic instructions for building a PCRE2 C library "by hand". If
+you are going to use CMake, this section does not apply to you; you can skip
+ahead to the CMake section. Note that the settings concerned with 8-bit,
+16-bit, and 32-bit code units relate to the type of data string that PCRE2
+processes. They are NOT referring to the underlying operating system bit width.
+You do not have to do anything special to compile in a 64-bit environment, for
+example.
 
  (1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
      macro settings that it contains to whatever is appropriate for your
      environment. In particular, you can alter the definition of the NEWLINE
      macro to specify what character(s) you want to be interpreted as line
-     terminators by default.
+     terminators by default. You need to #define at least one of
+     SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, or SUPPORT_PCRE2_32, depending on which
+     libraries you are going to build. You must set all that apply.
 
      When you subsequently compile any of the PCRE2 modules, you must specify
      -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
@@ -69,7 +73,7 @@ environment, for example.
      Note also that the src/config.h.generic file is created from a config.h
      that was generated by Autotools, which automatically includes settings of
      a number of macros that are not actually used by PCRE2 (for example,
-     HAVE_MEMORY_H).
+     HAVE_DLFCN_H).
 
  (2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
 
@@ -97,6 +101,7 @@ environment, for example.
      or else use other -D settings to change the configuration as required.
 
        pcre2_auto_possess.c
+       pcre2_chkdint.c
        pcre2_chartables.c
        pcre2_compile.c
        pcre2_config.c
@@ -135,29 +140,31 @@ environment, for example.
      pcre2_jit_compile.c #includes other files from the sljit subdirectory,
      all of whose names begin with "sljit". It also #includes
      src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
-     these yourself.
+     those yourself.
 
      Note also that the pcre2_fuzzsupport.c file contains special code that is
      useful to those who want to run fuzzing tests on the PCRE2 library. Unless
      you are doing that, you can ignore it.
 
  (5) Now link all the compiled code into an object library in whichever form
-     your system keeps such libraries. This is the basic PCRE2 C 8-bit library.
-     If your system has static and shared libraries, you may have to do this
-     once for each type.
+     your system keeps such libraries. This is the PCRE2 C 8-bit library,
+     typically called something like libpcre2-8. If your system has static and
+     shared libraries, you may have to do this once for each type.
 
  (6) If you want to build a library that supports 16-bit or 32-bit code units,
-     (as well as, or instead of the 8-bit library) just supply 16 or 32 as the
-     value of -DPCRE2_CODE_UNIT_WIDTH when you are compiling.
+     set 16 or 32 as the value of -DPCRE2_CODE_UNIT_WIDTH when obeying step 4
+     above. If you want to build more than one PCRE2 library, repeat steps 4
+     and 5 as necessary.
 
  (7) If you want to build the POSIX wrapper functions (which apply only to the
      8-bit library), ensure that you have the src/pcre2posix.h file and then
      compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix
-     library.
+     library. If targeting a DLL in Windows, make sure to include
+     -DPCRE2POSIX_SHARED with your compiler flags.
 
  (8) The pcre2test program can be linked with any combination of the 8-bit,
-     16-bit and 32-bit libraries (depending on what you selected in
-     src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if
+     16-bit and 32-bit libraries (depending on what you specified in
+     src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if
      necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the
      appropriate library/ies. If you compiled an 8-bit library, pcre2test also
      needs the pcre2posix wrapper library.
@@ -185,9 +192,13 @@ environment, for example.
      the RunTest script. You might also like to build and run the freestanding
      JIT test program, src/pcre2_jit_test.c.
 
-(11) If you want to use the pcre2grep command, compile and link
-     src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not
-     need the pcre2posix library). If you have built the PCRE2 library with JIT
+(11) The pcre2test program tests the POSIX wrapper library, but there is also a
+     freestanding test program in src/pcre2posix_test.c. It must be linked with
+     both the pcre2posix library and the 8-bit PCRE2 library.
+
+(12) If you want to use the pcre2grep command, compile and link
+     src/pcre2grep.c; it uses only the 8-bit PCRE2 library (it does not need
+     the pcre2posix library). If you have built the PCRE2 library with JIT
      support by defining SUPPORT_JIT in src/config.h, you can also define
      SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless
      it is run with --no-jit). If you define SUPPORT_PCRE2GREP_JIT without
@@ -211,7 +222,7 @@ CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
 
 It is possible to compile programs to use different calling conventions using
 MSVC. Search the web for "calling conventions" for more information. To make it
-easier to change the calling convention for the exported functions in the
+easier to change the calling convention for the exported functions in a
 PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external
 definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
 not set, it defaults to empty; the default calling convention is then used
@@ -306,7 +317,7 @@ cache can be deleted by selecting "File > Delete Cache".
 3.  Create a new, empty build directory, preferably a subdirectory of the
     source dir. For example, C:\pcre2\pcre2-xx\build.
 
-4.  Run cmake-gui from the Shell envirornment of your build tool, for example,
+4.  Run cmake-gui from the Shell environment of your build tool, for example,
     Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
     to start Cmake from the Windows Start menu, as this can lead to errors.
 
@@ -373,7 +384,7 @@ Otherwise:
 1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
    have been created.
 
-2. Edit RunTest.bat to indentify the full or relative location of
+2. Edit RunTest.bat to identify the full or relative location of
    the pcre2 source (wherein the testdata folder resides), e.g.:
 
    set srcdir=C:\pcre2\pcre2-10.00
@@ -406,5 +417,5 @@ z/OS file formats. The port provides an API for LE languages such as COBOL and
 for the z/OS and z/VM versions of the Rexx languages.
 
 ===========================
-Last Updated: 28 April 2021
+Last Updated: 15 April 2023
 ===========================

+ 22 - 1
regex.mod/pcre/PrepareRelease

@@ -92,24 +92,44 @@ done
 
 echo "Making pcre2demo.3"
 perl <<"END" >pcre2demo.3
+  use Time::Piece;
+  open(VH, "<", "../src/config.h.generic") || die "Failed to open src/config.h.generic\n";
   open(IN, "../src/pcre2demo.c") || die "Failed to open src/pcre2demo.c\n";
   open(OUT, ">pcre2demo.3") || die "Failed to open pcre2demo.3\n";
-  print OUT ".\\\" Start example.\n" .
+  my $version;
+  while (<VH>)
+  {
+    chomp;
+    if ( /^#define PACKAGE_STRING "([^"]+)"/ ) { $version = $1 ; last }
+  }
+  my $t = localtime;
+  print OUT ".TH PCRE2DEMO 3 \"", $t->strftime('%e %B %Y'), '" "', $version, "\"\n" .
+            ".\\\"AUTOMATICALLY GENERATED BY PrepareRelease - do not EDIT!\n" .
+            ".SH NAME\n" .
+            "PCRE2DEMO - A demonstration C program for PCRE2\n" .
+            ".SH \"SOURCE CODE\"\n" .
+            ".rs\n" .
+            ".sp\n" .
+            ".\\\" Start example.\n" .
             ".de EX\n" .
+	    ".	do ds mF \\\\n[.fam]\n" .
             ".  nr mE \\\\n(.f\n" .
             ".  nf\n" .
             ".  nh\n" .
+	    ".	do fam C\n" .
             ".  ft CW\n" .
             "..\n" .
             ".\n" .
             ".\n" .
             ".\\\" End example.\n" .
             ".de EE\n" .
+	    ".	do fam \\\\*(mF\n" .
             ".  ft \\\\n(mE\n" .
             ".  fi\n" .
             ".  hy \\\\n(HY\n" .
             "..\n" .
             ".\n" .
+            ".RS -7\n" .
             ".EX\n" ;
   while (<IN>)
     {
@@ -147,6 +167,7 @@ for file in *.3 ; do
   if [ `expr $base : '.*_'` -ne 0 ] ; then toc="" ; fi
   if [ "$base" = "pcre2sample" ]  || \
      [ "$base" = "pcre2compat" ]  || \
+     [ "$base" = "pcre2demo" ]    || \
      [ "$base" = "pcre2limits" ]  || \
      [ "$base" = "pcre2unicode" ] ; then
     toc=""

+ 77 - 35
regex.mod/pcre/README

@@ -8,7 +8,7 @@ features, and the internals have been improved. The original PCRE1 library is
 now obsolete and no longer maintained. The latest release of PCRE2 is available
 in .tar.gz, tar.bz2, or .zip form from this GitHub repository:
 
-https://github.com/PhilipHazel/pcre2/releases
+https://github.com/PCRE2Project/pcre2/releases
 
 There is a mailing list for discussion about the development of PCRE2 at
 pcre2-dev@googlegroups.com. You can subscribe by sending an email to
@@ -17,7 +17,7 @@ pcre2-dev+subscribe@googlegroups.com.
 You can access the archives and also subscribe or manage your subscription
 here:
 
-https://groups.google.com/pcre2-dev
+https://groups.google.com/g/pcre2-dev
 
 Please read the NEWS file if you are upgrading from a previous release. The
 contents of this README file are:
@@ -114,12 +114,18 @@ Building PCRE2 using autotools
 The following instructions assume the use of the widely used "configure; make;
 make install" (autotools) process.
 
-To build PCRE2 on system that supports autotools, first run the "configure"
-command from the PCRE2 distribution directory, with your current directory set
+If you have downloaded and unpacked a PCRE2 release tarball, run the
+"configure" command from the PCRE2 directory, with your current directory set
 to the directory where you want the files to be created. This command is a
 standard GNU "autoconf" configuration script, for which generic instructions
 are supplied in the file INSTALL.
 
+The files in the GitHub repository do not contain "configure". If you have
+downloaded the PCRE2 source files from GitHub, before you can run "configure"
+you must run the shell script called autogen.sh. This runs a number of
+autotools to create a "configure" script (you must of course have the autotools
+commands installed in order to do this).
+
 Most commonly, people build PCRE2 within its own distribution directory, and in
 this case, on many systems, just running "./configure" is sufficient. However,
 the usual methods of changing standard defaults are available. For example:
@@ -151,7 +157,18 @@ library. They are also documented in the pcre2build man page.
   --disable-shared
   --disable-static
 
-  (See also "Shared libraries on Unix-like systems" below.)
+  Setting --disable-shared ensures that PCRE2 libraries are built as static
+  libraries. The binaries that are then created as part of the build process
+  (for example, pcre2test and pcre2grep) are linked statically with one or more
+  PCRE2 libraries, but may also be dynamically linked with other libraries such
+  as libc. If you want these binaries to be fully statically linked, you can
+  set LDFLAGS like this:
+
+  LDFLAGS=--static ./configure --disable-shared
+
+  Note the two hyphens in --static. Of course, this works only if static
+  versions of all the relevant libraries are available for linking. See also
+  "Shared libraries" below.
 
 . By default, only the 8-bit library is built. If you add --enable-pcre2-16 to
   the "configure" command, the 16-bit library is also built. If you add
@@ -188,10 +205,10 @@ library. They are also documented in the pcre2build man page.
 
   As well as supporting UTF strings, Unicode support includes support for the
   \P, \p, and \X sequences that recognize Unicode character properties.
-  However, only the basic two-letter properties such as Lu are supported.
-  Escape sequences such as \d and \w in patterns do not by default make use of
-  Unicode properties, but can be made to do so by setting the PCRE2_UCP option
-  or starting a pattern with (*UCP).
+  However, only a subset of Unicode properties are supported; see the
+  pcre2pattern man page for details. Escape sequences such as \d and \w in
+  patterns do not by default make use of Unicode properties, but can be made to
+  do so by setting the PCRE2_UCP option or starting a pattern with (*UCP).
 
 . You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
   of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
@@ -265,6 +282,17 @@ library. They are also documented in the pcre2build man page.
   performance in the 8-bit and 16-bit libraries. In the 32-bit library, the
   link size setting is ignored, as 4-byte offsets are always used.
 
+. Lookbehind assertions in which one or more branches can match a variable
+  number of characters are supported only if there is a maximum matching length
+  for each top-level branch. There is a limit to this maximum that defaults to
+  255 characters. You can alter this default by a setting such as
+
+  --with-max-varlookbehind=100
+
+  The limit can be changed at runtime by calling pcre2_set_max_varlookbehind().
+  Lookbehind assertions in which every branch matches a fixed number of
+  characters (not necessarily all the same) are not constrained by this limit.
+
 . For speed, PCRE2 uses four tables for manipulating and identifying characters
   whose code point values are less than 256. By default, it uses a set of
   tables for ASCII encoding that is part of the distribution. If you specify
@@ -363,16 +391,16 @@ library. They are also documented in the pcre2build man page.
   avoided by linking with libedit (which has a BSD licence) instead.
 
   Enabling libreadline causes the -lreadline option to be added to the
-  pcre2test build. In many operating environments with a sytem-installed
+  pcre2test build. In many operating environments with a system-installed
   readline library this is sufficient. However, in some environments (e.g. if
   an unmodified distribution version of readline is in use), it may be
   necessary to specify something like LIBS="-lncurses" as well. This is
   because, to quote the readline INSTALL, "Readline uses the termcap functions,
   but does not link with the termcap or curses library itself, allowing
-  applications which link with readline the to choose an appropriate library."
-  If you get error messages about missing functions tgetstr, tgetent, tputs,
-  tgetflag, or tgoto, this is the problem, and linking with the ncurses library
-  should fix it.
+  applications which link with readline the option to choose an appropriate
+  library." If you get error messages about missing functions tgetstr, tgetent,
+  tputs, tgetflag, or tgoto, this is the problem, and linking with the ncurses
+  library should fix it.
 
 . The C99 standard defines formatting modifiers z and t for size_t and
   ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
@@ -394,24 +422,24 @@ library. They are also documented in the pcre2build man page.
   Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
   be created. This is normally run under valgrind or used when PCRE2 is
   compiled with address sanitizing enabled. It calls the fuzzing function and
-  outputs information about it is doing. The input strings are specified by
-  arguments: if an argument starts with "=" the rest of it is a literal input
-  string. Otherwise, it is assumed to be a file name, and the contents of the
-  file are the test string.
+  outputs information about what it is doing. The input strings are specified
+  by arguments: if an argument starts with "=" the rest of it is a literal
+  input string. Otherwise, it is assumed to be a file name, and the contents
+  of the file are the test string.
 
 . Releases before 10.30 could be compiled with --disable-stack-for-recursion,
   which caused pcre2_match() to use individual blocks on the heap for
   backtracking instead of recursive function calls (which use the stack). This
-  is now obsolete since pcre2_match() was refactored always to use the heap (in
-  a much more efficient way than before). This option is retained for backwards
-  compatibility, but has no effect other than to output a warning.
+  is now obsolete because pcre2_match() was refactored always to use the heap
+  (in a much more efficient way than before). This option is retained for
+  backwards compatibility, but has no effect other than to output a warning.
 
 The "configure" script builds the following files for the basic C library:
 
 . Makefile             the makefile that builds the library
 . src/config.h         build-time configuration options for the library
 . src/pcre2.h          the public PCRE2 header file
-. pcre2-config          script that shows the building settings such as CFLAGS
+. pcre2-config         script that shows the building settings such as CFLAGS
                          that were set for "configure"
 . libpcre2-8.pc        )
 . libpcre2-16.pc       ) data for the pkg-config command
@@ -432,8 +460,9 @@ Once "configure" has run, you can run "make". This builds whichever of the
 libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
 program called pcre2test. If you enabled JIT support with --enable-jit, another
 test program called pcre2_jit_test is built as well. If the 8-bit library is
-built, libpcre2-posix and the pcre2grep command are also built. Running
-"make" with the -j option may speed up compilation on multiprocessor systems.
+built, libpcre2-posix, pcre2posix_test, and the pcre2grep command are also
+built. Running "make" with the -j option may speed up compilation on
+multiprocessor systems.
 
 The command "make check" runs all the appropriate tests. Details of the PCRE2
 tests are given below in a separate section of this document. The -j option of
@@ -542,7 +571,10 @@ configuring it. For example:
 ./configure --prefix=/usr/gnu --disable-shared
 
 Then run "make" in the usual way. Similarly, you can use --disable-static to
-build only shared libraries.
+build only shared libraries. Note, however, that when you build only static
+libraries, binary programs such as pcre2test and pcre2grep may still be
+dynamically linked with other libraries (for example, libc) unless you set
+LDFLAGS to --static when running "configure".
 
 
 Cross-compiling using autotools
@@ -571,9 +603,9 @@ at build time" for more details.
 Making new tarballs
 -------------------
 
-The command "make dist" creates two PCRE2 tarballs, in tar.gz and zip formats.
-The command "make distcheck" does the same, but then does a trial build of the
-new distribution to ensure that it works.
+The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
+zip formats. The command "make distcheck" does the same, but then does a trial
+build of the new distribution to ensure that it works.
 
 If you have modified any of the man page sources in the doc directory, you
 should first run the PrepareRelease script before making a distribution. This
@@ -585,9 +617,11 @@ Testing PCRE2
 
 To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
 There is another script called RunGrepTest that tests the pcre2grep command.
-When JIT support is enabled, a third test program called pcre2_jit_test is
-built. Both the scripts and all the program tests are run if you obey "make
-check". For other environments, see the instructions in NON-AUTOTOOLS-BUILD.
+When the 8-bit library is built, a test program for the POSIX wrapper, called
+pcre2posix_test, is compiled, and when JIT support is enabled, a test program
+called pcre2_jit_test is built. The scripts and the program tests are all run
+when you obey "make check". For other environments, see the instructions in
+NON-AUTOTOOLS-BUILD.
 
 The RunTest script runs the pcre2test test program (which is documented in its
 own man page) on each of the relevant testinput files in the testdata
@@ -602,13 +636,13 @@ is available. RunTest outputs a comment when it skips a test.
 
 Many (but not all) of the tests that are not skipped are run twice if JIT
 support is available. On the second run, JIT compilation is forced. This
-testing can be suppressed by putting "nojit" on the RunTest command line.
+testing can be suppressed by putting "-nojit" on the RunTest command line.
 
 The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
 libraries that are enabled. If you want to run just one set of tests, call
 RunTest with either the -8, -16 or -32 option.
 
-If valgrind is installed, you can run the tests under it by putting "valgrind"
+If valgrind is installed, you can run the tests under it by putting "-valgrind"
 on the RunTest command line. To run pcre2test on just one or more specific test
 files, give their numbers as arguments to RunTest, for example:
 
@@ -689,7 +723,7 @@ Test 14 contains some special UTF and UCP tests that give different output for
 different code unit widths.
 
 Test 15 contains a number of tests that must not be run with JIT. They check,
-among other non-JIT things, the match-limiting features of the intepretive
+among other non-JIT things, the match-limiting features of the interpretive
 matcher.
 
 Test 16 is run only when JIT support is not available. It checks that an
@@ -710,6 +744,9 @@ and with UTF support, respectively. Test 23 tests \C when it is locked out.
 Tests 24 and 25 test the experimental pattern conversion functions, without and
 with UTF support, respectively.
 
+Test 26 checks Unicode property support using tests that are generated
+automatically from the Unicode data tables.
+
 
 Character tables
 ----------------
@@ -788,6 +825,7 @@ The distribution should contain the files listed below.
 
   src/pcre2posix.c         )
   src/pcre2_auto_possess.c )
+  src/pcre2_chkdint.c      )
   src/pcre2_compile.c      )
   src/pcre2_config.c       )
   src/pcre2_context.c      )
@@ -813,6 +851,7 @@ The distribution should contain the files listed below.
   src/pcre2_substring.c    )
   src/pcre2_tables.c       )
   src/pcre2_ucd.c          )
+  src/pcre2_ucptables.c    )
   src/pcre2_valid_utf.c    )
   src/pcre2_xclass.c       )
 
@@ -824,6 +863,8 @@ The distribution should contain the files listed below.
   src/pcre2posix.h         header for the external POSIX wrapper API
   src/pcre2_internal.h     header for internal use
   src/pcre2_intmodedep.h   a mode-specific internal header
+  src/pcre2_jit_neon_inc.h header used by JIT
+  src/pcre2_jit_simd_inc.h header used by JIT
   src/pcre2_ucp.h          header for Unicode property handling
 
   sljit/*                  source files for the JIT compiler
@@ -834,6 +875,7 @@ The distribution should contain the files listed below.
   src/pcre2grep.c          source of a grep utility that uses PCRE2
   src/pcre2test.c          comprehensive test program
   src/pcre2_jit_test.c     JIT test program
+  src/pcre2posix_test.c    POSIX wrapper API test program
 
 (C) Auxiliary files:
 
@@ -905,4 +947,4 @@ The distribution should contain the files listed below.
 Philip Hazel
 Email local part: Philip.Hazel
 Email domain: gmail.com
-Last updated: 29 October 2021
+Last updated: 24 November 2023

+ 282 - 36
regex.mod/pcre/RunGrepTest

@@ -68,6 +68,27 @@ diff -b  /dev/null /dev/null 2>/dev/null && cf="diff -b"
 diff -u  /dev/null /dev/null 2>/dev/null && cf="diff -u"
 diff -ub /dev/null /dev/null 2>/dev/null && cf="diff -ub"
 
+# Add a -a (always treat as text) if available. This was added in an attempt
+# to get more detail from an Alpine Linux test failure on GitHub.
+
+$cf -a /dev/null /dev/null 2>/dev/null && cf="$cf -a"
+
+# Some tests involve NUL characters. It seems impossible to handle them easily
+# in many operating systems. An earlier version of this script used sed to
+# translate NUL into the string ZERO, but this didn't work on Solaris (aka
+# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
+# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
+# even when using GNU sed. A user suggested using tr instead, which
+# necessitates translating to a single character. However, on (some versions
+# of?) Solaris, the normal "tr" cannot handle binary zeros, but if
+# /usr/xpg4/bin/tr is available, it can do so, so test for that.
+
+if [ -x /usr/xpg4/bin/tr ] ; then
+  tr=/usr/xpg4/bin/tr
+else
+  tr=tr
+fi
+
 # If this test is being run from "make check", $srcdir will be set. If not, set
 # it to the current or parent directory, whichever one contains the test data.
 # Subsequently, we run most of the pcre2grep tests in the source directory so
@@ -254,7 +275,7 @@ echo "---------------------------- Test 35 -----------------------------" >>test
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 36 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude=grepinputM --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include='grepinput[^C]' --exclude 'grepinput$' --exclude=grepinput8 --exclude=grepinputM --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 37 -----------------------------" >>testtrygrep
@@ -296,7 +317,10 @@ echo "---------------------------- Test 45 ------------------------------" >>tes
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 46 ------------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -e 'unopened)' -e abc ./testdata/grepinput) >>testtrygrep 2>&1
 (cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e '(unclosed' ./testdata/grepinput) >>testtrygrep 2>&1
+(cd $srcdir; $valgrind $vjs $pcre2grep -eabc -e xyz -e '[unclosed' ./testdata/grepinput) >>testtrygrep 2>&1
+(cd $srcdir; $valgrind $vjs $pcre2grep --regex=123 -eabc -e xyz -e '[unclosed' ./testdata/grepinput) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 47 ------------------------------" >>testtrygrep
@@ -339,11 +363,11 @@ echo "---------------------------- Test 55 -----------------------------" >>test
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 56 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -c lazy ./testdata/grepinput*) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -c --exclude=grepinputC lazy ./testdata/grepinput*) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 57 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -c -l lazy ./testdata/grepinput*) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -c -l --exclude=grepinputC lazy ./testdata/grepinput*) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 58 -----------------------------" >>testtrygrep
@@ -367,7 +391,7 @@ echo "---------------------------- Test 62 -----------------------------" >>test
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 63 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $pcre2grep --recursion-limit=1000 --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
+(cd $srcdir; $valgrind $pcre2grep --recursion-limit=1K --no-jit -M 'This is a file(.|\R)*file.' ./testdata/grepinput) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 64 ------------------------------" >>testtrygrep
@@ -506,25 +530,25 @@ echo "---------------------------- Test 95 -----------------------------" >>test
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 96 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' --exclude=grepinputM 'fox' ./test* | sort) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' --exclude=grepinput[MC] 'fox' ./test* | sort) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 97 -----------------------------" >>testtrygrep
 echo "grepinput$" >testtemp1grep
 echo "grepinput8" >>testtemp1grep
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude=grepinputM --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude=grepinput[MC] --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 98 -----------------------------" >>testtrygrep
 echo "grepinput$" >testtemp1grep
 echo "grepinput8" >>testtemp1grep
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --exclude=grepinputM --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --exclude=grepinput[MC] --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 99 -----------------------------" >>testtrygrep
 echo "grepinput$" >testtemp1grep
 echo "grepinput8" >testtemp2grep
-(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude=grepinputM --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude=grepinput[MC] --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 100 ------------------------------" >>testtrygrep
@@ -566,7 +590,7 @@ echo "---------------------------- Test 108 ------------------------------" >>te
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 109 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -cq lazy ./testdata/grepinput*) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -cq --exclude=grepinputC lazy ./testdata/grepinput*) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 110 -----------------------------" >>testtrygrep
@@ -582,27 +606,27 @@ echo "---------------------------- Test 112 -----------------------------" >>tes
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 113 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep --total-count 'the' testdata/grepinput*) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --total-count --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 114 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -tc 'the' testdata/grepinput*) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -tc --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 115 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -tlc 'the' testdata/grepinput*) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -tlc --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 116 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep --exclude=grepinputM -th 'the' testdata/grepinput*) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --exclude=grepinput[MC] -th 'the' testdata/grepinput*) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 117 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -tch 'the' testdata/grepinput*) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -tch --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 118 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -tL 'the' testdata/grepinput*) >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -tL --exclude=grepinputC 'the' testdata/grepinput*) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 119 -----------------------------" >>testtrygrep
@@ -613,6 +637,18 @@ echo "RC=$?" >>testtrygrep
 echo "---------------------------- Test 120 ------------------------------" >>testtrygrep
 (cd $srcdir; $valgrind $vjs $pcre2grep -HO '$0:$2$1$3' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
 echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -m 1 -O '$0:$a$b$e$f$r$t$v' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -HO '${X}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -HO 'XX$' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -O '$x{12345678}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -O '$x{123Z' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --output '$x{1234}' '(\w+) binary (\w+)(\.)?' ./testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 121 -----------------------------" >>testtrygrep
 (cd $srcdir; $valgrind $vjs $pcre2grep -F '\E and (regex)' testdata/grepinputv) >>testtrygrep
@@ -646,19 +682,24 @@ $valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?<=\K[ac])' t
 echo "RC=$?" >>testtrygrep
 $valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=[ac]\K)' testNinputgrep >>testtrygrep
 echo "RC=$?" >>testtrygrep
+GREP_COLORS='ms=1;20' $valgrind $vjs $pcre2grep --colour=always --allow-lookaround-bsk '(?=[ac]\K)' testNinputgrep >>testtrygrep
+echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 126 -----------------------------" >>testtrygrep
 printf 'Next line pattern has binary zero\nABC\0XYZ\n' >testtemp1grep
 printf 'ABC\0XYZ\nABCDEF\nDEFABC\n' >testtemp2grep
 $valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep
 echo "RC=$?" >>testtrygrep
+printf 'Next line pattern is erroneous.\n^abc)(xy' >testtemp1grep
+$valgrind $vjs $pcre2grep -a -f testtemp1grep testtemp2grep >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 127 -----------------------------" >>testtrygrep
 (cd $srcdir; $valgrind $vjs $pcre2grep -o --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 128 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1
+(cd $srcdir; $valgrind $vjs $pcre2grep -m1M -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 129 -----------------------------" >>testtrygrep
@@ -674,13 +715,153 @@ echo "---------------------------- Test 131 -----------------------------" >>tes
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 132 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -A3 '^match'; echo '---'; head -1) <$srcdir/testdata/grepinput >>testtrygrep 2>&1
+(cd $srcdir; exec 3<testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; head -1 <&3; exec 3<&-) >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep
 
 echo "---------------------------- Test 133 -----------------------------" >>testtrygrep
-(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
+(cd $srcdir; exec 3<testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; exec 3<&-) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 134 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --max-count=1 -nH -O '=$x{41}$x423$o{103}$o1045=' 'fox' -) <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 135 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -lZ 'word' ./testdata/grepinputv ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -A 1 -B 1 -HZ 'word' ./testdata/grepinputv) | $tr '\000' '@' >>testtrygrep
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -MHZn 'start[\s]+end' testdata/grepinputM) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 136 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -m1MK -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --max-count=1MK -o1 --om-capture=0 'pattern()()()()' testdata/grepinput) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 137 -----------------------------" >>testtrygrep
+printf 'Last line\nhas no newline' >testtemp1grep
+$valgrind $vjs $pcre2grep -A1 Last testtemp1grep >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 138 -----------------------------" >>testtrygrep
+printf 'AbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\nAbC\n' >testtemp1grep
+$valgrind $vjs $pcre2grep --no-jit --heap-limit=0 b testtemp1grep >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 139 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --line-buffered 'fox' testdata/grepinputv) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 140 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --buffer-size=10 -A1 'brown' testdata/grepinputv) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 141 -----------------------------" >>testtrygrep
+printf "$srcdir/testdata/grepinputv\n-\n" >testtemp1grep
+printf 'This is a line from stdin.' >testtemp2grep
+$valgrind $vjs $pcre2grep --file-list testtemp1grep "line from stdin" <testtemp2grep >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 142 -----------------------------" >>testtrygrep
+printf "/does/not/exist\n" >testtemp1grep
+printf 'This is a line from stdin.' >testtemp2grep
+$valgrind $vjs $pcre2grep --file-list testtemp1grep "line from stdin" >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 143 -----------------------------" >>testtrygrep
+printf 'fox|cat' >testtemp1grep
+$valgrind $vjs $pcre2grep -f - $srcdir/testdata/grepinputv <testtemp1grep >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 144 -----------------------------" >>testtrygrep
+$valgrind $vjs $pcre2grep -f /non/exist $srcdir/testdata/grepinputv >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 145 -----------------------------" >>testtrygrep
+printf '*meta*\rdog.' >testtemp1grep
+$valgrind $vjs $pcre2grep -Ncr -F -f testtemp1grep $srcdir/testdata/grepinputv >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 146 -----------------------------" >>testtrygrep
+printf 'A123B' >testtemp1grep
+$valgrind $vjs $pcre2grep -H -e '123|fox' - <testtemp1grep >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep -h -e '123|fox' - $srcdir/testdata/grepinputv <testtemp1grep >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep - $srcdir/testdata/grepinputv <testtemp1grep >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 147 -----------------------------" >>testtrygrep
+$valgrind $vjs $pcre2grep -e '123|fox' -- -nonfile >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 148 -----------------------------" >>testtrygrep
+$valgrind $vjs $pcre2grep --nonexist >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep -n-n-bad >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --context >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --only-matching --output=xx >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --colour=badvalue >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --newline=badvalue >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep -d badvalue >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep -D badvalue >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --buffer-size=0 >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --exclude '(badpat' abc /dev/null >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --exclude-from /non/exist abc /dev/null >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --include-from /non/exist abc /dev/null >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+$valgrind $vjs $pcre2grep --file-list=/non/exist abc /dev/null >>testtrygrep 2>&1
 echo "RC=$?" >>testtrygrep
 
+echo "---------------------------- Test 149 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --binary-files=binary "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --binary-files=wrong "dog" ./testdata/grepbinary) >>testtrygrep 2>&1
+echo "RC=$?" >>testtrygrep
+
+# This test runs the code that tests locale support. However, on some systems
+# (e.g. Alpine Linux) there is no locale support and running this test just
+# generates a "no match" result. Therefore, we test for locale support, and if
+# it is found missing, we pretend that the test has run as expected so that the
+# output matches.
+
+echo "---------------------------- Test 150 -----------------------------" >>testtrygrep
+which locale >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+  echo "pcre2grep: Failed to set locale badlocale (obtained from LC_CTYPE)" >>testtrygrep
+  echo "RC=2" >>testtrygrep
+else
+
+  (cd $srcdir; unset LC_ALL; env LC_CTYPE=badlocale $valgrind $vjs $pcre2grep abc /dev/null) >>testtrygrep 2>&1
+  echo "RC=$?" >>testtrygrep
+fi
+
+echo "---------------------------- Test 151 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep --colour=always -e this -e The -e 'The wo' testdata/grepinputv) >>testtrygrep
+
+echo "---------------------------- Test 152 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --group-separator='++' 'four' ./testdata/grepinputx) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
+echo "---------------------------- Test 153 -----------------------------" >>testtrygrep
+(cd $srcdir; $valgrind $vjs $pcre2grep -nA3 --no-group-separator 'four' ./testdata/grepinputx) >>testtrygrep
+echo "RC=$?" >>testtrygrep
+
+
 # Now compare the results.
 
 $cf $srcdir/testdata/grepoutput testtrygrep
@@ -718,6 +899,22 @@ if [ $utf8 -ne 0 ] ; then
   (cd $srcdir; $valgrind $vjs $pcre2grep -u -m1 -O '=$x{1d3}$o{744}=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1
   echo "RC=$?" >>testtrygrep
 
+  echo "---------------------------- Test U7 ------------------------------" >>testtrygrep
+  (cd $srcdir; $valgrind $vjs $pcre2grep -ui --colour=always 'k+|\babc\b' ./testdata/grepinput8) >>testtrygrep
+  echo "RC=$?" >>testtrygrep
+
+  echo "---------------------------- Test U8 ------------------------------" >>testtrygrep
+  (cd $srcdir; $valgrind $vjs $pcre2grep -UiEP --colour=always 'k+|\babc\b' ./testdata/grepinput8) >>testtrygrep
+  echo "RC=$?" >>testtrygrep
+
+  echo "---------------------------- Test U9 ------------------------------" >>testtrygrep
+  (cd $srcdir; $valgrind $vjs $pcre2grep -u --colour=always 'A\d' ./testdata/grepinput8) >>testtrygrep
+  echo "RC=$?" >>testtrygrep
+
+  echo "---------------------------- Test U10 ------------------------------" >>testtrygrep
+  (cd $srcdir; $valgrind $vjs $pcre2grep -u --posix-digit --colour=always 'A\d' ./testdata/grepinput8) >>testtrygrep
+  echo "RC=$?" >>testtrygrep
+
   $cf $srcdir/testdata/grepoutput8 testtrygrep
   if [ $? != 0 ] ; then exit 1; fi
 
@@ -738,9 +935,11 @@ printf 'abc\rdef\r\nghi\njkl' >testNinputgrep
 
 printf '%c--------------------------- Test N1 ------------------------------\r\n' - >testtrygrep
 $valgrind $vjs $pcre2grep -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
+$valgrind $vjs $pcre2grep -B1 -n -N CR "^def" testNinputgrep >>testtrygrep
 
 printf '%c--------------------------- Test N2 ------------------------------\r\n' - >>testtrygrep
 $valgrind $vjs $pcre2grep -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
+$valgrind $vjs $pcre2grep -B1 -n -N CRLF "^ghi" testNinputgrep >>testtrygrep
 
 printf '%c--------------------------- Test N3 ------------------------------\r\n' - >>testtrygrep
 pattern=`printf 'def\rjkl'`
@@ -751,34 +950,39 @@ $valgrind $vjs $pcre2grep -n --newline=crlf -F -f $srcdir/testdata/greppatN4 tes
 
 printf '%c--------------------------- Test N5 ------------------------------\r\n' - >>testtrygrep
 $valgrind $vjs $pcre2grep -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
+$valgrind $vjs $pcre2grep -B1 -n --newline=any "^def" testNinputgrep >>testtrygrep
 
 printf '%c--------------------------- Test N6 ------------------------------\r\n' - >>testtrygrep
 $valgrind $vjs $pcre2grep -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
-
-# This next test involves NUL characters. It seems impossible to handle them
-# easily in many operating systems. An earlier version of this script used sed
-# to translate NUL into the string ZERO, but this didn't work on Solaris (aka
-# SunOS), where the version of sed explicitly doesn't like them, and also MacOS
-# (Darwin), OpenBSD, FreeBSD, NetBSD, and some Linux distributions like Alpine,
-# even when using GNU sed. A user suggested using tr instead, which
-# necessitates translating to a single character (@). However, on (some
-# versions of?) Solaris, the normal "tr" cannot handle binary zeros, but if
-# /usr/xpg4/bin/tr is available, it can do so, so test for that.
-
-if [ -x /usr/xpg4/bin/tr ] ; then
-  tr=/usr/xpg4/bin/tr
-else
-  tr=tr
-fi
+$valgrind $vjs $pcre2grep -B1 -n --newline=anycrlf "^jkl" testNinputgrep >>testtrygrep
 
 printf '%c--------------------------- Test N7 ------------------------------\r\n' - >>testtrygrep
-printf 'abc\0def' >testNinputgrep
+printf 'xyz\0abc\0def' >testNinputgrep
 $valgrind $vjs $pcre2grep -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
+$valgrind $vjs $pcre2grep -B1 -na --newline=nul "^(abc|def)" testNinputgrep | $tr '\000' '@' >>testtrygrep
 echo "" >>testtrygrep
 
 $cf $srcdir/testdata/grepoutputN testtrygrep
 if [ $? != 0 ] ; then exit 1; fi
 
+
+# These newline tests need UTF support.
+
+if [ $utf8 -ne 0 ] ; then
+  echo "Testing pcre2grep newline settings with UTF-8 features"
+
+  printf '%c--------------------------- Test UN1 ------------------------------\r\n' - >testtrygrep
+  printf 'abc\341\210\264def\nxyz' >testNinputgrep
+  $valgrind $vjs $pcre2grep -nau --newline=anycrlf "^(abc|def)" testNinputgrep >>testtrygrep
+  echo "" >>testtrygrep
+
+  $cf $srcdir/testdata/grepoutputUN testtrygrep
+  if [ $? != 0 ] ; then exit 1; fi
+else
+  echo "Skipping pcre2grep newline UTF-8 tests: no UTF-8 support in PCRE2 library"
+fi
+
+
 # If pcre2grep supports script callouts, run some tests on them. It is possible
 # to restrict these callouts to the non-fork case, either for security, or for
 # environments that do not support fork(). This is handled by comparing to a
@@ -789,20 +993,59 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scri
   $valgrind $vjs $pcre2grep '(T)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4) ($14) ($0)")()' $srcdir/testdata/grepinputv >testtrygrep
   $valgrind $vjs $pcre2grep '(T)(..(.))()()()()()()()(..)(?C"/bin/echo|Arg1: [$11] [${11}]")' $srcdir/testdata/grepinputv >>testtrygrep
   $valgrind $vjs $pcre2grep '(T)(?C"|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
+  $valgrind $vjs $pcre2grep '(T)(?C"/bin/echo|$0:$1$n")' $srcdir/testdata/grepinputv >>testtrygrep
   $valgrind $vjs $pcre2grep '(T)(?C"|$1$n")(*F)' $srcdir/testdata/grepinputv >>testtrygrep
   $valgrind $vjs $pcre2grep -m1 '(T)(?C"|$0:$1:$x{41}$o{101}$n")' $srcdir/testdata/grepinputv >>testtrygrep
 
   if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'Non-fork callout scripts in patterns are supported'; then
+    nonfork=1
     $cf $srcdir/testdata/grepoutputCN testtrygrep
   else
+    nonfork=0
     $cf $srcdir/testdata/grepoutputC testtrygrep
   fi
-
   if [ $? != 0 ] ; then exit 1; fi
+
+  # These callout tests need UTF support.
+
+  if [ $utf8 -ne 0 ] ; then
+    echo "Testing pcre2grep script callout with UTF-8 features"
+    $valgrind $vjs $pcre2grep -u '(T)(?C"|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >testtrygrep
+    $valgrind $vjs $pcre2grep -u '(T)(?C"/bin/echo|$0:$x{a6}$n")' $srcdir/testdata/grepinputv >>testtrygrep
+
+    if [ $nonfork = 1 ] ; then
+      $cf $srcdir/testdata/grepoutputCNU testtrygrep
+    else
+      $cf $srcdir/testdata/grepoutputCU testtrygrep
+    fi
+    if [ $? != 0 ] ; then exit 1; fi
+  fi
 else
   echo "Script callouts are not supported"
 fi
 
+
+# Test reading .gz and .bz2 files when supported.
+
+if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q '\.gz are read using zlib'; then
+  echo "Testing reading .gz file"
+  $valgrind $vjs $pcre2grep 'one|two' $srcdir/testdata/grepinputC.gz >testtrygrep
+  echo "RC=$?" >>testtrygrep
+  $cf $srcdir/testdata/grepoutputCgz testtrygrep
+  if [ $? != 0 ] ; then exit 1; fi
+fi
+
+if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q '\.bz2 are read using bzlib2'; then
+  echo "Testing reading .bz2 file"
+  $valgrind $vjs $pcre2grep 'one|two' $srcdir/testdata/grepinputC.bz2 >testtrygrep
+  echo "RC=$?" >>testtrygrep
+  $valgrind $vjs $pcre2grep 'one|two' $srcdir/testdata/grepnot.bz2 >>testtrygrep
+  echo "RC=$?" >>testtrygrep
+  $cf $srcdir/testdata/grepoutputCbz2 testtrygrep
+  if [ $? != 0 ] ; then exit 1; fi
+fi
+
+
 # Finally, some tests to exercise code that is not tested above, just to be
 # sure that it runs OK. Doing this improves the coverage statistics. The output
 # is not checked.
@@ -812,6 +1055,9 @@ echo '' >testtrygrep
 checkspecial '-xxxxx' 2
 checkspecial '--help' 0
 checkspecial '--line-buffered --colour=auto abc /dev/null' 1
+checkspecial '--line-buffered --color abc /dev/null' 1
+checkspecial '-dskip abc .' 1
+checkspecial '-Dread -Dskip abc /dev/null' 1
 
 # Clean up local working files
 rm -f testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep

+ 699 - 699
regex.mod/pcre/RunGrepTest.bat

@@ -1,699 +1,699 @@
-@echo off
-
-:: Run pcre2grep tests. The assumption is that the PCRE2 tests check the library
-:: itself. What we are checking here is the file handling and options that are
-:: supported by pcre2grep. This script must be run in the build directory.
-:: (jmh: I've only tested in the main directory, using my own builds.)
-
-setlocal enabledelayedexpansion
-
-:: Remove any non-default colouring that the caller may have set.
-
-set PCRE2GREP_COLOUR=
-set PCRE2GREP_COLOR=
-set PCREGREP_COLOUR=
-set PCREGREP_COLOR=
-set GREP_COLORS=
-set GREP_COLOR=
-
-:: Remember the current (build) directory and set the program to be tested.
-
-set builddir="%CD%"
-set pcre2grep=%builddir%\pcre2grep.exe
-set pcre2test=%builddir%\pcre2test.exe
-
-if NOT exist %pcre2grep% (
-  echo ** %pcre2grep% does not exist.
-  exit /b 1
-)
-
-if NOT exist %pcre2test% (
-  echo ** %pcre2test% does not exist.
-  exit /b 1
-)
-
-for /f "delims=" %%a in ('"%pcre2grep%" -V') do set pcre2grep_version=%%a
-echo Testing %pcre2grep_version%
-
-:: Set up a suitable "diff" command for comparison. Some systems have a diff
-:: that lacks a -u option. Try to deal with this; better do the test for the -b
-:: option as well. Use FC if there's no diff, taking care to ignore equality.
-
-set cf=
-set cfout=
-diff -b  nul nul 2>nul && set cf=diff -b
-diff -u  nul nul 2>nul && set cf=diff -u
-diff -ub nul nul 2>nul && set cf=diff -ub
-if NOT defined cf (
-  set cf=fc /n
-  set "cfout=>testcf || (type testcf & cmd /c exit /b 1)"
-)
-
-:: Set srcdir to the current or parent directory, whichever one contains the
-:: test data. Subsequently, we run most of the pcre2grep tests in the source
-:: directory so that the file names in the output are always the same.
-
-if NOT defined srcdir set srcdir=.
-if NOT exist %srcdir%\testdata\ (
-  if exist testdata\ (
-    set srcdir=.
-  ) else if exist ..\testdata\ (
-    set srcdir=..
-  ) else if exist ..\..\testdata\ (
-    set srcdir=..\..
-  ) else (
-    echo Cannot find the testdata directory
-    exit /b 1
-  )
-)
-
-:: Check for the availability of UTF-8 support
-
-%pcre2test% -C unicode >nul
-set utf8=%ERRORLEVEL%
-
-:: Check default newline convention. If it does not include LF, force LF.
-
-for /f %%a in ('"%pcre2test%" -C newline') do set nl=%%a
-if NOT "%nl%" == "LF" if NOT "%nl%" == "ANY" if NOT "%nl%" == "ANYCRLF" (
-  set pcre2grep=%pcre2grep% -N LF
-  echo Default newline setting forced to LF
-)
-
-:: Create a simple printf via cscript/JScript (an actual printf may translate
-:: LF to CRLF, which this one does not).
-
-echo WScript.StdOut.Write(WScript.Arguments(0).replace(/\\r/g, "\r").replace(/\\n/g, "\n")) >printf.js
-set printf=cscript //nologo printf.js
-
-:: ------ Normal tests ------
-
-echo Testing pcre2grep main features
-
-echo ---------------------------- Test 1 ------------------------------>testtrygrep
-(pushd %srcdir% & %pcre2grep% PATTERN ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 2 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% "^PATTERN" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 3 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -in PATTERN ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 4 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -ic PATTERN ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 5 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -in PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 6 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -inh PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 7 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -il PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 8 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -l PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 9 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -q PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 10 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 11 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -vn pattern ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 12 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -ix pattern ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 13 ----------------------------->>testtrygrep
-echo seventeen >testtemp1grep
-(pushd %srcdir% & %pcre2grep% -f./testdata/greplist -f %builddir%\testtemp1grep ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 14 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -w pat ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 15 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% "abc^*" ./testdata/grepinput & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 16 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% abc ./testdata/grepinput ./testdata/nonexistfile & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 17 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -M "the\noutput" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 18 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -Mn "(the\noutput|dog\.\n--)" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 19 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -Mix "Pattern" ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 20 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -Mixn "complete pair\nof lines" ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 21 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -nA3 "four" ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 22 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -nB3 "four" ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 23 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -C3 "four" ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 24 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -A9 "four" ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 25 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -nB9 "four" ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 26 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -A9 -B9 "four" ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 27 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -A10 "four" ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 28 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -nB10 "four" ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 29 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -C12 -B10 "four" ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 30 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -inB3 "pattern" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 31 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -inA3 "pattern" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 32 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -L "fox" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 33 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% "fox" ./testdata/grepnonexist & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 34 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -s "fox" ./testdata/grepnonexist & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 35 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -L -r --include=grepinputx --include grepinput8 --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 36 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude "grepinput$" --exclude=grepinput8 --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 37 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep%  "^(a+)*\d" ./testdata/grepinput & popd) >>testtrygrep 2>teststderrgrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-echo ======== STDERR ========>>testtrygrep
-type teststderrgrep >>testtrygrep
-
-echo ---------------------------- Test 38 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% ">\x00<" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 39 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -A1 "before the binary zero" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 40 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -B1 "after the binary zero" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 41 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -B1 -o "\w+ the binary zero" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 42 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -B1 -onH "\w+ the binary zero" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 43 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -on "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 44 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -on -e before -ezero -e after ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 45 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -on -f ./testdata/greplist -e binary ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 46 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -eabc -e "(unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 47 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -Fx AB.VE^
-
-elephant ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 48 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -F AB.VE^
-
-elephant ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 49 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -F -e DATA -e AB.VE^
-
-elephant ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 50 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% "^(abc|def|ghi|jkl)" ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 51 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -Mv "brown\sfox" ./testdata/grepinputv & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 52 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% --colour=always jumps ./testdata/grepinputv & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 53 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% --file-offsets "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 54 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% --line-offsets "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 55 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -f./testdata/greplist --color=always ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 56 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -c lazy ./testdata/grepinput* & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 57 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -c -l lazy ./testdata/grepinput* & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 58 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --regex=PATTERN ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 59 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --regexp=PATTERN ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 60 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --regex PATTERN ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 61 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --regexp PATTERN ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 62 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --match-limit=1000 --no-jit -M "This is a file(.|\R)*file." ./testdata/grepinput & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 63 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --recursion-limit=1000 --no-jit -M "This is a file(.|\R)*file." ./testdata/grepinput & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 64 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -o1 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 65 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -o2 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 66 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -o3 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 67 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -o12 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 68 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% --only-matching=2 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 69 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -vn --colour=always pattern ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 70 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 71 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -o "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 72 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --color=always "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 73 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 74 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -o "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 75 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --color=always "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 76 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 77 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -o "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 78 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --color=always "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 79 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 80 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -o "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 81 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --color=always "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 82 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -o --colour=always "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 83 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --buffer-size=10 --max-buffer-size=100 "^a" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 84 ----------------------------->>testtrygrep
-echo testdata/grepinput3 >testtemp1grep
-(pushd %srcdir% & %pcre2grep% --file-list ./testdata/grepfilelist --file-list %builddir%\testtemp1grep "fox|complete|t7" & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 85 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 86 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 87 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% "cat" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 88 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -v "cat" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 89 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -I "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 90 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --binary-files=without-match "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 91 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -a "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 92 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --binary-files=text "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 93 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --text "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 94 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -L -r --include=grepinputx --include grepinput8 "fox" ./testdata/grepinput* | sort & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 95 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete" & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 96 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -L -r --include-dir=testdata --exclude "^^(?^!grepinput)" "fox" ./test* | sort & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 97 ----------------------------->>testtrygrep
-echo grepinput$>testtemp1grep
-echo grepinput8>>testtemp1grep
-(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 98 ----------------------------->>testtrygrep
-echo grepinput$>testtemp1grep
-echo grepinput8>>testtemp1grep
-(pushd %srcdir% & %pcre2grep% -L -r --exclude=grepinput3 --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 99 ----------------------------->>testtrygrep
-echo grepinput$>testtemp1grep
-echo grepinput8>testtemp2grep
-(pushd %srcdir% & %pcre2grep% -L -r --include grepinput --exclude-from %builddir%\testtemp1grep --exclude-from=%builddir%\testtemp2grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 100 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -Ho2 --only-matching=1 -o3 "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 101 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator="|" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 102 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -n "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 103 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --only-matching "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 104 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -n --only-matching "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 105 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --colour=always "ipsum|" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 106 ----------------------------->>testtrygrep
-(pushd %srcdir% & echo a| %pcre2grep% -M "|a" & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 107 ----------------------------->>testtrygrep
-echo a>testtemp1grep
-echo aaaaa>>testtemp1grep
-(pushd %srcdir% & %pcre2grep%  --line-offsets "(?<=\Ka)" %builddir%\testtemp1grep & popd) >>testtrygrep 2>&1
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 108 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -lq PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 109 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -cq lazy ./testdata/grepinput* & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 110 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --om-separator / -Mo0 -o1 -o2 "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 111 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --line-offsets -M "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 112 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --file-offsets -M "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 113 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% --total-count "the" testdata/grepinput* & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 114 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -tc "the" testdata/grepinput* & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 115 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -tlc "the" testdata/grepinput* & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 116 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -th "the" testdata/grepinput* & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 117 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -tch "the" testdata/grepinput* & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 118 ----------------------------->>testtrygrep
-(pushd %srcdir% & %pcre2grep% -tL "the" testdata/grepinput* & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 119 ----------------------------->>testtrygrep
-%printf% "123\n456\n789\n---abc\ndef\nxyz\n---\n" >testNinputgrep
-%pcre2grep% -Mo "(\n|[^-])*---" testNinputgrep >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-echo ---------------------------- Test 120 ------------------------------>>testtrygrep
-(pushd %srcdir% & %pcre2grep% -HO "$0:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
-echo RC=^%ERRORLEVEL%>>testtrygrep
-
-:: Now compare the results.
-
-%cf% %srcdir%\testdata\grepoutput testtrygrep %cfout%
-if ERRORLEVEL 1 exit /b 1
-
-
-:: These tests require UTF-8 support
-
-if %utf8% neq 0 (
-  echo Testing pcre2grep UTF-8 features
-
-  echo ---------------------------- Test U1 ------------------------------>testtrygrep
-  (pushd %srcdir% & %pcre2grep% -n -u --newline=any "^X" ./testdata/grepinput8 & popd) >>testtrygrep
-  echo RC=^%ERRORLEVEL%>>testtrygrep
-
-  echo ---------------------------- Test U2 ------------------------------>>testtrygrep
-  (pushd %srcdir% & %pcre2grep% -n -u -C 3 --newline=any "Match" ./testdata/grepinput8 & popd) >>testtrygrep
-  echo RC=^%ERRORLEVEL%>>testtrygrep
-
-  echo ---------------------------- Test U3 ------------------------------>>testtrygrep
-  (pushd %srcdir% & %pcre2grep% --line-offsets -u --newline=any "(?<=\K\x{17f})" ./testdata/grepinput8 & popd) >>testtrygrep
-  echo RC=^%ERRORLEVEL%>>testtrygrep
-
-  %cf% %srcdir%\testdata\grepoutput8 testtrygrep %cfout%
-  if ERRORLEVEL 1 exit /b 1
-
-) else (
-  echo Skipping pcre2grep UTF-8 tests: no UTF-8 support in PCRE2 library
-)
-
-
-:: We go to some contortions to try to ensure that the tests for the various
-:: newline settings will work in environments where the normal newline sequence
-:: is not \n. Do not use exported files, whose line endings might be changed.
-:: Instead, create an input file so that its contents are exactly what we want.
-:: These tests are run in the build directory.
-
-echo Testing pcre2grep newline settings
-%printf% "abc\rdef\r\nghi\njkl" >testNinputgrep
-
-echo ---------------------------- Test N1 ------------------------------>testtrygrep
-%pcre2grep% -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
-
-echo ---------------------------- Test N2 ------------------------------>>testtrygrep
-%pcre2grep% -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
-
-echo ---------------------------- Test N3 ------------------------------>>testtrygrep
-for /f %%a in ('%printf% "def\rjkl"') do set pattern=%%a
-%pcre2grep% -n --newline=cr -F "!pattern!" testNinputgrep >>testtrygrep
-
-echo ---------------------------- Test N4 ------------------------------>>testtrygrep
-%pcre2grep% -n --newline=crlf -F -f %srcdir%/testdata/greppatN4 testNinputgrep >>testtrygrep
-
-echo ---------------------------- Test N5 ------------------------------>>testtrygrep
-%pcre2grep% -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
-
-echo ---------------------------- Test N6 ------------------------------>>testtrygrep
-%pcre2grep% -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
-
-%cf% %srcdir%\testdata\grepoutputN testtrygrep %cfout%
-if ERRORLEVEL 1 exit /b 1
-
-:: If pcre2grep supports script callouts, run some tests on them. The --help
-:: text is searched once to find out whether callouts are available at all,
-:: and a second time to choose which expected-output file applies
-:: (grepoutputCN or grepoutputC).
-::
-:: The second check sits inside a parenthesised block, so it uses the
-:: "if ERRORLEVEL" form, which is evaluated when the line actually runs. The
-:: percent form of ERRORLEVEL would be substituted once, when the whole block
-:: is parsed, and would therefore still hold the result of the first check.
-
-%pcre2grep% --help | %pcre2grep% -q "callout scripts in patterns are supported"
-if %ERRORLEVEL% equ 0 (
-  echo Testing pcre2grep script callouts
-  %pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep
-  %pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep
-  %pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%/testdata/grepinputv >>testtrygrep
-  %pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%/testdata/grepinputv >>testtrygrep
-  %pcre2grep% --help | %pcre2grep% -q "Non-script callout scripts in patterns are supported"
-  if not ERRORLEVEL 1 (
-    %cf% %srcdir%\testdata\grepoutputCN testtrygrep %cfout%
-  ) else (
-    %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
-  )
-  if ERRORLEVEL 1 exit /b 1
-) else (
-  echo Script callouts are not supported
-)
-
-:: Finally, some tests to exercise code that is not tested above, just to be
-:: sure that it runs OK. Doing this improves the coverage statistics. The output
-:: is not checked.
-
-echo Testing miscellaneous pcre2grep arguments (unchecked)
-%printf% "" >testtrygrep
-call :checkspecial "-xxxxx" 2 || exit /b 1
-call :checkspecial "--help" 0 || exit /b 1
-call :checkspecial "--line-buffered --colour=auto abc nul" 1 || exit /b 1
-
-:: Clean up local working files
-del testcf printf.js testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep
-
-exit /b 0
-
-:: ------ Subroutine: run pcre2grep with special arguments, check its exit code ------
-::
-:: Argument 1 is the quoted pcre2grep argument string (the surrounding quotes
-:: are removed before use); argument 2 is the exit code pcre2grep is expected
-:: to return. Standard output and standard error are appended to testtrygrep.
-:: Returns 0 when the exit code matches, otherwise reports the failure and
-:: returns 1.
-
-:checkspecial
-  %pcre2grep% %~1 >>testtrygrep 2>&1
-  if %ERRORLEVEL% equ %2 exit /b 0
-  echo ** pcre2grep %~1 failed - check testtrygrep
-  exit /b 1
-
-:: End
+@echo off
+
+:: Run pcre2grep tests. The assumption is that the PCRE2 tests check the library
+:: itself. What we are checking here is the file handling and options that are
+:: supported by pcre2grep. This script must be run in the build directory.
+:: (jmh: I've only tested in the main directory, using my own builds.)
+
+setlocal enabledelayedexpansion
+
+:: Remove any non-default colouring that the caller may have set.
+
+set PCRE2GREP_COLOUR=
+set PCRE2GREP_COLOR=
+set PCREGREP_COLOUR=
+set PCREGREP_COLOR=
+set GREP_COLORS=
+set GREP_COLOR=
+
+:: Remember the current (build) directory and set the programs to be used:
+:: pcre2grep is the program under test, and pcre2test is queried further down
+:: for build-time settings. Note that the quotes are stored as part of the
+:: builddir value, so every path derived from it carries them too.
+
+set builddir="%CD%"
+set pcre2grep=%builddir%\pcre2grep.exe
+set pcre2test=%builddir%\pcre2test.exe
+
+:: Give up at once if either executable is missing from the build directory.
+
+if NOT exist %pcre2grep% (
+  echo ** %pcre2grep% does not exist.
+  exit /b 1
+)
+
+if NOT exist %pcre2test% (
+  echo ** %pcre2test% does not exist.
+  exit /b 1
+)
+
+:: Capture what "pcre2grep -V" prints (the last line, if there are several)
+:: and announce which version is being tested.
+
+for /f "delims=" %%a in ('"%pcre2grep%" -V') do set pcre2grep_version=%%a
+echo Testing %pcre2grep_version%
+
+:: Set up a suitable "diff" command for comparison. Some systems have a diff
+:: that lacks a -u option, so each option combination is probed by comparing
+:: nul with itself; the probes run from least to most preferred, and the last
+:: one that succeeds is the one left in cf.
+::
+:: Use FC if there's no diff. In that case cfout is appended to every
+:: comparison command: it sends the FC output to the file testcf, and only
+:: when FC reports failure is that file typed out and a non-zero exit code
+:: produced, so nothing is shown when the files are equal.
+
+set cf=
+set cfout=
+diff -b  nul nul 2>nul && set cf=diff -b
+diff -u  nul nul 2>nul && set cf=diff -u
+diff -ub nul nul 2>nul && set cf=diff -ub
+if NOT defined cf (
+  set cf=fc /n
+  set "cfout=>testcf || (type testcf & cmd /c exit /b 1)"
+)
+
+:: Locate the source directory, which is the one that holds the testdata
+:: subdirectory. A srcdir value supplied by the caller (default: the current
+:: directory) is used if testdata exists beneath it; otherwise the current
+:: directory, its parent and its grandparent are tried in that order, and the
+:: script stops with an error if none of them qualifies. Subsequently, we run
+:: most of the pcre2grep tests in the source directory so that the file names
+:: in the output are always the same.
+
+if NOT defined srcdir set srcdir=.
+if NOT exist %srcdir%\testdata\ (
+  if exist testdata\ (
+    set srcdir=.
+  ) else if exist ..\testdata\ (
+    set srcdir=..
+  ) else if exist ..\..\testdata\ (
+    set srcdir=..\..
+  ) else (
+    echo Cannot find the testdata directory
+    exit /b 1
+  )
+)
+
+:: Check for the availability of UTF-8 support. The exit code of
+:: "pcre2test -C unicode" is stored in utf8 for use by later sections.
+:: NOTE(review): a non-zero value appears to mean that UTF-8 is supported;
+:: confirm against the section that runs the UTF-8 tests.
+
+%pcre2test% -C unicode >nul
+set utf8=%ERRORLEVEL%
+
+:: Check default newline convention. If it does not include LF, force LF.
+:: Only the first word printed by "pcre2test -C newline" is kept in nl. When
+:: it is not LF, ANY or ANYCRLF, the option -N LF is appended to the pcre2grep
+:: command itself, so every later invocation picks it up.
+
+for /f %%a in ('"%pcre2test%" -C newline') do set nl=%%a
+if NOT "%nl%" == "LF" if NOT "%nl%" == "ANY" if NOT "%nl%" == "ANYCRLF" (
+  set pcre2grep=%pcre2grep% -N LF
+  echo Default newline setting forced to LF
+)
+
+:: Create a simple printf via cscript/JScript (an actual printf may translate
+:: LF to CRLF, which this one does not). The generated printf.js writes its
+:: first argument to standard output after turning each backslash-r and
+:: backslash-n sequence into a real CR or LF character. No other escapes and
+:: no format directives are handled, and nothing is appended to the output.
+
+echo WScript.StdOut.Write(WScript.Arguments(0).replace(/\\r/g, "\r").replace(/\\n/g, "\n")) >printf.js
+set printf=cscript //nologo printf.js
+
+:: ------ Normal tests ------
+
+echo Testing pcre2grep main features
+
+echo ---------------------------- Test 1 ------------------------------>testtrygrep
+(pushd %srcdir% & %pcre2grep% PATTERN ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 2 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% "^PATTERN" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 3 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -in PATTERN ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 4 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -ic PATTERN ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 5 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -in PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 6 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -inh PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 7 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -il PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 8 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -l PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 9 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -q PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 10 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -q NEVER-PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 11 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -vn pattern ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 12 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -ix pattern ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 13 ----------------------------->>testtrygrep
+echo seventeen >testtemp1grep
+(pushd %srcdir% & %pcre2grep% -f./testdata/greplist -f %builddir%\testtemp1grep ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 14 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -w pat ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 15 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% "abc^*" ./testdata/grepinput & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 16 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% abc ./testdata/grepinput ./testdata/nonexistfile & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 17 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -M "the\noutput" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 18 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -Mn "(the\noutput|dog\.\n--)" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 19 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -Mix "Pattern" ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 20 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -Mixn "complete pair\nof lines" ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 21 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -nA3 "four" ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 22 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -nB3 "four" ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 23 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -C3 "four" ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 24 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -A9 "four" ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 25 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -nB9 "four" ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 26 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -A9 -B9 "four" ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 27 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -A10 "four" ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 28 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -nB10 "four" ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 29 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -C12 -B10 "four" ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 30 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -inB3 "pattern" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 31 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -inA3 "pattern" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 32 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -L "fox" ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 33 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% "fox" ./testdata/grepnonexist & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 34 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -s "fox" ./testdata/grepnonexist & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 35 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -L -r --include=grepinputx --include grepinput8 --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 36 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude "grepinput$" --exclude=grepinput8 --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+:: Test 37 keeps standard error apart from standard output: it is written to
+:: teststderrgrep and then appended after the RC line, under a STDERR banner,
+:: instead of being interleaved with the normal output.
+echo ---------------------------- Test 37 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep%  "^(a+)*\d" ./testdata/grepinput & popd) >>testtrygrep 2>teststderrgrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+echo ======== STDERR ========>>testtrygrep
+type teststderrgrep >>testtrygrep
+
+echo ---------------------------- Test 38 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% ">\x00<" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 39 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -A1 "before the binary zero" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 40 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -B1 "after the binary zero" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 41 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -B1 -o "\w+ the binary zero" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 42 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -B1 -onH "\w+ the binary zero" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 43 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -on "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 44 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -on -e before -ezero -e after ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 45 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -on -f ./testdata/greplist -e binary ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 46 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -eabc -e "(unclosed" ./testdata/grepinput & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+:: Tests 47-49: in each command below, the caret that ends the line and the
+:: empty line after it must be left exactly as they are. Presumably they make
+:: cmd hand pcre2grep a single argument holding AB.VE, a newline and elephant,
+:: i.e. two fixed strings for -F; confirm before reformatting these lines.
+echo ---------------------------- Test 47 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -Fx AB.VE^
+
+elephant ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 48 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -F AB.VE^
+
+elephant ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 49 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -F -e DATA -e AB.VE^
+
+elephant ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 50 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% "^(abc|def|ghi|jkl)" ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 51 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -Mv "brown\sfox" ./testdata/grepinputv & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 52 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% --colour=always jumps ./testdata/grepinputv & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 53 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% --file-offsets "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 54 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% --line-offsets "before|zero|after" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 55 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -f./testdata/greplist --color=always ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 56 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -c lazy ./testdata/grepinput* & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 57 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -c -l lazy ./testdata/grepinput* & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 58 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --regex=PATTERN ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 59 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --regexp=PATTERN ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 60 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --regex PATTERN ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 61 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --regexp PATTERN ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 62 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --match-limit=1000 --no-jit -M "This is a file(.|\R)*file." ./testdata/grepinput & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 63 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --recursion-limit=1000 --no-jit -M "This is a file(.|\R)*file." ./testdata/grepinput & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 64 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -o1 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 65 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -o2 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 66 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -o3 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 67 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -o12 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 68 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% --only-matching=2 "(?<=PAT)TERN (ap(pear)s)" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 69 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -vn --colour=always pattern ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 70 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3 & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 71 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -o "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 72 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --color=always "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 73 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|^02|^03" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 74 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -o "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 75 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --color=always "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 76 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|02|^03" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 77 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -o "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 78 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --color=always "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 79 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -o --colour=always "^01|^02|03" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 80 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -o "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 81 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --color=always "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 82 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -o --colour=always "\b01|\b02" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 83 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --buffer-size=10 --max-buffer-size=100 "^a" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 84 ----------------------------->>testtrygrep
+echo testdata/grepinput3 >testtemp1grep
+(pushd %srcdir% & %pcre2grep% --file-list ./testdata/grepfilelist --file-list %builddir%\testtemp1grep "fox|complete|t7" & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 85 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --file-list=./testdata/grepfilelist "dolor" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 86 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 87 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% "cat" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 88 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -v "cat" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 89 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -I "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 90 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --binary-files=without-match "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 91 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -a "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 92 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --binary-files=text "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 93 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --text "dog" ./testdata/grepbinary & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 94 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -L -r --include=grepinputx --include grepinput8 "fox" ./testdata/grepinput* | sort & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 95 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --file-list ./testdata/grepfilelist --exclude grepinputv "fox|complete" & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 96 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -L -r --include-dir=testdata --exclude "^^(?^!grepinput)" "fox" ./test* | sort & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 97 ----------------------------->>testtrygrep
+echo grepinput$>testtemp1grep
+echo grepinput8>>testtemp1grep
+(pushd %srcdir% & %pcre2grep% -L -r --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 98 ----------------------------->>testtrygrep
+echo grepinput$>testtemp1grep
+echo grepinput8>>testtemp1grep
+(pushd %srcdir% & %pcre2grep% -L -r --exclude=grepinput3 --include=grepinput --exclude-from %builddir%\testtemp1grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 99 ----------------------------->>testtrygrep
+echo grepinput$>testtemp1grep
+echo grepinput8>testtemp2grep
+(pushd %srcdir% & %pcre2grep% -L -r --include grepinput --exclude-from %builddir%\testtemp1grep --exclude-from=%builddir%\testtemp2grep --exclude-dir="^\." "fox" ./testdata | sort & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 100 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -Ho2 --only-matching=1 -o3 "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 101 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -o3 -Ho2 -o12 --only-matching=1 -o3 --colour=always --om-separator="|" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 102 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -n "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 103 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --only-matching "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 104 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -n --only-matching "^$" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 105 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --colour=always "ipsum|" ./testdata/grepinput3 & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 106 ----------------------------->>testtrygrep
+(pushd %srcdir% & echo a| %pcre2grep% -M "|a" & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 107 ----------------------------->>testtrygrep
+echo a>testtemp1grep
+echo aaaaa>>testtemp1grep
+(pushd %srcdir% & %pcre2grep%  --line-offsets "(?<=\Ka)" %builddir%\testtemp1grep & popd) >>testtrygrep 2>&1
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 108 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -lq PATTERN ./testdata/grepinput ./testdata/grepinputx & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 109 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -cq lazy ./testdata/grepinput* & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 110 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --om-separator / -Mo0 -o1 -o2 "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 111 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --line-offsets -M "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 112 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --file-offsets -M "match (\d+):\n (.)\n" testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 113 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% --total-count "the" testdata/grepinput* & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 114 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -tc "the" testdata/grepinput* & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 115 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -tlc "the" testdata/grepinput* & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 116 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -th "the" testdata/grepinput* & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 117 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -tch "the" testdata/grepinput* & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 118 ----------------------------->>testtrygrep
+(pushd %srcdir% & %pcre2grep% -tL "the" testdata/grepinput* & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 119 ----------------------------->>testtrygrep
+%printf% "123\n456\n789\n---abc\ndef\nxyz\n---\n" >testNinputgrep
+%pcre2grep% -Mo "(\n|[^-])*---" testNinputgrep >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+echo ---------------------------- Test 120 ------------------------------>>testtrygrep
+(pushd %srcdir% & %pcre2grep% -HO "$0:$2$1$3" "(\w+) binary (\w+)(\.)?" ./testdata/grepinput & popd) >>testtrygrep
+echo RC=^%ERRORLEVEL%>>testtrygrep
+
+:: Now compare the results.
+
+%cf% %srcdir%\testdata\grepoutput testtrygrep %cfout%
+if ERRORLEVEL 1 exit /b 1
+
+
+:: These tests require UTF-8 support
+
+if %utf8% neq 0 (
+  echo Testing pcre2grep UTF-8 features
+
+  echo ---------------------------- Test U1 ------------------------------>testtrygrep
+  (pushd %srcdir% & %pcre2grep% -n -u --newline=any "^X" ./testdata/grepinput8 & popd) >>testtrygrep
+  echo RC=^%ERRORLEVEL%>>testtrygrep
+
+  echo ---------------------------- Test U2 ------------------------------>>testtrygrep
+  (pushd %srcdir% & %pcre2grep% -n -u -C 3 --newline=any "Match" ./testdata/grepinput8 & popd) >>testtrygrep
+  echo RC=^%ERRORLEVEL%>>testtrygrep
+
+  echo ---------------------------- Test U3 ------------------------------>>testtrygrep
+  (pushd %srcdir% & %pcre2grep% --line-offsets -u --newline=any "(?<=\K\x{17f})" ./testdata/grepinput8 & popd) >>testtrygrep
+  echo RC=^%ERRORLEVEL%>>testtrygrep
+
+  %cf% %srcdir%\testdata\grepoutput8 testtrygrep %cfout%
+  if ERRORLEVEL 1 exit /b 1
+
+) else (
+  echo Skipping pcre2grep UTF-8 tests: no UTF-8 support in PCRE2 library
+)
+
+
+:: We go to some contortions to try to ensure that the tests for the various
+:: newline settings will work in environments where the normal newline sequence
+:: is not \n. Do not use exported files, whose line endings might be changed.
+:: Instead, create an input file so that its contents are exactly what we want.
+:: These tests are run in the build directory.
+
+echo Testing pcre2grep newline settings
+%printf% "abc\rdef\r\nghi\njkl" >testNinputgrep
+
+echo ---------------------------- Test N1 ------------------------------>testtrygrep
+%pcre2grep% -n -N CR "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
+
+echo ---------------------------- Test N2 ------------------------------>>testtrygrep
+%pcre2grep% -n --newline=crlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
+
+echo ---------------------------- Test N3 ------------------------------>>testtrygrep
+for /f %%a in ('%printf% "def\rjkl"') do set pattern=%%a
+%pcre2grep% -n --newline=cr -F "!pattern!" testNinputgrep >>testtrygrep
+
+echo ---------------------------- Test N4 ------------------------------>>testtrygrep
+%pcre2grep% -n --newline=crlf -F -f %srcdir%/testdata/greppatN4 testNinputgrep >>testtrygrep
+
+echo ---------------------------- Test N5 ------------------------------>>testtrygrep
+%pcre2grep% -n --newline=any "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
+
+echo ---------------------------- Test N6 ------------------------------>>testtrygrep
+%pcre2grep% -n --newline=anycrlf "^(abc|def|ghi|jkl)" testNinputgrep >>testtrygrep
+
+%cf% %srcdir%\testdata\grepoutputN testtrygrep %cfout%
+if ERRORLEVEL 1 exit /b 1
+
+:: If pcre2grep supports script callouts, run some tests on them.
+
+:: The help text says whether callouts can run scripts. The outer check below
+:: sits outside any parenthesised block, so the ERRORLEVEL variable is current
+:: there. Inside the block the variable form would be expanded when the whole
+:: block is parsed and would still hold the outer result, so the inner check
+:: uses the run-time form "if not ERRORLEVEL 1" instead.
+%pcre2grep% --help | %pcre2grep% -q "callout scripts in patterns are supported"
+if %ERRORLEVEL% equ 0 (
+  echo Testing pcre2grep script callouts
+  %pcre2grep% "(T)(..(.))(?C'cmd|/c echo|Arg1: [$1] [$2] [$3]|Arg2: ^$|${1}^$| ($4) ($14) ($0)')()" %srcdir%/testdata/grepinputv >testtrygrep
+  %pcre2grep% "(T)(..(.))()()()()()()()(..)(?C'cmd|/c echo|Arg1: [$11] [${11}]')" %srcdir%/testdata/grepinputv >>testtrygrep
+  %pcre2grep% "(T)(?C'|$0:$1$n')" %srcdir%/testdata/grepinputv >>testtrygrep
+  %pcre2grep% "(T)(?C'|$1$n')(*F)" %srcdir%/testdata/grepinputv >>testtrygrep
+  rem Pick the expected-output file according to the second help-text probe
+  %pcre2grep% --help | %pcre2grep% -q "Non-script callout scripts in patterns are supported"
+  if not ERRORLEVEL 1 (
+    %cf% %srcdir%\testdata\grepoutputCN testtrygrep %cfout%
+  ) else (
+    %cf% %srcdir%\testdata\grepoutputC testtrygrep %cfout%
+  )
+  rem A non-zero result from the comparison aborts the whole test run
+  if ERRORLEVEL 1 exit /b 1
+) else (
+  echo Script callouts are not supported
+)
+
+:: Finally, some tests to exercise code that is not tested above, just to be
+:: sure that it runs OK. Doing this improves the coverage statistics. The output
+:: is not checked.
+
+echo Testing miscellaneous pcre2grep arguments (unchecked)
+%printf% "" >testtrygrep
+call :checkspecial "-xxxxx" 2 || exit /b 1
+call :checkspecial "--help" 0 || exit /b 1
+call :checkspecial "--line-buffered --colour=auto abc nul" 1 || exit /b 1
+
+:: Clean up local working files
+del testcf printf.js testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep
+
+exit /b 0
+
+:: ------ Function to run and check a special pcre2grep arguments test -------
+
+:checkspecial
+  rem First argument: quoted pcre2grep command-line tail, passed on unquoted.
+  rem Second argument: the exit code pcre2grep is expected to return.
+  rem Output and errors are appended to testtrygrep and are not compared.
+  %pcre2grep% %~1 >>testtrygrep 2>&1
+  rem Guard clause: the expected exit code means success
+  if %ERRORLEVEL% equ %2 exit /b 0
+  echo ** pcre2grep %~1 failed - check testtrygrep
+  exit /b 1
+
+:: End

+ 57 - 10
regex.mod/pcre/RunTest

@@ -17,8 +17,16 @@
 # individual test numbers, ranges of tests such as 3-6 or 3- (meaning 3 to the
 # end), or a number preceded by ~ to exclude a test. For example, "3-15 ~10"
 # runs tests 3 to 15, excluding test 10, and just "~10" runs all the tests
-# except test 10. Whatever order the arguments are in, the tests are always run
-# in numerical order.
+# except test 10. Whatever order the arguments are in, these tests are always
+# run in numerical order.
+#
+# If no specific tests are selected (which is the case when this script is run
+# via 'make check') the default is to run all the numbered tests.
+#
+# There may also be named (as well as numbered) tests for special purposes. At
+# present there is just one, called "heap". This test's output contains the
+# sizes of heap frames and frame vectors, which depend on the environment. It
+# is therefore not run unless explicitly requested.
 #
 # Inappropriate tests are automatically skipped (with a comment to say so). For
 # example, if JIT support is not compiled, test 16 is skipped, whereas if JIT
@@ -80,7 +88,9 @@ title22="Test 22: \C tests with UTF (not supported for DFA matching)"
 title23="Test 23: \C disabled test"
 title24="Test 24: Non-UTF pattern conversion tests"
 title25="Test 25: UTF pattern conversion tests"
-maxtest=25
+title26="Test 26: Auto-generated unicode property tests"
+maxtest=26
+titleheap="Test 'heap': Environment-specific heap tests"
 
 if [ $# -eq 1 -a "$1" = "list" ]; then
   echo $title0
@@ -109,6 +119,12 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
   echo $title23
   echo $title24
   echo $title25
+  echo $title26
+  echo ""
+  echo $titleheap
+  echo ""
+  echo "Numbered tests are automatically run if nothing selected."
+  echo "Named tests must be explicitly selected."
   exit 0
 fi
 
@@ -238,6 +254,8 @@ do22=no
 do23=no
 do24=no
 do25=no
+do26=no
+doheap=no
 
 while [ $# -gt 0 ] ; do
   case $1 in
@@ -267,6 +285,8 @@ while [ $# -gt 0 ] ; do
    23) do23=yes;;
    24) do24=yes;;
    25) do25=yes;;
+   26) do26=yes;;
+ heap) doheap=yes;;
    -8) arg8=yes;;
   -16) arg16=yes;;
   -32) arg32=yes;;
@@ -320,7 +340,8 @@ fi
 # set up a large stack.
 
 $sim ./pcre2test -S 64 /dev/null /dev/null
-if [ $? -eq 0 -a "$bigstack" != "" ] ; then
+support_setstack=$?
+if [ $support_setstack -eq 0 -a "$bigstack" != "" ] ; then
   setstack="-S 64"
 else
   setstack=""
@@ -407,8 +428,8 @@ if [ $jit -ne 0 -a "$nojit" != "yes" ] ; then
   fi
 fi
 
-# If no specific tests were requested, select all. Those that are not
-# relevant will be automatically skipped.
+# If no specific tests were requested, select all the numbered tests. Those
+# that are not relevant will be automatically skipped.
 
 if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
      $do4  = no -a $do5  = no -a $do6  = no -a $do7  = no -a \
@@ -416,7 +437,7 @@ if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
      $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \
      $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
      $do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \
-     $do24 = no -a $do25 = no \
+     $do24 = no -a $do25 = no -a $do26 = no -a $doheap = no \
    ]; then
   do0=yes
   do1=yes
@@ -444,6 +465,7 @@ if [ $do0  = no -a $do1  = no -a $do2  = no -a $do3  = no -a \
   do23=yes
   do24=yes
   do25=yes
+  do26=yes
 fi
 
 # Handle any explicit skips at this stage, so that an argument list may consist
@@ -479,7 +501,9 @@ for bmode in "$test8" "$test16" "$test32"; do
     echo '' >testtry
     checkspecial '-C'
     checkspecial '--help'
-    checkspecial '-S 1 -t 10 testSinput'
+    if [ $support_setstack -eq 0 ] ; then
+      checkspecial '-S 1 -t 10 testSinput'
+    fi
     echo "  OK"
   fi
 
@@ -503,7 +527,7 @@ for bmode in "$test8" "$test16" "$test32"; do
       $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
       saverc=$?
       if [ $saverc = 0 ] ; then
-        $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -70,-62,-2,-1,0,100,101,191,200 >>testtry
+        $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -70,-62,-2,-1,0,100,101,191,300 >>testtry
         checkresult $? 2 "$opt"
       else
         checkresult $saverc 2 "$opt"
@@ -632,7 +656,7 @@ for bmode in "$test8" "$test16" "$test32"; do
   # Test of internal offsets and code sizes. This test is run only when there
   # is UTF/UCP support. The actual tests are mostly the same as in some of the
   # above, but in this test we inspect some offsets and sizes. This is a
-  # doublecheck for the maintainer, just in case something changes unexpectely.
+  # doublecheck for the maintainer, just in case something changes unexpectedly.
   # The output from this test is different in 8-bit, 16-bit, and 32-bit modes
   # and for different link sizes, so there are different output files for each
   # mode and link size.
@@ -860,6 +884,29 @@ for bmode in "$test8" "$test16" "$test32"; do
     fi
   fi
 
+  # Auto-generated unicode property tests
+
+  if [ $do26 = yes ] ; then
+    echo $title26
+    if [ $utf -eq 0 ] ; then
+      echo "  Skipped because UTF-$bits support is not available"
+    else
+      for opt in "" $jitopt; do
+        $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry
+        checkresult $? 26 "$opt"
+      done
+    fi
+  fi
+
+  # Manually selected heap tests - output may vary in different environments,
+  # which is why they are not automatically run.
+
+  if [ $doheap = yes ] ; then
+    echo $titleheap
+    $sim $valgrind ./pcre2test -q $setstack $bmode $testdata/testinputheap testtry
+    checkresult $? heap-$bits ""
+  fi
+
 # End of loop for 8/16/32-bit tests
 done
 

+ 6 - 3
regex.mod/pcre/RunTest.bat

@@ -27,6 +27,8 @@
 @rem Tidied and updated for new tests 21, 22, 23 by PH, October 2015.
 @rem PH added missing "set type" for test 22, April 2016.
 @rem PH added copy command for new testbtables file, November 2020
+@rem PH caused it to show comparison output when comparison failed, July 2023
+@rem PH updated unknown error number in test
 
 
 setlocal enabledelayedexpansion
@@ -135,9 +137,9 @@ if "%all%" == "yes" (
   set do7=yes
   set do8=yes
   set do9=yes
-  set do10=yes
+  set do10=no
   set do11=yes
-  set do12=yes
+  set do12=no
   set do13=yes
   set do14=yes
   set do15=yes
@@ -264,7 +266,7 @@ if errorlevel 1 (
   set failed="yes"
   goto :eof
 ) else if [%1]==[2] (
-  %pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -70,-62,-2,-1,0,100,101,191,200 >>%2%bits%\%testoutput%
+  %pcre2test% %mode% %4 %5 %6 %7 %8 %9 -error -70,-62,-2,-1,0,100,101,191,300 >>%2%bits%\%testoutput%
 )
 
 set type=
@@ -292,6 +294,7 @@ if errorlevel 1 (
     echo.
     goto :eof
 )
+  fc /n %srcdir%\testdata\%testoutput%%type% %2%bits%\%testoutput%
 
   set failed="yes"
   goto :eof

+ 19 - 19
regex.mod/pcre/aclocal.m4

@@ -14,14 +14,14 @@
 m4_ifndef([AC_CONFIG_MACRO_DIRS], [m4_defun([_AM_CONFIG_MACRO_DIRS], [])m4_defun([AC_CONFIG_MACRO_DIRS], [_AM_CONFIG_MACRO_DIRS($@)])])
 m4_ifndef([AC_AUTOCONF_VERSION],
   [m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
-m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.71],,
-[m4_warning([this file was generated for autoconf 2.71.
+m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.72],,
+[m4_warning([this file was generated for autoconf 2.72.
 You have another version of autoconf.  It may work, but is not guaranteed to.
 If you have problems, you may need to regenerate the build system entirely.
 To do so, use the procedure documented by the package, typically 'autoreconf'.])])
 
-# pkg.m4 - Macros to locate and utilise pkg-config.   -*- Autoconf -*-
-# serial 11 (pkg-config-0.29.1)
+# pkg.m4 - Macros to locate and use pkg-config.   -*- Autoconf -*-
+# serial 12 (pkg-config-0.29.2)
 
 dnl Copyright © 2004 Scott James Remnant <[email protected]>.
 dnl Copyright © 2012-2015 Dan Nicholson <[email protected]>
@@ -63,7 +63,7 @@ dnl
 dnl See the "Since" comment for each macro you use to see what version
 dnl of the macros you require.
 m4_defun([PKG_PREREQ],
-[m4_define([PKG_MACROS_VERSION], [0.29.1])
+[m4_define([PKG_MACROS_VERSION], [0.29.2])
 m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1,
     [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])])
 ])dnl PKG_PREREQ
@@ -108,7 +108,7 @@ dnl Check to see whether a particular set of modules exists. Similar to
 dnl PKG_CHECK_MODULES(), but does not set variables or print errors.
 dnl
 dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
-dnl only at the first occurence in configure.ac, so if the first place
+dnl only at the first occurrence in configure.ac, so if the first place
 dnl it's called might be skipped (such as if it is within an "if", you
 dnl have to call PKG_CHECK_EXISTS manually
 AC_DEFUN([PKG_CHECK_EXISTS],
@@ -164,7 +164,7 @@ AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
 AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
 
 pkg_failed=no
-AC_MSG_CHECKING([for $1])
+AC_MSG_CHECKING([for $2])
 
 _PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
 _PKG_CONFIG([$1][_LIBS], [libs], [$2])
@@ -174,17 +174,17 @@ and $1[]_LIBS to avoid the need to call pkg-config.
 See the pkg-config man page for more details.])
 
 if test $pkg_failed = yes; then
-   	AC_MSG_RESULT([no])
+        AC_MSG_RESULT([no])
         _PKG_SHORT_ERRORS_SUPPORTED
         if test $_pkg_short_errors_supported = yes; then
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
-        else 
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
+                $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
+        else
+                $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
         fi
-	# Put the nasty error message in config.log where it belongs
-	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
+        # Put the nasty error message in config.log where it belongs
+        echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
 
-	m4_default([$4], [AC_MSG_ERROR(
+        m4_default([$4], [AC_MSG_ERROR(
 [Package requirements ($2) were not met:
 
 $$1_PKG_ERRORS
@@ -195,8 +195,8 @@ installed software in a non-standard prefix.
 _PKG_TEXT])[]dnl
         ])
 elif test $pkg_failed = untried; then
-     	AC_MSG_RESULT([no])
-	m4_default([$4], [AC_MSG_FAILURE(
+        AC_MSG_RESULT([no])
+        m4_default([$4], [AC_MSG_FAILURE(
 [The pkg-config script could not be found or is too old.  Make sure it
 is in your PATH or set the PKG_CONFIG environment variable to the full
 path to pkg-config.
@@ -206,10 +206,10 @@ _PKG_TEXT
 To get pkg-config, see <http://pkg-config.freedesktop.org/>.])[]dnl
         ])
 else
-	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
-	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
+        $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
+        $1[]_LIBS=$pkg_cv_[]$1[]_LIBS
         AC_MSG_RESULT([yes])
-	$3
+        $3
 fi[]dnl
 ])dnl PKG_CHECK_MODULES
 

+ 7 - 8
regex.mod/pcre/ar-lib

@@ -1,17 +1,16 @@
 # Modified from FindReadline.cmake (PH Feb 2012)
 
-if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
+if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
   set(EDITLINE_FOUND TRUE)
-else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
-  FIND_PATH(EDITLINE_INCLUDE_DIR readline.h
-    /usr/include/editline
-    /usr/include/edit/readline  
-    /usr/include/readline
+else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)
+  FIND_PATH(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES
+    editline
+    edit/readline
   )
   
   FIND_LIBRARY(EDITLINE_LIBRARY NAMES edit)
   include(FindPackageHandleStandardArgs)
-  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY )
+  FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
 
   MARK_AS_ADVANCED(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY)
-endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY)
+endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY)

+ 4 - 3
regex.mod/pcre/cmake/pcre2-config.cmake.in

@@ -52,9 +52,9 @@ else ()
   endif ()
 endif ()
 find_library(PCRE2_8BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "8 bit PCRE2 library")
-find_library(PCRE2_16BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "16 bit PCRE2 library")
-find_library(PCRE2_32BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "32 bit PCRE2 library")
-find_library(PCRE2_POSIX_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_8BIT_NAME}d${PCRE2_SUFFIX} DOC "8 bit POSIX PCRE2 library")
+find_library(PCRE2_16BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_16BIT_NAME}d${PCRE2_SUFFIX} DOC "16 bit PCRE2 library")
+find_library(PCRE2_32BIT_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_32BIT_NAME}d${PCRE2_SUFFIX} DOC "32 bit PCRE2 library")
+find_library(PCRE2_POSIX_LIBRARY NAMES ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}${PCRE2_SUFFIX} ${PCRE2_PREFIX}${PCRE2_POSIX_NAME}d${PCRE2_SUFFIX} DOC "8 bit POSIX PCRE2 library")
 unset(PCRE2_NON_STANDARD_LIB_PREFIX)
 unset(PCRE2_NON_STANDARD_LIB_SUFFIX)
 unset(PCRE2_8BIT_NAME)
@@ -126,6 +126,7 @@ if (PCRE2_FOUND)
     endif ()
     set_target_properties(PCRE2::${component} PROPERTIES
       IMPORTED_LOCATION "${PCRE2_${component}_LIBRARY}"
+      IMPORTED_IMPLIB "${PCRE2_${component}_LIBRARY}"
       INTERFACE_INCLUDE_DIRECTORIES "${PCRE2_INCLUDE_DIR}"
     )
     if (component STREQUAL "POSIX")

+ 3 - 1
regex.mod/pcre/compile

@@ -1,8 +1,8 @@
 /* config.h for CMake builds */
 
+#cmakedefine HAVE_BUILTIN_MUL_OVERFLOW 1
 #cmakedefine HAVE_ATTRIBUTE_UNINITIALIZED 1
 #cmakedefine HAVE_DIRENT_H 1
-#cmakedefine HAVE_STRERROR 1
 #cmakedefine HAVE_SYS_STAT_H 1
 #cmakedefine HAVE_SYS_TYPES_H 1
 #cmakedefine HAVE_UNISTD_H 1
@@ -39,10 +39,12 @@
 #cmakedefine HEAP_MATCH_RECURSE 1
 #cmakedefine NEVER_BACKSLASH_C 1
 
+#define PCRE2_EXPORT		@PCRE2_EXPORT@
 #define LINK_SIZE		@PCRE2_LINK_SIZE@
 #define HEAP_LIMIT              @PCRE2_HEAP_LIMIT@
 #define MATCH_LIMIT		@PCRE2_MATCH_LIMIT@
 #define MATCH_LIMIT_DEPTH	@PCRE2_MATCH_LIMIT_DEPTH@
+#define MAX_VARLOOKBEHIND       @PCRE2_MAX_VARLOOKBEHIND@
 #define NEWLINE_DEFAULT         @NEWLINE_DEFAULT@
 #define PARENS_NEST_LIMIT       @PCRE2_PARENS_NEST_LIMIT@
 #define PCRE2GREP_BUFSIZE       @PCRE2GREP_BUFSIZE@

File diff suppressed because it is too large
+ 200 - 159
regex.mod/pcre/config.guess


+ 95 - 20
regex.mod/pcre/configure.ac

@@ -9,20 +9,20 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might
 dnl be defined as -RC2, for example. For real releases, it should be empty.
 
 m4_define(pcre2_major, [10])
-m4_define(pcre2_minor, [39])
+m4_define(pcre2_minor, [43])
 m4_define(pcre2_prerelease, [])
-m4_define(pcre2_date, [2021-10-29])
+m4_define(pcre2_date, [2024-02-16])
 
 # Libtool shared library interface versions (current:revision:age)
-m4_define(libpcre2_8_version,     [10:4:10])
-m4_define(libpcre2_16_version,    [10:4:10])
-m4_define(libpcre2_32_version,    [10:4:10])
-m4_define(libpcre2_posix_version, [3:1:0])
+m4_define(libpcre2_8_version,     [12:0:12])
+m4_define(libpcre2_16_version,    [12:0:12])
+m4_define(libpcre2_32_version,    [12:0:12])
+m4_define(libpcre2_posix_version, [3:5:0])
 
 # NOTE: The CMakeLists.txt file searches for the above variables in the first
 # 50 lines of this file. Please update that if the variables above are moved.
 
-AC_PREREQ([2.60])
+AC_PREREQ([2.62])
 AC_INIT([PCRE2],pcre2_major.pcre2_minor[]pcre2_prerelease,[],[pcre2])
 AC_CONFIG_SRCDIR([src/pcre2.h.in])
 AM_INIT_AUTOMAKE([dist-bzip2 dist-zip])
@@ -42,7 +42,7 @@ AC_CONFIG_MACRO_DIR([m4])
 
 remember_set_CFLAGS="$CFLAGS"
 
-AC_PROG_CC
+m4_version_prereq(2.70, [AC_PROG_CC], [AC_PROG_CC_C99])
 AM_PROG_CC_C_O
 AC_USE_SYSTEM_EXTENSIONS
 
@@ -67,10 +67,34 @@ AC_PROG_INSTALL
 LT_INIT([win32-dll])
 AC_PROG_LN_S
 
+AC_SYS_LARGEFILE
+
 # Check for GCC visibility feature
 
 PCRE2_VISIBILITY
 
+# Check for the mul_overflow() builtin
+
+AC_MSG_CHECKING([for __builtin_mul_overflow()])
+AC_LANG_PUSH([C])
+AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+		#ifdef HAVE_SYS_TYPES_H
+		#include <sys/types.h>
+		#endif
+		#include <stddef.h>
+
+		int a, b;
+		size_t m;
+	]], [[__builtin_mul_overflow(a, b, &m)]])],
+	[pcre2_cc_cv_builtin_mul_overflow=yes],
+	[pcre2_cc_cv_builtin_mul_overflow=no])
+AC_MSG_RESULT([$pcre2_cc_cv_builtin_mul_overflow])
+if test "$pcre2_cc_cv_builtin_mul_overflow" = yes; then
+	AC_DEFINE([HAVE_BUILTIN_MUL_OVERFLOW], 1,
+		[Define this if your compiler provides __builtin_mul_overflow()])
+fi
+AC_LANG_POP([C])
+
 # Check for Clang __attribute__((uninitialized)) feature
 
 AC_MSG_CHECKING([for __attribute__((uninitialized))])
@@ -312,6 +336,12 @@ AC_ARG_WITH(link-size,
                            [internal link size (2, 3, or 4 allowed; default=2)]),
             , with_link_size=2)
 
+# Handle --with-max-varlookbehind=N
+AC_ARG_WITH(max-varlookbehind,
+            AS_HELP_STRING([--with-max-varlookbehind=N],
+                           [maximum length of variable lookbehind (default=255)]),
+            , with_max_varlookbehind=255)
+
 # Handle --with-parens-nest-limit=N
 AC_ARG_WITH(parens-nest-limit,
             AS_HELP_STRING([--with-parens-nest-limit=N],
@@ -365,6 +395,12 @@ AC_ARG_ENABLE(fuzz_support,
                              [enable fuzzer support]),
               , enable_fuzz_support=no)
 
+# Handle --enable-diff-fuzz-support
+AC_ARG_ENABLE(diff_fuzz_support,
+              AS_HELP_STRING([--enable-diff-fuzz-support],
+                             [enable differential fuzzer support]),
+              , enable_diff_fuzz_support=no)
+
 # Handle --disable-stack-for-recursion
 # This option became obsolete at release 10.30.
 AC_ARG_ENABLE(stack-for-recursion,,
@@ -499,12 +535,26 @@ AM_CONDITIONAL(WITH_JIT, test "x$enable_jit" = "xyes")
 AM_CONDITIONAL(WITH_UNICODE, test "x$enable_unicode" = "xyes")
 AM_CONDITIONAL(WITH_VALGRIND, test "x$enable_valgrind" = "xyes")
 AM_CONDITIONAL(WITH_FUZZ_SUPPORT, test "x$enable_fuzz_support" = "xyes")
+AM_CONDITIONAL(WITH_DIFF_FUZZ_SUPPORT, test "x$enable_diff_fuzz_support" = "xyes")
 
 if test "$enable_fuzz_support" = "yes" -a "$enable_pcre2_8" = "no"; then
   echo "** ERROR: Fuzzer support requires the 8-bit library"
   exit 1
 fi
 
+if test "$enable_diff_fuzz_support" = "yes"; then
+  if test "$enable_fuzz_support" = "no"; then
+    echo "** ERROR: Differential fuzzing support requires fuzzing support"
+    exit 1
+  fi
+  if test "$enable_jit" = "no"; then
+    echo "** ERROR: Differential fuzzing support requires Just-in-Time compilation support"
+    exit 1
+  fi
+  AC_DEFINE([SUPPORT_DIFF_FUZZ], [], [
+    Define to any value to enable differential fuzzing support.])
+fi
+
 # Checks for typedefs, structures, and compiler characteristics.
 
 AC_C_CONST
@@ -512,7 +562,20 @@ AC_TYPE_SIZE_T
 
 # Checks for library functions.
 
-AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp realpath secure_getenv strerror)
+AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror)
+AC_MSG_CHECKING([for realpath])
+AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+#include <stdlib.h>
+#include <limits.h>
+]],[[
+char buffer[PATH_MAX];
+realpath(".", buffer);
+]])],
+[AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_REALPATH], 1,
+  [Define to 1 if you have the `realpath' function.])
+],
+AC_MSG_RESULT([no]))
 
 # Check for the availability of libz (aka zlib)
 
@@ -584,14 +647,14 @@ if test "$enable_pcre2test_libreadline" = "yes"; then
  fi
 fi
 
-
 # Check for the availability of libedit. Different distributions put its
 # headers in different places. Try to cover the most common ones.
 
 if test "$enable_pcre2test_libedit" = "yes"; then
-  AC_CHECK_HEADERS([editline/readline.h], [HAVE_EDITLINE_READLINE_H=1],
-    [AC_CHECK_HEADERS([edit/readline/readline.h], [HAVE_READLINE_READLINE_H=1],
-      [AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_READLINE_H=1])])])
+  AC_CHECK_HEADERS([editline/readline.h edit/readline/readline.h readline.h], [
+    HAVE_LIBEDIT_HEADER=1
+    break
+  ])
   AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"])
 fi
 
@@ -603,6 +666,12 @@ if test "x$enable_shared" = "xno" ; then
 fi
 AC_SUBST(PCRE2_STATIC_CFLAG)
 
+PCRE2POSIX_CFLAG=""
+if test "x$enable_shared" = "xyes" ; then
+  PCRE2POSIX_CFLAG="-DPCRE2POSIX_SHARED"
+fi
+AC_SUBST(PCRE2POSIX_CFLAG)
+
 # Here is where PCRE2-specific defines are handled
 
 if test "$enable_pcre2_8" = "yes"; then
@@ -764,6 +833,10 @@ AC_DEFINE_UNQUOTED([LINK_SIZE], [$with_link_size], [
   vast majority of cases. However, PCRE2 can also be compiled to use 3 or 4
   bytes instead. This allows for longer patterns in extreme cases.])
 
+AC_DEFINE_UNQUOTED([MAX_VARLOOKBEHIND], [$with_max_varlookbehind], [
+  The value of MAX_VARLOOKBEHIND specifies the default maximum length, in
+  characters, for a variable-length lookbehind assertion.])
+
 AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
   The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
   parentheses (of any kind) in a pattern. This limits the amount of system
@@ -775,7 +848,7 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
   matching attempt. The value is also used to limit a loop counter in
   pcre2_dfa_match(). There is a runtime interface for setting a different
   limit. The limit exists in order to catch runaway regular expressions that
-  take for ever to determine that they do not match. The default is set very
+  take forever to determine that they do not match. The default is set very
   large so that it does not accidentally catch legitimate cases.])
 
 # --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth
@@ -825,8 +898,9 @@ AH_VERBATIM([PCRE2_EXP_DEFN], [
    Win32, and it needs some magic to be inserted before the definition
    of a function that is exported by the library, define this macro to
    contain the relevant magic. If you do not define this macro, a suitable
-    __declspec value is used for Windows systems; in other environments
-   "extern" is used for a C compiler and "extern C" for a C++ compiler.
+   __declspec value is used for Windows systems; in other environments
+   a compiler relevant "extern" is used with any "visibility" related
+   attributes from PCRE2_EXPORT included.
    This macro appears at the start of every exported function that is part
    of the external API. It does not appear on functions that are "external"
    in the C sense, but which are internal to the library. */
@@ -927,10 +1001,9 @@ if test "$enable_pcre2test_libedit" = "yes"; then
     echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-readline"
     exit 1
   fi
-  if test "$HAVE_EDITLINE_READLINE_H" != "1" -a \
-          "$HAVE_READLINE_READLINE_H" != "1"; then
-    echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h"
-    echo "** nor readline/readline.h was found."
+  if test -z "$HAVE_LIBEDIT_HEADER"; then
+    echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h,"
+    echo "** edit/readline/readline.h nor a compatible header was found."
     exit 1
   fi
   if test -z "$LIBEDIT"; then
@@ -1090,6 +1163,7 @@ $PACKAGE-$VERSION configuration summary:
     EBCDIC code for NL ................. : ${ebcdic_nl_code}
     Rebuild char tables ................ : ${enable_rebuild_chartables}
     Internal link size ................. : ${with_link_size}
+    Maximum variable lookbehind ........ : ${with_max_varlookbehind}
     Nested parentheses limit ........... : ${with_parens_nest_limit}
     Heap limit ......................... : ${with_heap_limit} kibibytes
     Match limit ........................ : ${with_match_limit}
@@ -1108,6 +1182,7 @@ $PACKAGE-$VERSION configuration summary:
     Valgrind support ................... : ${enable_valgrind}
     Code coverage ...................... : ${enable_coverage}
     Fuzzer support ..................... : ${enable_fuzz_support}
+    Differential fuzzer support ........ : ${enable_diff_fuzz_support}
     Use %zu and %td .................... : ${enable_percent_zt}
 
 EOF

+ 41 - 30
regex.mod/pcre/depcomp

@@ -4,7 +4,7 @@ Building PCRE2 without using autotools
 This document contains the following sections:
 
   General
-  Generic instructions for the PCRE2 C library
+  Generic instructions for the PCRE2 C libraries
   Stack size in Windows environments
   Linking programs in Windows environments
   Calling conventions in Windows environments
@@ -17,9 +17,9 @@ This document contains the following sections:
 
 GENERAL
 
-The basic PCRE2 library consists entirely of code written in Standard C, and so
-should compile successfully on any system that has a Standard C compiler and
-library.
+The source of the PCRE2 libraries consists entirely of code written in Standard
+C, and so should compile successfully on any system that has a Standard C
+compiler and library.
 
 The PCRE2 distribution includes a "configure" file for use by the
 configure/make (autotools) build system, as found in many Unix-like
@@ -36,21 +36,25 @@ provided for those who build PCRE2 without using "configure" or CMake. If you
 use "configure" or CMake, the .generic versions are not used.
 
 
-GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARY
+GENERIC INSTRUCTIONS FOR THE PCRE2 C LIBRARIES
 
-The following are generic instructions for building the PCRE2 C library "by
-hand". If you are going to use CMake, this section does not apply to you; you
-can skip ahead to the CMake section. Note that the settings concerned with
-8-bit, 16-bit, and 32-bit code units relate to the type of data string that
-PCRE2 processes. They are NOT referring to the underlying operating system bit
-width. You do not have to do anything special to compile in a 64-bit
-environment, for example.
+There are three possible PCRE2 libraries, each handling data with a specific
+code unit width: 8, 16, or 32 bits. You can build any combination of them. The
+following are generic instructions for building a PCRE2 C library "by hand". If
+you are going to use CMake, this section does not apply to you; you can skip
+ahead to the CMake section. Note that the settings concerned with 8-bit,
+16-bit, and 32-bit code units relate to the type of data string that PCRE2
+processes. They are NOT referring to the underlying operating system bit width.
+You do not have to do anything special to compile in a 64-bit environment, for
+example.
 
  (1) Copy or rename the file src/config.h.generic as src/config.h, and edit the
      macro settings that it contains to whatever is appropriate for your
      environment. In particular, you can alter the definition of the NEWLINE
      macro to specify what character(s) you want to be interpreted as line
-     terminators by default.
+     terminators by default. You need to #define at least one of
+     SUPPORT_PCRE2_8, SUPPORT_PCRE2_16, or SUPPORT_PCRE2_32, depending on which
+     libraries you are going to build. You must set all that apply.
 
      When you subsequently compile any of the PCRE2 modules, you must specify
      -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
@@ -69,7 +73,7 @@ environment, for example.
      Note also that the src/config.h.generic file is created from a config.h
      that was generated by Autotools, which automatically includes settings of
      a number of macros that are not actually used by PCRE2 (for example,
-     HAVE_MEMORY_H).
+     HAVE_DLFCN_H).
 
  (2) Copy or rename the file src/pcre2.h.generic as src/pcre2.h.
 
@@ -97,6 +101,7 @@ environment, for example.
      or else use other -D settings to change the configuration as required.
 
        pcre2_auto_possess.c
+       pcre2_chkdint.c
        pcre2_chartables.c
        pcre2_compile.c
        pcre2_config.c
@@ -135,29 +140,31 @@ environment, for example.
      pcre2_jit_compile.c #includes other files from the sljit subdirectory,
      all of whose names begin with "sljit". It also #includes
      src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
-     these yourself.
+     those yourself.
 
      Note also that the pcre2_fuzzsupport.c file contains special code that is
      useful to those who want to run fuzzing tests on the PCRE2 library. Unless
      you are doing that, you can ignore it.
 
  (5) Now link all the compiled code into an object library in whichever form
-     your system keeps such libraries. This is the basic PCRE2 C 8-bit library.
-     If your system has static and shared libraries, you may have to do this
-     once for each type.
+     your system keeps such libraries. This is the PCRE2 C 8-bit library,
+     typically called something like libpcre2-8. If your system has static and
+     shared libraries, you may have to do this once for each type.
 
  (6) If you want to build a library that supports 16-bit or 32-bit code units,
-     (as well as, or instead of the 8-bit library) just supply 16 or 32 as the
-     value of -DPCRE2_CODE_UNIT_WIDTH when you are compiling.
+     set 16 or 32 as the value of -DPCRE2_CODE_UNIT_WIDTH when obeying step 4
+     above. If you want to build more than one PCRE2 library, repeat steps 4
+     and 5 as necessary.
 
  (7) If you want to build the POSIX wrapper functions (which apply only to the
      8-bit library), ensure that you have the src/pcre2posix.h file and then
      compile src/pcre2posix.c. Link the result (on its own) as the pcre2posix
-     library.
+     library. If targeting a DLL in Windows, make sure to include
+     -DPCRE2POSIX_SHARED with your compiler flags.
 
  (8) The pcre2test program can be linked with any combination of the 8-bit,
-     16-bit and 32-bit libraries (depending on what you selected in
-     src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if
+     16-bit and 32-bit libraries (depending on what you specified in
+     src/config.h). Compile src/pcre2test.c; don't forget -DHAVE_CONFIG_H if
      necessary, but do NOT define PCRE2_CODE_UNIT_WIDTH. Then link with the
      appropriate library/ies. If you compiled an 8-bit library, pcre2test also
      needs the pcre2posix wrapper library.
@@ -185,9 +192,13 @@ environment, for example.
      the RunTest script. You might also like to build and run the freestanding
      JIT test program, src/pcre2_jit_test.c.
 
-(11) If you want to use the pcre2grep command, compile and link
-     src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not
-     need the pcre2posix library). If you have built the PCRE2 library with JIT
+(11) The pcre2test program tests the POSIX wrapper library, but there is also a
+     freestanding test program in src/pcre2posix_test.c. It must be linked with
+     both the pcre2posix library and the 8-bit PCRE2 library.
+
+(12) If you want to use the pcre2grep command, compile and link
+     src/pcre2grep.c; it uses only the 8-bit PCRE2 library (it does not need
+     the pcre2posix library). If you have built the PCRE2 library with JIT
      support by defining SUPPORT_JIT in src/config.h, you can also define
      SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless
      it is run with --no-jit). If you define SUPPORT_PCRE2GREP_JIT without
@@ -211,7 +222,7 @@ CALLING CONVENTIONS IN WINDOWS ENVIRONMENTS
 
 It is possible to compile programs to use different calling conventions using
 MSVC. Search the web for "calling conventions" for more information. To make it
-easier to change the calling convention for the exported functions in the
+easier to change the calling convention for the exported functions in a
 PCRE2 library, the macro PCRE2_CALL_CONVENTION is present in all the external
 definitions. It can be set externally when compiling (e.g. in CFLAGS). If it is
 not set, it defaults to empty; the default calling convention is then used
@@ -306,7 +317,7 @@ cache can be deleted by selecting "File > Delete Cache".
 3.  Create a new, empty build directory, preferably a subdirectory of the
     source dir. For example, C:\pcre2\pcre2-xx\build.
 
-4.  Run cmake-gui from the Shell envirornment of your build tool, for example,
+4.  Run cmake-gui from the Shell environment of your build tool, for example,
     Msys for Msys/MinGW or Visual Studio Command Prompt for VC/VC++. Do not try
     to start Cmake from the Windows Start menu, as this can lead to errors.
 
@@ -373,7 +384,7 @@ Otherwise:
 1. Copy RunTest.bat into the directory where pcre2test.exe and pcre2grep.exe
    have been created.
 
-2. Edit RunTest.bat to indentify the full or relative location of
+2. Edit RunTest.bat to identify the full or relative location of
    the pcre2 source (in which the testdata folder resides), e.g.:
 
    set srcdir=C:\pcre2\pcre2-10.00
@@ -406,5 +417,5 @@ z/OS file formats. The port provides an API for LE languages such as COBOL and
 for the z/OS and z/VM versions of the Rexx languages.
 
 ===========================
-Last Updated: 28 April 2021
+Last Updated: 15 April 2023
 ===========================

+ 77 - 35
regex.mod/pcre/doc/html/README.txt

@@ -8,7 +8,7 @@ features, and the internals have been improved. The original PCRE1 library is
 now obsolete and no longer maintained. The latest release of PCRE2 is available
 in .tar.gz, tar.bz2, or .zip form from this GitHub repository:
 
-https://github.com/PhilipHazel/pcre2/releases
+https://github.com/PCRE2Project/pcre2/releases
 
 There is a mailing list for discussion about the development of PCRE2 at
 [email protected]. You can subscribe by sending an email to
@@ -17,7 +17,7 @@ [email protected].
 You can access the archives and also subscribe or manage your subscription
 here:
 
-https://groups.google.com/pcre2-dev
+https://groups.google.com/g/pcre2-dev
 
 Please read the NEWS file if you are upgrading from a previous release. The
 contents of this README file are:
@@ -114,12 +114,18 @@ Building PCRE2 using autotools
 The following instructions assume the use of the widely used "configure; make;
 make install" (autotools) process.
 
-To build PCRE2 on system that supports autotools, first run the "configure"
-command from the PCRE2 distribution directory, with your current directory set
+If you have downloaded and unpacked a PCRE2 release tarball, run the
+"configure" command from the PCRE2 directory, with your current directory set
 to the directory where you want the files to be created. This command is a
 standard GNU "autoconf" configuration script, for which generic instructions
 are supplied in the file INSTALL.
 
+The files in the GitHub repository do not contain "configure". If you have
+downloaded the PCRE2 source files from GitHub, before you can run "configure"
+you must run the shell script called autogen.sh. This runs a number of
+autotools to create a "configure" script (you must of course have the autotools
+commands installed in order to do this).
+
 Most commonly, people build PCRE2 within its own distribution directory, and in
 this case, on many systems, just running "./configure" is sufficient. However,
 the usual methods of changing standard defaults are available. For example:
@@ -151,7 +157,18 @@ library. They are also documented in the pcre2build man page.
   --disable-shared
   --disable-static
 
-  (See also "Shared libraries on Unix-like systems" below.)
+  Setting --disable-shared ensures that PCRE2 libraries are built as static
+  libraries. The binaries that are then created as part of the build process
+  (for example, pcre2test and pcre2grep) are linked statically with one or more
+  PCRE2 libraries, but may also be dynamically linked with other libraries such
+  as libc. If you want these binaries to be fully statically linked, you can
+  set LDFLAGS like this:
+
+  LDFLAGS=--static ./configure --disable-shared
+
+  Note the two hyphens in --static. Of course, this works only if static
+  versions of all the relevant libraries are available for linking. See also
+  "Shared libraries" below.
 
 . By default, only the 8-bit library is built. If you add --enable-pcre2-16 to
   the "configure" command, the 16-bit library is also built. If you add
@@ -188,10 +205,10 @@ library. They are also documented in the pcre2build man page.
 
   As well as supporting UTF strings, Unicode support includes support for the
   \P, \p, and \X sequences that recognize Unicode character properties.
-  However, only the basic two-letter properties such as Lu are supported.
-  Escape sequences such as \d and \w in patterns do not by default make use of
-  Unicode properties, but can be made to do so by setting the PCRE2_UCP option
-  or starting a pattern with (*UCP).
+  However, only a subset of Unicode properties are supported; see the
+  pcre2pattern man page for details. Escape sequences such as \d and \w in
+  patterns do not by default make use of Unicode properties, but can be made to
+  do so by setting the PCRE2_UCP option or starting a pattern with (*UCP).
 
 . You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
   of the preceding, or any of the Unicode newline sequences, or the NUL (zero)
@@ -265,6 +282,17 @@ library. They are also documented in the pcre2build man page.
   performance in the 8-bit and 16-bit libraries. In the 32-bit library, the
   link size setting is ignored, as 4-byte offsets are always used.
 
+. Lookbehind assertions in which one or more branches can match a variable
+  number of characters are supported only if there is a maximum matching length
+  for each top-level branch. There is a limit to this maximum that defaults to
+  255 characters. You can alter this default by a setting such as
+
+  --with-max-varlookbehind=100
+
+  The limit can be changed at runtime by calling pcre2_set_max_varlookbehind().
+  Lookbehind assertions in which every branch matches a fixed number of
+  characters (not necessarily all the same) are not constrained by this limit.
+
 . For speed, PCRE2 uses four tables for manipulating and identifying characters
   whose code point values are less than 256. By default, it uses a set of
   tables for ASCII encoding that is part of the distribution. If you specify
@@ -363,16 +391,16 @@ library. They are also documented in the pcre2build man page.
   avoided by linking with libedit (which has a BSD licence) instead.
 
   Enabling libreadline causes the -lreadline option to be added to the
-  pcre2test build. In many operating environments with a sytem-installed
+  pcre2test build. In many operating environments with a system-installed
   readline library this is sufficient. However, in some environments (e.g. if
   an unmodified distribution version of readline is in use), it may be
   necessary to specify something like LIBS="-lncurses" as well. This is
   because, to quote the readline INSTALL, "Readline uses the termcap functions,
   but does not link with the termcap or curses library itself, allowing
-  applications which link with readline the to choose an appropriate library."
-  If you get error messages about missing functions tgetstr, tgetent, tputs,
-  tgetflag, or tgoto, this is the problem, and linking with the ncurses library
-  should fix it.
+  applications which link with readline the option to choose an appropriate
+  library." If you get error messages about missing functions tgetstr, tgetent,
+  tputs, tgetflag, or tgoto, this is the problem, and linking with the ncurses
+  library should fix it.
 
 . The C99 standard defines formatting modifiers z and t for size_t and
   ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
@@ -394,24 +422,24 @@ library. They are also documented in the pcre2build man page.
   Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
   be created. This is normally run under valgrind or used when PCRE2 is
   compiled with address sanitizing enabled. It calls the fuzzing function and
-  outputs information about it is doing. The input strings are specified by
-  arguments: if an argument starts with "=" the rest of it is a literal input
-  string. Otherwise, it is assumed to be a file name, and the contents of the
-  file are the test string.
+  outputs information about what it is doing. The input strings are specified
+  by arguments: if an argument starts with "=" the rest of it is a literal
+  input string. Otherwise, it is assumed to be a file name, and the contents
+  of the file are the test string.
 
 . Releases before 10.30 could be compiled with --disable-stack-for-recursion,
   which caused pcre2_match() to use individual blocks on the heap for
   backtracking instead of recursive function calls (which use the stack). This
-  is now obsolete since pcre2_match() was refactored always to use the heap (in
-  a much more efficient way than before). This option is retained for backwards
-  compatibility, but has no effect other than to output a warning.
+  is now obsolete because pcre2_match() was refactored always to use the heap
+  (in a much more efficient way than before). This option is retained for
+  backwards compatibility, but has no effect other than to output a warning.
 
 The "configure" script builds the following files for the basic C library:
 
 . Makefile             the makefile that builds the library
 . src/config.h         build-time configuration options for the library
 . src/pcre2.h          the public PCRE2 header file
-. pcre2-config          script that shows the building settings such as CFLAGS
+. pcre2-config         script that shows the building settings such as CFLAGS
                          that were set for "configure"
 . libpcre2-8.pc        )
 . libpcre2-16.pc       ) data for the pkg-config command
@@ -432,8 +460,9 @@ Once "configure" has run, you can run "make". This builds whichever of the
 libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
 program called pcre2test. If you enabled JIT support with --enable-jit, another
 test program called pcre2_jit_test is built as well. If the 8-bit library is
-built, libpcre2-posix and the pcre2grep command are also built. Running
-"make" with the -j option may speed up compilation on multiprocessor systems.
+built, libpcre2-posix, pcre2posix_test, and the pcre2grep command are also
+built. Running "make" with the -j option may speed up compilation on
+multiprocessor systems.
 
 The command "make check" runs all the appropriate tests. Details of the PCRE2
 tests are given below in a separate section of this document. The -j option of
@@ -542,7 +571,10 @@ configuring it. For example:
 ./configure --prefix=/usr/gnu --disable-shared
 
 Then run "make" in the usual way. Similarly, you can use --disable-static to
-build only shared libraries.
+build only shared libraries. Note, however, that when you build only static
+libraries, binary programs such as pcre2test and pcre2grep may still be
+dynamically linked with other libraries (for example, libc) unless you set
+LDFLAGS to --static when running "configure".
 
 
 Cross-compiling using autotools
@@ -571,9 +603,9 @@ at build time" for more details.
 Making new tarballs
 -------------------
 
-The command "make dist" creates two PCRE2 tarballs, in tar.gz and zip formats.
-The command "make distcheck" does the same, but then does a trial build of the
-new distribution to ensure that it works.
+The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and
+zip formats. The command "make distcheck" does the same, but then does a trial
+build of the new distribution to ensure that it works.
 
 If you have modified any of the man page sources in the doc directory, you
 should first run the PrepareRelease script before making a distribution. This
@@ -585,9 +617,11 @@ Testing PCRE2
 
 To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
 There is another script called RunGrepTest that tests the pcre2grep command.
-When JIT support is enabled, a third test program called pcre2_jit_test is
-built. Both the scripts and all the program tests are run if you obey "make
-check". For other environments, see the instructions in NON-AUTOTOOLS-BUILD.
+When the 8-bit library is built, a test program for the POSIX wrapper, called
+pcre2posix_test, is compiled, and when JIT support is enabled, a test program
+called pcre2_jit_test is built. The scripts and the program tests are all run
+when you obey "make check". For other environments, see the instructions in
+NON-AUTOTOOLS-BUILD.
 
 The RunTest script runs the pcre2test test program (which is documented in its
 own man page) on each of the relevant testinput files in the testdata
@@ -602,13 +636,13 @@ is available. RunTest outputs a comment when it skips a test.
 
 Many (but not all) of the tests that are not skipped are run twice if JIT
 support is available. On the second run, JIT compilation is forced. This
-testing can be suppressed by putting "nojit" on the RunTest command line.
+testing can be suppressed by putting "-nojit" on the RunTest command line.
 
 The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit
 libraries that are enabled. If you want to run just one set of tests, call
 RunTest with either the -8, -16 or -32 option.
 
-If valgrind is installed, you can run the tests under it by putting "valgrind"
+If valgrind is installed, you can run the tests under it by putting "-valgrind"
 on the RunTest command line. To run pcre2test on just one or more specific test
 files, give their numbers as arguments to RunTest, for example:
 
@@ -689,7 +723,7 @@ Test 14 contains some special UTF and UCP tests that give different output for
 different code unit widths.
 
 Test 15 contains a number of tests that must not be run with JIT. They check,
-among other non-JIT things, the match-limiting features of the intepretive
+among other non-JIT things, the match-limiting features of the interpretive
 matcher.
 
 Test 16 is run only when JIT support is not available. It checks that an
@@ -710,6 +744,9 @@ and with UTF support, respectively. Test 23 tests \C when it is locked out.
 Tests 24 and 25 test the experimental pattern conversion functions, without and
 with UTF support, respectively.
 
+Test 26 checks Unicode property support using tests that are generated
+automatically from the Unicode data tables.
+
 
 Character tables
 ----------------
@@ -788,6 +825,7 @@ The distribution should contain the files listed below.
 
   src/pcre2posix.c         )
   src/pcre2_auto_possess.c )
+  src/pcre2_chkdint.c      )
   src/pcre2_compile.c      )
   src/pcre2_config.c       )
   src/pcre2_context.c      )
@@ -813,6 +851,7 @@ The distribution should contain the files listed below.
   src/pcre2_substring.c    )
   src/pcre2_tables.c       )
   src/pcre2_ucd.c          )
+  src/pcre2_ucptables.c    )
   src/pcre2_valid_utf.c    )
   src/pcre2_xclass.c       )
 
@@ -824,6 +863,8 @@ The distribution should contain the files listed below.
   src/pcre2posix.h         header for the external POSIX wrapper API
   src/pcre2_internal.h     header for internal use
   src/pcre2_intmodedep.h   a mode-specific internal header
+  src/pcre2_jit_neon_inc.h header used by JIT
+  src/pcre2_jit_simd_inc.h header used by JIT
   src/pcre2_ucp.h          header for Unicode property handling
 
   sljit/*                  source files for the JIT compiler
@@ -834,6 +875,7 @@ The distribution should contain the files listed below.
   src/pcre2grep.c          source of a grep utility that uses PCRE2
   src/pcre2test.c          comprehensive test program
   src/pcre2_jit_test.c     JIT test program
+  src/pcre2posix_test.c    POSIX wrapper API test program
 
 (C) Auxiliary files:
 
@@ -905,4 +947,4 @@ The distribution should contain the files listed below.
 Philip Hazel
 Email local part: Philip.Hazel
 Email domain: gmail.com
-Last updated: 29 October 2021
+Last updated: 24 November 2023

+ 3 - 0
regex.mod/pcre/doc/html/index.html

@@ -255,6 +255,9 @@ in the library.
 <tr><td><a href="pcre2_set_max_pattern_length.html">pcre2_set_max_pattern_length</a></td>
     <td>&nbsp;&nbsp;Set the maximum length of pattern</td></tr>
 
+<tr><td><a href="pcre2_set_max_varlookbehind.html">pcre2_set_max_varlookbehind</a></td>
+    <td>&nbsp;&nbsp;Set the maximum match length for a variable-length lookbehind</td></tr>
+
 <tr><td><a href="pcre2_set_newline.html">pcre2_set_newline</a></td>
     <td>&nbsp;&nbsp;Set the newline convention</td></tr>
 

+ 21 - 9
regex.mod/pcre/doc/html/pcre2_compile.html

@@ -32,24 +32,26 @@ arguments are:
 <pre>
   <i>pattern</i>       A string containing expression to be compiled
   <i>length</i>        The length of the string or PCRE2_ZERO_TERMINATED
-  <i>options</i>       Option bits
+  <i>options</i>       Primary option bits
   <i>errorcode</i>     Where to put an error code
   <i>erroroffset</i>   Where to put an error offset
   <i>ccontext</i>      Pointer to a compile context or NULL
 </pre>
 The length of the pattern and any error offset that is returned are in code
-units, not characters. A compile context is needed only if you want to provide
-custom memory allocation functions, or to provide an external function for
-system stack size checking, or to change one or more of these parameters:
+units, not characters. A NULL pattern with zero length is treated as an empty
+string. A compile context is needed only if you want to provide custom memory
+allocation functions, or to provide an external function for system stack size
+checking (see <b>pcre2_set_compile_recursion_guard()</b>), or to change one or
+more of these parameters:
 <pre>
   What \R matches (Unicode newlines, or CR, LF, CRLF only);
   PCRE2's character tables;
   The newline character sequence;
   The compile time nested parentheses limit;
-  The maximum pattern length (in code units) that is allowed.
-  The additional options bits (see pcre2_set_compile_extra_options())
+  The maximum pattern length (in code units) that is allowed;
+  The additional options bits.
 </pre>
-The option bits are:
+The primary option bits are:
 <pre>
   PCRE2_ANCHORED           Force pattern anchoring
   PCRE2_ALLOW_EMPTY_CLASS  Allow empty classes
@@ -92,8 +94,18 @@ Additional options may be set in the compile context via the
 function.
 </P>
 <P>
-The yield of this function is a pointer to a private data structure that
-contains the compiled pattern, or NULL if an error was detected.
+If either of <i>errorcode</i> or <i>erroroffset</i> is NULL, the function returns
+NULL immediately. Otherwise, the yield of this function is a pointer to a
+private data structure that contains the compiled pattern, or NULL if an error
+was detected. In the error case, a text error message can be obtained by
+passing the value returned via the <i>errorcode</i> argument to the
+<b>pcre2_get_error_message()</b> function. The offset (in code units) where the
+error was encountered is returned via the <i>erroroffset</i> argument.
+</P>
+<P>
+If there is no error, the value returned via <i>errorcode</i> yields the message
+"no error" when passed to <b>pcre2_get_error_message()</b>, and the value
+returned via <i>erroroffset</i> is zero.
 </P>
 <P>
 There is a complete description of the PCRE2 native API, with more detail on

+ 1 - 1
regex.mod/pcre/doc/html/pcre2_general_context_create.html

@@ -20,7 +20,7 @@ SYNOPSIS
 </P>
 <P>
 <b>pcre2_general_context *pcre2_general_context_create(</b>
-<b>  void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
+<b>  void *(*<i>private_malloc</i>)(size_t, void *),</b>
 <b>  void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
 </P>
 <br><b>

+ 40 - 0
regex.mod/pcre/doc/html/pcre2_get_match_data_heapframes_size.html

@@ -0,0 +1,40 @@
+<html>
+<head>
+<title>pcre2_get_match_data_heapframes_size specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2_get_match_data_heapframes_size man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+SYNOPSIS
+</b><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>PCRE2_SIZE pcre2_get_match_data_heapframes_size(</b>
+<b>  pcre2_match_data *<i>match_data</i>);</b>
+</P>
+<br><b>
+DESCRIPTION
+</b><br>
+<P>
+This function returns the size, in bytes, of the heapframes data block that is
+owned by its argument.
+</P>
+<P>
+There is a complete description of the PCRE2 native API in the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+page and a description of the POSIX API in the
+<a href="pcre2posix.html"><b>pcre2posix</b></a>
+page.
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>

+ 12 - 2
regex.mod/pcre/doc/html/pcre2_jit_match.html

@@ -32,7 +32,17 @@ This function matches a compiled regular expression that has been successfully
 processed by the JIT compiler against a given subject string, using a matching
 algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and
 it bypasses some of the sanity checks that <b>pcre2_match()</b> applies.
-Its arguments are exactly the same as for
+</P>
+<P>
+In UTF mode, the subject string is not checked for UTF validity. Unless
+PCRE2_MATCH_INVALID_UTF was set when the pattern was compiled, passing an
+invalid UTF string results in undefined behaviour. Your program may crash or
+loop or give wrong results. In the absence of PCRE2_MATCH_INVALID_UTF you
+should only call <b>pcre2_jit_match()</b> in UTF mode if you are sure the
+subject is valid.
+</P>
+<P>
+The arguments for <b>pcre2_jit_match()</b> are exactly the same as for
 <a href="pcre2_match.html"><b>pcre2_match()</b>,</a>
 except that the subject string must be specified with a length;
 PCRE2_ZERO_TERMINATED is not supported.
@@ -40,7 +50,7 @@ PCRE2_ZERO_TERMINATED is not supported.
 <P>
 The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
 PCRE2_NOTEMPTY_ATSTART, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Unsupported
-options are ignored. The subject string is not checked for UTF validity.
+options are ignored.
 </P>
 <P>
 The return values are the same as for <b>pcre2_match()</b> plus

+ 4 - 3
regex.mod/pcre/doc/html/pcre2_jit_stack_create.html

@@ -19,8 +19,8 @@ SYNOPSIS
 <b>#include &#60;pcre2.h&#62;</b>
 </P>
 <P>
-<b>pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE <i>startsize</i>,</b>
-<b>  PCRE2_SIZE <i>maxsize</i>, pcre2_general_context *<i>gcontext</i>);</b>
+<b>pcre2_jit_stack *pcre2_jit_stack_create(size_t <i>startsize</i>,</b>
+<b>  size_t <i>maxsize</i>, pcre2_general_context *<i>gcontext</i>);</b>
 </P>
 <br><b>
 DESCRIPTION
@@ -34,7 +34,8 @@ allocation. The result can be passed to the JIT run-time code by calling
 <b>pcre2_jit_stack_assign()</b> to associate the stack with a compiled pattern,
 which can then be processed by <b>pcre2_match()</b> or <b>pcre2_jit_match()</b>.
 A maximum stack size of 512KiB to 1MiB should be more than enough for any
-pattern. For more details, see the
+pattern. If the stack couldn't be allocated or the values passed were not
+reasonable, NULL will be returned. For more details, see the
 <a href="pcre2jit.html"><b>pcre2jit</b></a>
 page.
 </P>

+ 2 - 0
regex.mod/pcre/doc/html/pcre2_match.html

@@ -62,6 +62,8 @@ terminated by a binary zero code unit. The options are:
   PCRE2_ANCHORED          Match only at the first position
   PCRE2_COPY_MATCHED_SUBJECT
                           On success, make a private subject copy
+  PCRE2_DISABLE_RECURSELOOP_CHECK
+                          Only useful in rare cases; use with care
   PCRE2_ENDANCHORED       Pattern can match only at end of subject
   PCRE2_NOTBOL            Subject string is not the beginning of a line
   PCRE2_NOTEOL            Subject string is not the end of a line

+ 1 - 1
regex.mod/pcre/doc/html/pcre2_match_data_create_from_pattern.html

@@ -33,7 +33,7 @@ offsets that are required in the match data block. These form the "output
 vector" (ovector) within the match data block, and are used to identify the
 matched string and any captured substrings when matching with
 <b>pcre2_match()</b>. If you are using <b>pcre2_dfa_match()</b>, which uses the
-outut vector in a different way, you should use <b>pcre2_match_data_create()</b>
+output vector in a different way, you should use <b>pcre2_match_data_create()</b>
 instead of this function.
 </P>
 <P>

+ 5 - 3
regex.mod/pcre/doc/html/pcre2_match_data_free.html

@@ -28,12 +28,14 @@ DESCRIPTION
 If <i>match_data</i> is NULL, this function does nothing. Otherwise,
 <i>match_data</i> must point to a match data block, which this function frees,
 using the memory freeing function from the general context or compiled pattern
-with which it was created, or <b>free()</b> if that was not set.
+with which it was created, or <b>free()</b> if that was not set. If the match
+data block was previously passed to <b>pcre2_match()</b>, it will have an
+attached heapframe vector; this is also freed.
 </P>
 <P>
 If the PCRE2_COPY_MATCHED_SUBJECT was used for a successful match using this
-match data block, the copy of the subject that was remembered with the block is
-also freed.
+match data block, the copy of the subject that was referenced within the block
+is also freed.
 </P>
 <P>
 There is a complete description of the PCRE2 native API in the

+ 1 - 1
regex.mod/pcre/doc/html/pcre2_serialize_decode.html

@@ -48,7 +48,7 @@ the following negative error codes:
   PCRE2_ERROR_BADDATA   <i>number_of_codes</i> is zero or less
   PCRE2_ERROR_BADMAGIC  mismatch of id bytes in <i>bytes</i>
   PCRE2_ERROR_BADMODE   mismatch of variable unit size or PCRE version
-  PCRE2_ERROR_MEMORY    memory allocation failed
+  PCRE2_ERROR_NOMEMORY  memory allocation failed
   PCRE2_ERROR_NULL      <i>codes</i> or <i>bytes</i> is NULL
 </pre>
 PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled

+ 8 - 2
regex.mod/pcre/doc/html/pcre2_set_compile_extra_options.html

@@ -30,10 +30,16 @@ This function sets additional option bits for <b>pcre2_compile()</b> that are
 housed in a compile context. It completely replaces all the bits. The extra
 options are:
 <pre>
-  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     Allow \K in lookarounds PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \x{df800} to \x{dfff}
-                                         in UTF-8 and UTF-32 modes
+  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     Allow \K in lookarounds
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
   PCRE2_EXTRA_ALT_BSUX                 Extended alternate \u, \U, and \x handling
+  PCRE2_EXTRA_ASCII_BSD                \d remains ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_BSS                \s remains ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_BSW                \w remains ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_DIGIT              [:digit:] and [:xdigit:] POSIX classes remain ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_POSIX              POSIX classes remain ASCII in UCP mode
   PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL    Treat all invalid escapes as a literal following character
+  PCRE2_EXTRA_CASELESS_RESTRICT        Disable mixed ASCII/non-ASCII case folding
   PCRE2_EXTRA_ESCAPED_CR_IS_LF         Interpret \r as \n
   PCRE2_EXTRA_MATCH_LINE               Pattern matches whole lines
   PCRE2_EXTRA_MATCH_WORD               Pattern matches "words"

+ 42 - 0
regex.mod/pcre/doc/html/pcre2_set_max_varlookbehind.html

@@ -0,0 +1,42 @@
+<html>
+<head>
+<title>pcre2_set_max_varlookbehind specification</title>
+</head>
+<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
+<h1>pcre2_set_max_varlookbehind man page</h1>
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>
+<p>
+This page is part of the PCRE2 HTML documentation. It was generated
+automatically from the original man page. If there is any nonsense in it,
+please consult the man page, in case the conversion went wrong.
+<br>
+<br><b>
+SYNOPSIS
+</b><br>
+<P>
+<b>#include &#60;pcre2.h&#62;</b>
+</P>
+<P>
+<b>int pcre2_set_max_varlookbehind(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+</P>
+<br><b>
+DESCRIPTION
+</b><br>
+<P>
+This sets the maximum number of characters that can be matched by a
+variable-length lookbehind assertion. The default is set when PCRE2 is built,
+with the ultimate default being 255, the same as Perl. Lookbehind assertions
+without a bounding length are not supported. The result is always zero.
+</P>
+<P>
+There is a complete description of the PCRE2 native API in the
+<a href="pcre2api.html"><b>pcre2api</b></a>
+page and a description of the POSIX API in the
+<a href="pcre2posix.html"><b>pcre2posix</b></a>
+page.
+<p>
+Return to the <a href="index.html">PCRE2 index page</a>.
+</p>

+ 1 - 1
regex.mod/pcre/doc/html/pcre2_set_recursion_memory_management.html

@@ -21,7 +21,7 @@ SYNOPSIS
 <P>
 <b>int pcre2_set_recursion_memory_management(</b>
 <b>  pcre2_match_context *<i>mcontext</i>,</b>
-<b>  void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
+<b>  void *(*<i>private_malloc</i>)(size_t, void *),</b>
 <b>  void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
 </P>
 <br><b>

+ 17 - 17
regex.mod/pcre/doc/html/pcre2_substitute.html

@@ -68,29 +68,29 @@ automatically added.
 The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
 zero-terminated strings. The options are:
 <pre>
-  PCRE2_ANCHORED             Match only at the first position
-  PCRE2_ENDANCHORED          Pattern can match only at end of subject
-  PCRE2_NOTBOL               Subject is not the beginning of a line
-  PCRE2_NOTEOL               Subject is not the end of a line
-  PCRE2_NOTEMPTY             An empty string is not a valid match
-  PCRE2_NOTEMPTY_ATSTART     An empty string at the start of the subject is not a valid match
-  PCRE2_NO_JIT               Do not use JIT matching
-  PCRE2_NO_UTF_CHECK         Do not check the subject or replacement for UTF validity (only relevant if
-                              PCRE2_UTF was set at compile time)
-  PCRE2_SUBSTITUTE_EXTENDED  Do extended replacement processing
-  PCRE2_SUBSTITUTE_GLOBAL    Replace all occurrences in the subject
-  PCRE2_SUBSTITUTE_LITERAL   The replacement string is literal
-  PCRE2_SUBSTITUTE_MATCHED   Use pre-existing match data for 1st match
-  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  If overflow, compute needed length
+  PCRE2_ANCHORED                     Match only at the first position
+  PCRE2_ENDANCHORED                  Match only at end of subject
+  PCRE2_NOTBOL                       Subject is not the beginning of a line
+  PCRE2_NOTEOL                       Subject is not the end of a line
+  PCRE2_NOTEMPTY                     An empty string is not a valid match
+  PCRE2_NOTEMPTY_ATSTART             An empty string at the start of the subject is not a valid match
+  PCRE2_NO_JIT                       Do not use JIT matching
+  PCRE2_NO_UTF_CHECK                 Do not check for UTF validity in the subject or replacement
+                                      (only relevant if PCRE2_UTF was set at compile time)
+  PCRE2_SUBSTITUTE_EXTENDED          Do extended replacement processing
+  PCRE2_SUBSTITUTE_GLOBAL            Replace all occurrences in the subject
+  PCRE2_SUBSTITUTE_LITERAL           The replacement string is literal
+  PCRE2_SUBSTITUTE_MATCHED           Use pre-existing match data for first match
+  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH   If overflow, compute needed length
   PCRE2_SUBSTITUTE_REPLACEMENT_ONLY  Return only replacement string(s)
-  PCRE2_SUBSTITUTE_UNKNOWN_UNSET  Treat unknown group as unset
-  PCRE2_SUBSTITUTE_UNSET_EMPTY  Simple unset insert = empty string
+  PCRE2_SUBSTITUTE_UNKNOWN_UNSET     Treat unknown group as unset
+  PCRE2_SUBSTITUTE_UNSET_EMPTY       Simple unset insert = empty string
 </pre>
 If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED,
 PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored.
 </P>
 <P>
-If PCRE2_SUBSTITUTE_MATCHED is set, <i>match_data</i> must be non-zero; its
+If PCRE2_SUBSTITUTE_MATCHED is set, <i>match_data</i> must be non-NULL; its
 contents must be the result of a call to <b>pcre2_match()</b> using the same
 pattern and subject.
 </P>

+ 1 - 1
regex.mod/pcre/doc/html/pcre2_substring_list_free.html

@@ -19,7 +19,7 @@ SYNOPSIS
 <b>#include &#60;pcre2.h&#62;</b>
 </P>
 <P>
-<b>void pcre2_substring_list_free(PCRE2_SPTR *<i>list</i>);</b>
+<b>void pcre2_substring_list_free(PCRE2_UCHAR **<i>list</i>);</b>
 </P>
 <br><b>
 DESCRIPTION

+ 301 - 148
regex.mod/pcre/doc/html/pcre2api.html

@@ -39,22 +39,23 @@ please consult the man page, in case the conversion went wrong.
 <li><a name="TOC24" href="#SEC24">INFORMATION ABOUT A PATTERN'S CALLOUTS</a>
 <li><a name="TOC25" href="#SEC25">SERIALIZATION AND PRECOMPILING</a>
 <li><a name="TOC26" href="#SEC26">THE MATCH DATA BLOCK</a>
-<li><a name="TOC27" href="#SEC27">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
-<li><a name="TOC28" href="#SEC28">NEWLINE HANDLING WHEN MATCHING</a>
-<li><a name="TOC29" href="#SEC29">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
-<li><a name="TOC30" href="#SEC30">OTHER INFORMATION ABOUT A MATCH</a>
-<li><a name="TOC31" href="#SEC31">ERROR RETURNS FROM <b>pcre2_match()</b></a>
-<li><a name="TOC32" href="#SEC32">OBTAINING A TEXTUAL ERROR MESSAGE</a>
-<li><a name="TOC33" href="#SEC33">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
-<li><a name="TOC34" href="#SEC34">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
-<li><a name="TOC35" href="#SEC35">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
-<li><a name="TOC36" href="#SEC36">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
-<li><a name="TOC37" href="#SEC37">DUPLICATE CAPTURE GROUP NAMES</a>
-<li><a name="TOC38" href="#SEC38">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
-<li><a name="TOC39" href="#SEC39">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
-<li><a name="TOC40" href="#SEC40">SEE ALSO</a>
-<li><a name="TOC41" href="#SEC41">AUTHOR</a>
-<li><a name="TOC42" href="#SEC42">REVISION</a>
+<li><a name="TOC27" href="#SEC27">MEMORY USE FOR MATCH DATA BLOCKS</a>
+<li><a name="TOC28" href="#SEC28">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
+<li><a name="TOC29" href="#SEC29">NEWLINE HANDLING WHEN MATCHING</a>
+<li><a name="TOC30" href="#SEC30">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
+<li><a name="TOC31" href="#SEC31">OTHER INFORMATION ABOUT A MATCH</a>
+<li><a name="TOC32" href="#SEC32">ERROR RETURNS FROM <b>pcre2_match()</b></a>
+<li><a name="TOC33" href="#SEC33">OBTAINING A TEXTUAL ERROR MESSAGE</a>
+<li><a name="TOC34" href="#SEC34">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
+<li><a name="TOC35" href="#SEC35">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
+<li><a name="TOC36" href="#SEC36">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
+<li><a name="TOC37" href="#SEC37">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
+<li><a name="TOC38" href="#SEC38">DUPLICATE CAPTURE GROUP NAMES</a>
+<li><a name="TOC39" href="#SEC39">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
+<li><a name="TOC40" href="#SEC40">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
+<li><a name="TOC41" href="#SEC41">SEE ALSO</a>
+<li><a name="TOC42" href="#SEC42">AUTHOR</a>
+<li><a name="TOC43" href="#SEC43">REVISION</a>
 </ul>
 <P>
 <b>#include &#60;pcre2.h&#62;</b>
@@ -103,6 +104,13 @@ document for an overview of all the PCRE2 documentation.
 <b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
 <br>
 <br>
+<b>PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *<i>match_data</i>);</b>
+<br>
+<br>
+<b>PCRE2_SIZE pcre2_get_match_data_heapframes_size(</b>
+<b>  pcre2_match_data *<i>match_data</i>);</b>
+<br>
+<br>
 <b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
 <br>
 <br>
@@ -153,6 +161,10 @@ document for an overview of all the PCRE2 documentation.
 <b>  PCRE2_SIZE <i>value</i>);</b>
 <br>
 <br>
+<b>int pcre2_set_max_varlookbehind(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
 <b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
@@ -241,7 +253,7 @@ document for an overview of all the PCRE2 documentation.
 <b>  PCRE2_SPTR <i>name</i>);</b>
 <br>
 <br>
-<b>void pcre2_substring_list_free(PCRE2_SPTR *<i>list</i>);</b>
+<b>void pcre2_substring_list_free(PCRE2_UCHAR **<i>list</i>);</b>
 <br>
 <br>
 <b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
@@ -270,8 +282,8 @@ document for an overview of all the PCRE2 documentation.
 <b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
-<b>pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE <i>startsize</i>,</b>
-<b>  PCRE2_SIZE <i>maxsize</i>, pcre2_general_context *<i>gcontext</i>);</b>
+<b>pcre2_jit_stack *pcre2_jit_stack_create(size_t <i>startsize</i>,</b>
+<b>  size_t <i>maxsize</i>, pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
 <b>void pcre2_jit_stack_assign(pcre2_match_context *<i>mcontext</i>,</b>
@@ -335,7 +347,7 @@ document for an overview of all the PCRE2 documentation.
 <br>
 <b>int pcre2_set_recursion_memory_management(</b>
 <b>  pcre2_match_context *<i>mcontext</i>,</b>
-<b>  void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
+<b>  void *(*<i>private_malloc</i>)(size_t, void *),</b>
 <b>  void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
 <br>
 <br>
@@ -388,11 +400,8 @@ This contains the function prototypes and other definitions for all three
 libraries. One, two, or all three can be installed simultaneously. On Unix-like
 systems the libraries are called <b>libpcre2-8</b>, <b>libpcre2-16</b>, and
 <b>libpcre2-32</b>, and they can also co-exist with the original PCRE libraries.
-</P>
-<P>
-Character strings are passed to and from a PCRE2 library as a sequence of
-unsigned integers in code units of the appropriate width. Every PCRE2 function
-comes in three different forms, one for each library, for example:
+Every PCRE2 function comes in three different forms, one for each library, for
+example:
 <pre>
   <b>pcre2_compile_8()</b>
   <b>pcre2_compile_16()</b>
@@ -403,10 +412,16 @@ There are also three different sets of data types:
   <b>PCRE2_UCHAR8, PCRE2_UCHAR16, PCRE2_UCHAR32</b>
   <b>PCRE2_SPTR8,  PCRE2_SPTR16,  PCRE2_SPTR32</b>
 </pre>
-The UCHAR types define unsigned code units of the appropriate widths. For
-example, PCRE2_UCHAR16 is usually defined as `uint16_t'. The SPTR types are
-constant pointers to the equivalent UCHAR types, that is, they are pointers to
-vectors of unsigned code units.
+The UCHAR types define unsigned code units of the appropriate widths.
+For example, PCRE2_UCHAR16 is usually defined as `uint16_t'.
+The SPTR types are pointers to constants of the equivalent UCHAR types,
+that is, they are pointers to vectors of unsigned code units.
+</P>
+<P>
+Character strings are passed to a PCRE2 library as sequences of unsigned
+integers in code units of the appropriate width. The length of a string may
+be given as a number of code units, or the string may be specified as
+zero-terminated.
 </P>
 <P>
 Many applications use only one code unit width. For their convenience, macros
@@ -446,7 +461,7 @@ names, without the _8, _16, or _32 suffix.
 PCRE2 has its own native API, which is described in this document. There are
 also some wrapper functions for the 8-bit library that correspond to the
 POSIX regular expression API, but they do not give access to all the
-functionality of PCRE2. They are described in the
+functionality of PCRE2 and they are not thread-safe. They are described in the
 <a href="pcre2posix.html"><b>pcre2posix</b></a>
 documentation. Both these APIs define a set of C function calls.
 </P>
@@ -559,7 +574,8 @@ unsigned integer type, currently always defined as <i>size_t</i>. The largest
 value that can be stored in such a type (that is ~(PCRE2_SIZE)0) is reserved
 as a special indicator for zero-terminated strings and unset offsets.
 Therefore, the longest string that can be handled is one less than this
-maximum.
+maximum. Note that string lengths are always given in code units. Only in the
+8-bit library is such a length the same as the number of bytes in the string.
 <a name="newlines"></a></P>
 <br><a name="SEC16" href="#TOC1">NEWLINES</a><br>
 <P>
@@ -858,6 +874,16 @@ external sources can limit their size. The default is the largest number that a
 PCRE2_SIZE variable can hold, which is effectively unlimited.
 <br>
 <br>
+<b>int pcre2_set_max_varlookbehind(pcre2_compile_context *<i>ccontext</i>,</b>
+<b>  uint32_t <i>value</i>);</b>
+<br>
+<br>
+This sets the maximum number of characters that can be matched by a
+variable-length lookbehind assertion. The default is set when PCRE2 is built,
+with the ultimate default being 255, the same as Perl. Lookbehind assertions
+without a bounding length are not supported.
+<br>
+<br>
 <b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
 <b>  uint32_t <i>value</i>);</b>
 <br>
@@ -1017,7 +1043,7 @@ has its own memory control arrangements (see the
 documentation for more details). If the limit is reached, the negative error
 code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
 is built; if it is not, the default is set very large and is essentially
-"unlimited".
+unlimited.
 </P>
 <P>
 A value for the heap limit may also be supplied by an item at the start of a
@@ -1030,19 +1056,17 @@ less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
 limit is set, less than the default.
 </P>
 <P>
-The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
-stack for recording backtracking points. The more nested backtracking points
-there are (that is, the deeper the search tree), the more memory is needed.
-Heap memory is used only if the initial vector is too small. If the heap limit
-is set to a value less than 21 (in particular, zero) no heap memory will be
-used. In this case, only patterns that do not have a lot of nested backtracking
-can be successfully processed.
+The <b>pcre2_match()</b> function always needs some heap memory, so setting a
+value of zero guarantees a "heap limit exceeded" error. Details of how
+<b>pcre2_match()</b> uses the heap are given in the
+<a href="pcre2perform.html"><b>pcre2perform</b></a>
+documentation.
 </P>
 <P>
-Similarly, for <b>pcre2_dfa_match()</b>, a vector on the system stack is used
-when processing pattern recursions, lookarounds, or atomic groups, and only if
-this is not big enough is heap memory used. In this case, too, setting a value
-of zero disables the use of the heap.
+For <b>pcre2_dfa_match()</b>, a vector on the system stack is used when
+processing pattern recursions, lookarounds, or atomic groups, and only if this
+is not big enough is heap memory used. In this case, setting a value of zero
+disables the use of the heap.
 <br>
 <br>
 <b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
@@ -1072,10 +1096,9 @@ is also used in this case (but in a different way) to limit how long the
 matching can continue.
 </P>
 <P>
-The default value for the limit can be set when PCRE2 is built; the default
-default is 10 million, which handles all but the most extreme cases. A value
-for the match limit may also be supplied by an item at the start of a pattern
-of the form
+The default value for the limit can be set when PCRE2 is built; the default is
+10 million, which handles all but the most extreme cases. A value for the match
+limit may also be supplied by an item at the start of a pattern of the form
 <pre>
   (*LIMIT_MATCH=ddd)
 </pre>
@@ -1089,10 +1112,10 @@ less than the limit set by the caller of <b>pcre2_match()</b> or
 <br>
 <br>
 This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
-Each time a nested backtracking point is passed, a new memory "frame" is used
+Each time a nested backtracking point is passed, a new memory frame is used
 to remember the state of matching at that point. Thus, this parameter
 indirectly limits the amount of memory that is used in a match. However,
-because the size of each memory "frame" depends on the number of capturing
+because the size of each memory frame depends on the number of capturing
 parentheses, the actual memory limit varies from pattern to pattern. This limit
 was more useful in versions before 10.30, where function recursion was used for
 backtracking.
@@ -1187,7 +1210,11 @@ for the amount of heap memory used by <b>pcre2_match()</b> or
   PCRE2_CONFIG_JIT
 </pre>
 The output is a uint32_t integer that is set to one if support for just-in-time
-compiling is available; otherwise it is set to zero.
+compiling is included in the library; otherwise it is set to zero. Note that
+having the support in the library does not guarantee that JIT will be used for
+any given match. See the
+<a href="pcre2jit.html"><b>pcre2jit</b></a>
+documentation for more details.
 <pre>
   PCRE2_CONFIG_JITTARGET
 </pre>
@@ -1304,10 +1331,12 @@ zero.
 </P>
 <P>
 The <b>pcre2_compile()</b> function compiles a pattern into an internal form.
-The pattern is defined by a pointer to a string of code units and a length (in
-code units). If the pattern is zero-terminated, the length can be specified as
-PCRE2_ZERO_TERMINATED. The function returns a pointer to a block of memory that
-contains the compiled pattern and related data, or NULL if an error occurred.
+The pattern is defined by a pointer to a string of code units and a length in
+code units. If the pattern is zero-terminated, the length can be specified as
+PCRE2_ZERO_TERMINATED. A NULL pattern pointer with a length of zero is treated
+as an empty string (NULL with a non-zero length causes an error return). The
+function returns a pointer to a block of memory that contains the compiled
+pattern and related data, or NULL if an error occurred.
 </P>
 <P>
 If the compile context argument <i>ccontext</i> is NULL, memory for the compiled
@@ -1383,8 +1412,7 @@ If <i>errorcode</i> or <i>erroroffset</i> is NULL, <b>pcre2_compile()</b> return
 NULL immediately. Otherwise, the variables to which these point are set to an
 error code and an offset (number of code units) within the pattern,
 respectively, when <b>pcre2_compile()</b> returns NULL because a compilation
-error has occurred. The values are not defined when compilation is successful
-and <b>pcre2_compile()</b> returns a non-NULL value.
+error has occurred.
 </P>
 <P>
 There are nearly 100 positive error codes that <b>pcre2_compile()</b> may return
@@ -1399,15 +1427,18 @@ because the textual error messages that are obtained by calling the
 message"
 <a href="#geterrormessage">below)</a>
 should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined
-for both positive and negative error codes in <b>pcre2.h</b>.
+for both positive and negative error codes in <b>pcre2.h</b>. When compilation
+is successful <i>errorcode</i> is set to a value that returns the message "no
+error" if passed to <b>pcre2_get_error_message()</b>.
 </P>
 <P>
 The value returned in <i>erroroffset</i> is an indication of where in the
-pattern the error occurred. It is not necessarily the furthest point in the
-pattern that was read. For example, after the error "lookbehind assertion is
-not fixed length", the error offset points to the start of the failing
-assertion. For an invalid UTF-8 or UTF-16 string, the offset is that of the
-first code unit of the failing character.
+pattern an error occurred. When there is no error, zero is returned. A non-zero
+value is not necessarily the furthest point in the pattern that was read. For
+example, after the error "lookbehind assertion is not fixed length", the error
+offset points to the start of the failing assertion. For an invalid UTF-8 or
+UTF-16 string, the offset is that of the first code unit of the failing
+character.
 </P>
 <P>
 Some errors are not detected until the whole pattern has been scanned; in these
@@ -1524,11 +1555,14 @@ PCRE2_UCP is set, Unicode properties are used for all characters with more than
 one other case, and for all characters whose code points are greater than
 U+007F. Note that there are two ASCII characters, K and S, that, in addition to
 their lower case ASCII equivalents, are case-equivalent with U+212A (Kelvin
-sign) and U+017F (long S) respectively. For lower valued characters with only
-one other case, a lookup table is used for speed. When neither PCRE2_UTF nor
-PCRE2_UCP is set, a lookup table is used for all code points less than 256, and
-higher code points (available only in 16-bit or 32-bit mode) are treated as not
-having another case.
+sign) and U+017F (long S) respectively. If you do not want this case
+equivalence, you can suppress it by setting PCRE2_EXTRA_CASELESS_RESTRICT.
+</P>
+<P>
+For lower valued characters with only one other case, a lookup table is used
+for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is used
+for all code points less than 256, and higher code points (available only in
+16-bit or 32-bit mode) are treated as not having another case.
 <pre>
   PCRE2_DOLLAR_ENDONLY
 </pre>
@@ -1586,13 +1620,13 @@ the end of the subject.
   PCRE2_EXTENDED
 </pre>
 If this bit is set, most white space characters in the pattern are totally
-ignored except when escaped or inside a character class. However, white space
-is not allowed within sequences such as (?&#62; that introduce various
-parenthesized groups, nor within numerical quantifiers such as {1,3}. Ignorable
-white space is permitted between an item and a following quantifier and between
-a quantifier and a following + that indicates possessiveness. PCRE2_EXTENDED is
-equivalent to Perl's /x option, and it can be changed within a pattern by a
-(?x) option setting.
+ignored except when escaped, inside a character class, or inside a \Q...\E
+sequence. However, white space is not allowed within sequences such as (?&#62; that
+introduce various parenthesized groups, nor within numerical quantifiers such
+as {1,3}. Ignorable white space is permitted between an item and a following
+quantifier and between a quantifier and a following + that indicates
+possessiveness. PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be
+changed within a pattern by a (?x) option setting.
 </P>
 <P>
 When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as
@@ -1651,7 +1685,7 @@ PCRE2_FIRSTLINE if <i>startoffset</i> is greater than 3. See also
 PCRE2_USE_OFFSET_LIMIT, which provides a more general limiting facility. If
 PCRE2_FIRSTLINE is set with an offset limit, a match must occur in the first
 line and also within the offset limit. In other words, whichever limit comes
-first is used.
+first is used. This option has no effect for anchored patterns.
 <pre>
   PCRE2_LITERAL
 </pre>
@@ -1670,7 +1704,11 @@ PCRE2_EXTRA_MATCH_WORD are also supported. Any other options cause an error.
 </pre>
 This option forces PCRE2_UTF (see below) and also enables support for matching
 by <b>pcre2_match()</b> in subject strings that contain invalid UTF sequences.
-This facility is not supported for DFA matching. For details, see the
+Note, however, that the 16-bit and 32-bit PCRE2 libraries process strings as
+sequences of uint16_t or uint32_t code points. They cannot find valid UTF
+sequences within an arbitrary string of bytes unless such sequences are
+suitably aligned. This facility is not supported for DFA matching. For details,
+see the
 <a href="pcre2unicode.html"><b>pcre2unicode</b></a>
 documentation.
 <pre>
@@ -1845,7 +1883,7 @@ undefined. It may cause your program to crash or loop.
 </P>
 <P>
 Note that this option can also be passed to <b>pcre2_match()</b> and
-<b>pcre_dfa_match()</b>, to suppress UTF validity checking of the subject
+<b>pcre2_dfa_match()</b>, to suppress UTF validity checking of the subject
 string.
 </P>
 <P>
@@ -1864,20 +1902,22 @@ are not representable in UTF-16.
 This option has two effects. Firstly, it changes the way PCRE2 processes \B,
 \b, \D, \d, \S, \s, \W, \w, and some of the POSIX character classes. By
 default, only ASCII characters are recognized, but if PCRE2_UCP is set, Unicode
-properties are used instead to classify characters. More details are given in
-the section on
+properties are used to classify characters. There are some PCRE2_EXTRA
+options (see below) that add finer control to this behaviour. More details are
+given in the section on
 <a href="pcre2pattern.html#genericchartypes">generic character types</a>
 in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
-page. If you set PCRE2_UCP, matching one of the items it affects takes much
-longer.
+page.
 </P>
 <P>
 The second effect of PCRE2_UCP is to force the use of Unicode properties for
-upper/lower casing operations on characters with code points greater than 127,
-even when PCRE2_UTF is not set. This makes it possible, for example, to process
-strings in the 16-bit UCS-2 code. This option is available only if PCRE2 has
-been compiled with Unicode support (which is the default).
+upper/lower casing operations, even when PCRE2_UTF is not set. This makes it
+possible to process strings in the 16-bit UCS-2 code. This option is available
+only if PCRE2 has been compiled with Unicode support (which is the default).
+The PCRE2_EXTRA_CASELESS_RESTRICT option (see below) restricts caseless
+matching such that ASCII characters match only ASCII characters and non-ASCII
+characters match only non-ASCII characters.
 <pre>
   PCRE2_UNGREEDY
 </pre>
@@ -1905,8 +1945,7 @@ Unicode support (which is the default). If Unicode support is not available,
 the use of this option provokes an error. Details of how PCRE2_UTF changes the
 behaviour of PCRE2 are given in the
 <a href="pcre2unicode.html"><b>pcre2unicode</b></a>
-page. In particular, note that it changes the way PCRE2_CASELESS handles
-characters with code points greater than 127.
+page. In particular, note that it changes the way PCRE2_CASELESS works.
 <a name="extracompileoptions"></a></P>
 <br><b>
 Extra compile options
@@ -1953,6 +1992,37 @@ the way that ECMAscript (aka JavaScript) does. Additional functionality was
 defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of
 PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} as a hexadecimal
 character code, where hhh.. is any number of hexadecimal digits.
+<pre>
+  PCRE2_EXTRA_ASCII_BSD
+</pre>
+This option forces \d to match only ASCII digits, even when PCRE2_UCP is set.
+It can be changed within a pattern by means of the (?aD) option setting.
+<pre>
+  PCRE2_EXTRA_ASCII_BSS
+</pre>
+This option forces \s to match only ASCII space characters, even when
+PCRE2_UCP is set. It can be changed within a pattern by means of the (?aS)
+option setting.
+<pre>
+  PCRE2_EXTRA_ASCII_BSW
+</pre>
+This option forces \w to match only ASCII word characters, even when PCRE2_UCP
+is set. It can be changed within a pattern by means of the (?aW) option
+setting.
+<pre>
+  PCRE2_EXTRA_ASCII_DIGIT
+</pre>
+This option forces the POSIX character classes [:digit:] and [:xdigit:] to
+match only ASCII digits, even when PCRE2_UCP is set. It can be changed within
+a pattern by means of the (?aT) option setting.
+<pre>
+  PCRE2_EXTRA_ASCII_POSIX
+</pre>
+This option forces all the POSIX character classes, including [:digit:] and
+[:xdigit:], to match only ASCII characters, even when PCRE2_UCP is set. It can
+be changed within a pattern by means of the (?aP) option setting, but note that
+this also sets PCRE2_EXTRA_ASCII_DIGIT in order to ensure that (?-aP) unsets
+all ASCII restrictions for POSIX classes.
 <pre>
   PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
 </pre>
@@ -1974,6 +2044,17 @@ that a sequence such as [\N{] is interpreted as a malformed attempt at
 [\N{...}] and so is treated as [N{] whereas [\N] gives an error because an
 unqualified \N is a valid escape sequence but is not supported in a character
 class. To reiterate: this is a dangerous option. Use with great care.
+<pre>
+  PCRE2_EXTRA_CASELESS_RESTRICT
+</pre>
+When either PCRE2_UCP or PCRE2_UTF is set, caseless matching follows Unicode
+rules, which allow for more than two cases per character. There are two
+case-equivalent character sets that contain both ASCII and non-ASCII
+characters. The ASCII letter S is case-equivalent to U+017f (long S) and the
+ASCII letter K is case-equivalent to U+212a (Kelvin sign). This option disables
+recognition of case-equivalences that cross the ASCII/non-ASCII boundary. In a
+caseless match, both characters must be ASCII or both must be non-ASCII. The
+option can be changed within a pattern by the (?r) option setting.
 <pre>
   PCRE2_EXTRA_ESCAPED_CR_IS_LF
 </pre>
@@ -2015,8 +2096,8 @@ also set.
 <b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
-<b>pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE <i>startsize</i>,</b>
-<b>  PCRE2_SIZE <i>maxsize</i>, pcre2_general_context *<i>gcontext</i>);</b>
+<b>pcre2_jit_stack *pcre2_jit_stack_create(size_t <i>startsize</i>,</b>
+<b>  size_t <i>maxsize</i>, pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
 <b>void pcre2_jit_stack_assign(pcre2_match_context *<i>mcontext</i>,</b>
@@ -2055,13 +2136,14 @@ point. However, this applies only to characters whose code points are less than
 \d.
 </P>
 <P>
-When PCRE2 is built with Unicode support (the default), the Unicode properties
-of all characters can be tested with \p and \P, or, alternatively, the
+When PCRE2 is built with Unicode support (the default), certain Unicode
+character properties can be tested with \p and \P, or, alternatively, the
 PCRE2_UCP option can be set when a pattern is compiled; this causes \w and
 friends to use Unicode property support instead of the built-in tables.
 PCRE2_UCP also causes upper/lower casing operations on characters with code
 points greater than 127 to use Unicode properties. These effects apply even
-when PCRE2_UTF is not set.
+when PCRE2_UTF is not set. There are, however, some PCRE2_EXTRA options (see
+above) that can be used to modify or suppress them.
 </P>
 <P>
 The use of locales with Unicode is discouraged. If you are handling characters
@@ -2316,7 +2398,7 @@ return zero. The third argument should point to a <b>size_t</b> variable.
   PCRE2_INFO_LASTCODETYPE
 </pre>
 Returns 1 if there is a rightmost literal code unit that must exist in any
-matched string, other than at its start. The third argument should  point to a
+matched string, other than at its start. The third argument should point to a
 <b>uint32_t</b> variable. If there is no such value, 0 is returned. When 1 is
 returned, the code unit value itself can be retrieved using
 PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is
@@ -2543,7 +2625,9 @@ large enough to hold as many as are expected.
 A minimum of at least 1 pair is imposed by <b>pcre2_match_data_create()</b>, so
 it is always possible to return the overall matched string in the case of
 <b>pcre2_match()</b> or the longest match in the case of
-<b>pcre2_dfa_match()</b>.
+<b>pcre2_dfa_match()</b>. The maximum number of pairs is 65535; if the first
+argument of <b>pcre2_match_data_create()</b> is greater than this, 65535 is
+used.
 </P>
 <P>
 The second argument of <b>pcre2_match_data_create()</b> is a pointer to a
@@ -2591,7 +2675,44 @@ When a match data block itself is no longer needed, it should be freed by
 calling <b>pcre2_match_data_free()</b>. If this function is called with a NULL
 argument, it returns immediately, without doing anything.
 </P>
-<br><a name="SEC27" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
+<br><a name="SEC27" href="#TOC1">MEMORY USE FOR MATCH DATA BLOCKS</a><br>
+<P>
+<b>PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *<i>match_data</i>);</b>
+<br>
+<br>
+<b>PCRE2_SIZE pcre2_get_match_data_heapframes_size(</b>
+<b>  pcre2_match_data *<i>match_data</i>);</b>
+</P>
+<P>
+The size of a match data block depends on the size of the ovector that it
+contains. The function <b>pcre2_get_match_data_size()</b> returns the size, in
+bytes, of the block that is its argument.
+</P>
+<P>
+When <b>pcre2_match()</b> runs interpretively (that is, without using JIT), it
+makes use of a vector of data frames for remembering backtracking positions.
+The size of each individual frame depends on the number of capturing
+parentheses in the pattern and can be obtained by calling
+<b>pcre2_pattern_info()</b> with the PCRE2_INFO_FRAMESIZE option (see the
+section entitled "Information about a compiled pattern"
+<a href="#infoaboutpattern">above).</a>
+</P>
+<P>
+Heap memory is used for the frames vector; if the initial memory block turns
+out to be too small during matching, it is automatically expanded. When
+<b>pcre2_match()</b> returns, the memory is not freed, but remains attached to
+the match data block, for use by any subsequent matches that use the same
+block. It is automatically freed when the match data block itself is freed.
+</P>
+<P>
+You can find the current size of the frames vector that a match data block owns
+by calling <b>pcre2_get_match_data_heapframes_size()</b>. For a newly created
+match data block the size will be zero. Some types of match may require a lot
+of frames and thus a large vector; applications that run in environments where
+memory is constrained can check this and free the match data block if the heap
+frames vector has become too big.
+</P>
+<br><a name="SEC28" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
 <P>
 <b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
 <b>  PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
@@ -2640,7 +2761,9 @@ The subject string is passed to <b>pcre2_match()</b> as a pointer in
 <i>startoffset</i>. The length and offset are in code units, not characters.
 That is, they are in bytes for the 8-bit library, 16-bit code units for the
 16-bit library, and 32-bit code units for the 32-bit library, whether or not
-UTF processing is enabled.
+UTF processing is enabled. As a special case, if <i>subject</i> is NULL and
+<i>length</i> is zero, the subject is assumed to be an empty string. If
+<i>length</i> is non-zero, an error occurs if <i>subject</i> is NULL.
 </P>
 <P>
 If <i>startoffset</i> is greater than the length of the subject,
@@ -2697,14 +2820,16 @@ Option bits for <b>pcre2_match()</b>
 <P>
 The unused bits of the <i>options</i> argument for <b>pcre2_match()</b> must be
 zero. The only bits that may be set are PCRE2_ANCHORED,
-PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL,
-PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK,
-PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below.
+PCRE2_COPY_MATCHED_SUBJECT, PCRE2_DISABLE_RECURSELOOP_CHECK, PCRE2_ENDANCHORED,
+PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
+PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT.
+Their action is described below.
 </P>
 <P>
 Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not supported by
 the just-in-time (JIT) compiler. If it is set, JIT matching is disabled and the
-interpretive code in <b>pcre2_match()</b> is run. Apart from PCRE2_NO_JIT
+interpretive code in <b>pcre2_match()</b> is run.
+PCRE2_DISABLE_RECURSELOOP_CHECK is ignored by JIT, but apart from PCRE2_NO_JIT
 (obviously), the remaining options are supported for JIT matching.
 <pre>
   PCRE2_ANCHORED
@@ -2730,6 +2855,25 @@ the match block itself is used. The copy is automatically freed when
 <b>pcre2_match_data_free()</b> is called to free the match data block. It is also
 automatically freed if the match data block is re-used for another match
 operation.
+<pre>
+  PCRE2_DISABLE_RECURSELOOP_CHECK
+</pre>
+This option is relevant only to <b>pcre2_match()</b> for interpretive matching.
+It is ignored when JIT is used, and is forbidden for <b>pcre2_dfa_match()</b>.
+</P>
+<P>
+The use of recursion in patterns can lead to infinite loops. In the
+interpretive matcher these would be eventually caught by the match or heap
+limits, but this could take a long time and/or use a lot of memory if the
+limits are large. There is therefore a check at the start of each recursion.
+If the same group is still active from a previous call, and the current subject
+pointer is the same as it was at the start of that group, and the furthest
+inspected character of the subject has not changed, an error is generated.
+</P>
+<P>
+There are rare cases of matches that would complete, but nevertheless trigger
+this error. This option disables the check. It is provided mainly for testing
+when comparing JIT and interpretive behaviour.
 <pre>
   PCRE2_ENDANCHORED
 </pre>
@@ -2858,7 +3002,7 @@ examples, in the
 <a href="pcre2partial.html"><b>pcre2partial</b></a>
 documentation.
 </P>
-<br><a name="SEC28" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
+<br><a name="SEC29" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
 <P>
 When PCRE2 is built, a default newline convention is set; this is usually the
 standard convention for the operating system. The default can be overridden in
@@ -2898,7 +3042,7 @@ does \s, even though it includes CR and LF in the characters that it matches.
 Notwithstanding the above, anomalous effects may still occur when CRLF is a
 valid newline sequence and explicit \r or \n escapes appear in the pattern.
 <a name="matchedstrings"></a></P>
-<br><a name="SEC29" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
+<br><a name="SEC30" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
 <P>
 <b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
 <br>
@@ -2985,8 +3129,8 @@ Offset values that correspond to unused groups at the end of the expression are
 also set to PCRE2_UNSET. For example, if the string "abc" is matched against
 the pattern (abc)(x(yz)?)? groups 2 and 3 are not matched. The return from the
 function is 2, because the highest used capture group number is 1. The offsets
-for for the second and third capture groupss (assuming the vector is large
-enough, of course) are set to PCRE2_UNSET.
+for the second and third capture groups (assuming the vector is large enough,
+of course) are set to PCRE2_UNSET.
 </P>
 <P>
 Elements in the ovector that do not correspond to capturing parentheses in the
@@ -2995,7 +3139,7 @@ parentheses, no more than <i>ovector[0]</i> to <i>ovector[2n+1]</i> are set by
 <b>pcre2_match()</b>. The other elements retain whatever values they previously
 had. After a failed match attempt, the contents of the ovector are unchanged.
 <a name="matchotherdata"></a></P>
-<br><a name="SEC30" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
+<br><a name="SEC31" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
 <P>
 <b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
 <br>
@@ -3058,7 +3202,7 @@ the code unit offset of the invalid UTF character. Details are given in the
 <a href="pcre2unicode.html"><b>pcre2unicode</b></a>
 page.
 <a name="errorlist"></a></P>
-<br><a name="SEC31" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
+<br><a name="SEC32" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
 <P>
 If <b>pcre2_match()</b> fails, it returns a negative number. This can be
 converted to a text string by calling the <b>pcre2_get_error_message()</b>
@@ -3144,11 +3288,11 @@ The backtracking match limit was reached.
 <pre>
   PCRE2_ERROR_NOMEMORY
 </pre>
-If a pattern contains many nested backtracking points, heap memory is used to
-remember them. This error is given when the memory allocation function (default
-or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
-if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
-also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+Heap memory is used to remember backtracking points. This error is given when
+the memory allocation function (default or custom) fails. Note that a different
+error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
+the heap limit. PCRE2_ERROR_NOMEMORY is also returned if
+PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
 <pre>
   PCRE2_ERROR_NULL
 </pre>
@@ -3165,7 +3309,7 @@ detected and faulted at compile time, but more complicated cases, in particular
 mutual recursions between two different groups, cannot be detected until
 matching is attempted.
 <a name="geterrormessage"></a></P>
-<br><a name="SEC32" href="#TOC1">OBTAINING A TEXTUAL ERROR MESSAGE</a><br>
+<br><a name="SEC33" href="#TOC1">OBTAINING A TEXTUAL ERROR MESSAGE</a><br>
 <P>
 <b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
 <b>  PCRE2_SIZE <i>bufflen</i>);</b>
@@ -3186,7 +3330,7 @@ returned. If the buffer is too small, the message is truncated (but still with
 a trailing zero), and the negative error code PCRE2_ERROR_NOMEMORY is returned.
 None of the messages are very long; a buffer size of 120 code units is ample.
 <a name="extractbynumber"></a></P>
-<br><a name="SEC33" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
+<br><a name="SEC34" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
 <P>
 <b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
 <b>  uint32_t <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
@@ -3283,13 +3427,13 @@ The substring did not participate in the match. For example, if the pattern is
 (abc)|(def) and the subject is "def", and the ovector contains at least two
 capturing slots, substring number 1 is unset.
 </P>
-<br><a name="SEC34" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
+<br><a name="SEC35" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
 <P>
 <b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
 <b>  PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
 <br>
 <br>
-<b>void pcre2_substring_list_free(PCRE2_SPTR *<i>list</i>);</b>
+<b>void pcre2_substring_list_free(PCRE2_UCHAR **<i>list</i>);</b>
 </P>
 <P>
 The <b>pcre2_substring_list_get()</b> function extracts all available substrings
@@ -3322,7 +3466,7 @@ distinguished from a genuine zero-length substring by inspecting the
 appropriate offset in the ovector, which contains PCRE2_UNSET for unset
 substrings, or by calling <b>pcre2_substring_length_bynumber()</b>.
 <a name="extractbyname"></a></P>
-<br><a name="SEC35" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
+<br><a name="SEC36" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
 <P>
 <b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
 <b>  PCRE2_SPTR <i>name</i>);</b>
@@ -3382,7 +3526,7 @@ names are not included in the compiled code. The matching process uses only
 numbers. For this reason, the use of different names for groups with the
 same number causes an error at compile time.
 <a name="substitutions"></a></P>
-<br><a name="SEC36" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
+<br><a name="SEC37" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
 <P>
 <b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
 <b>  PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
@@ -3394,12 +3538,17 @@ same number causes an error at compile time.
 <P>
 This function optionally calls <b>pcre2_match()</b> and then makes a copy of the
 subject string in <i>outputbuffer</i>, replacing parts that were matched with
-the <i>replacement</i> string, whose length is supplied in <b>rlength</b>. This
-can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. There is an
-option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just the
-replacement string(s). The default action is to perform just one replacement if
-the pattern matches, but there is an option that requests multiple replacements
-(see PCRE2_SUBSTITUTE_GLOBAL below).
+the <i>replacement</i> string, whose length is supplied in <i>rlength</i>, which
+can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a
+special case, if <i>replacement</i> is NULL and <i>rlength</i> is zero, the
+replacement is assumed to be an empty string. If <i>rlength</i> is non-zero, an
+error occurs if <i>replacement</i> is NULL.
+</P>
+<P>
+There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just
+the replacement string(s). The default action is to perform just one
+replacement if the pattern matches, but there is an option that requests
+multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below).
 </P>
 <P>
 If successful, <b>pcre2_substitute()</b> returns the number of substitutions
@@ -3433,12 +3582,12 @@ block may or may not have been changed.
 As well as the usual options for <b>pcre2_match()</b>, a number of additional
 options can be set in the <i>options</i> argument of <b>pcre2_substitute()</b>.
 One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
-<i>match_data</i> block must be provided, and it must have been used for an
-external call to <b>pcre2_match()</b>. The data in the <i>match_data</i> block
-(return code, offset vector) is used for the first substitution instead of
-calling <b>pcre2_match()</b> from within <b>pcre2_substitute()</b>. This allows
-an application to check for a match before choosing to substitute, without
-having to repeat the match.
+<i>match_data</i> block must be provided, and it must have already been used for
+an external call to <b>pcre2_match()</b> with the same pattern and subject
+arguments. The data in the <i>match_data</i> block (return code, offset vector)
+is then used for the first substitution instead of calling <b>pcre2_match()</b>
+from within <b>pcre2_substitute()</b>. This allows an application to check for a
+match before choosing to substitute, without having to repeat the match.
 </P>
 <P>
 The contents of the externally supplied match data block are not changed when
@@ -3501,7 +3650,8 @@ replacement string causes an immediate return with the relevant UTF error code.
 If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted
 in any way. By default, however, a dollar character is an escape character that
 can specify the insertion of characters from capture groups and names from
-(*MARK) or other control verbs in the pattern. The following forms are always
+(*MARK) or other control verbs in the pattern. Dollar is the only escape
+character (backslash is treated as literal). The following forms are always
 recognized:
 <pre>
   $$                  insert a dollar character
@@ -3583,7 +3733,7 @@ and force lower case. The escape sequences change the current state: \U and
 terminating a \Q quoted sequence) reverts to no case forcing. The sequences
 \u and \l force the next character (if it is a letter) to upper or lower
 case, respectively, and then the state automatically reverts to no case
-forcing. Case forcing applies to all inserted  characters, including those from
+forcing. Case forcing applies to all inserted characters, including those from
 capture groups and letters within \Q...\E quoted sequences. If either
 PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
 properties are used for case forcing characters whose code points are greater
@@ -3655,7 +3805,9 @@ default.
 </P>
 <P>
 PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the
-<i>match_data</i> argument is NULL.
+<i>match_data</i> argument is NULL or if the <i>subject</i> or <i>replacement</i>
+arguments are NULL. For backward compatibility reasons an exception is made for
+the <i>replacement</i> argument if the <i>rlength</i> argument is also 0.
 </P>
 <P>
 PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the
@@ -3731,11 +3883,11 @@ PCRE2_SUBSTITUTE_GLOBAL is set, processing continues with a search for the next
 match. If the value is not zero, the current replacement is not accepted. If
 the value is greater than zero, processing continues when
 PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less than zero or
-PCRE2_SUBSTITUTE_GLOBAL is not set), the the rest of the input is copied to the
+PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is copied to the
 output and the call to <b>pcre2_substitute()</b> exits, returning the number of
 matches so far.
 </P>
-<br><a name="SEC37" href="#TOC1">DUPLICATE CAPTURE GROUP NAMES</a><br>
+<br><a name="SEC38" href="#TOC1">DUPLICATE CAPTURE GROUP NAMES</a><br>
 <P>
 <b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
 <b>  PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
@@ -3781,7 +3933,7 @@ in the section entitled <i>Information about a pattern</i>. Given all the
 relevant entries for the name, you can extract each of their numbers, and hence
 the captured data.
 </P>
-<br><a name="SEC38" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
+<br><a name="SEC39" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
 <P>
 The traditional matching function uses a similar algorithm to Perl, which stops
 when it finds the first match at a given point in the subject. If you want to
@@ -3799,7 +3951,7 @@ substring. Then return 1, which forces <b>pcre2_match()</b> to backtrack and try
 other alternatives. Ultimately, when it runs out of matches,
 <b>pcre2_match()</b> will yield PCRE2_ERROR_NOMATCH.
 <a name="dfamatch"></a></P>
-<br><a name="SEC39" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
+<br><a name="SEC40" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
 <P>
 <b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
 <b>  PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
@@ -3810,12 +3962,13 @@ other alternatives. Ultimately, when it runs out of matches,
 <P>
 The function <b>pcre2_dfa_match()</b> is called to match a subject string
 against a compiled pattern, using a matching algorithm that scans the subject
-string just once (not counting lookaround assertions), and does not backtrack.
-This has different characteristics to the normal algorithm, and is not
-compatible with Perl. Some of the features of PCRE2 patterns are not supported.
-Nevertheless, there are times when this kind of matching can be useful. For a
-discussion of the two matching algorithms, and a list of features that
-<b>pcre2_dfa_match()</b> does not support, see the
+string just once (not counting lookaround assertions), and does not backtrack
+(except when processing lookaround assertions). This has different
+characteristics to the normal algorithm, and is not compatible with Perl. Some
+of the features of PCRE2 patterns are not supported. Nevertheless, there are
+times when this kind of matching can be useful. For a discussion of the two
+matching algorithms, and a list of features that <b>pcre2_dfa_match()</b> does
+not support, see the
 <a href="pcre2matching.html"><b>pcre2matching</b></a>
 documentation.
 </P>
@@ -3850,7 +4003,7 @@ Here is an example of a simple call to <b>pcre2_dfa_match()</b>:
 </PRE>
 </P>
 <br><b>
-Option bits for <b>pcre_dfa_match()</b>
+Option bits for <b>pcre2_dfa_match()</b>
 </b><br>
 <P>
 The unused bits of the <i>options</i> argument for <b>pcre2_dfa_match()</b> must
@@ -3991,13 +4144,13 @@ some plausibility checks are made on the contents of the workspace, which
 should contain data about the previous partial match. If any of these checks
 fail, this error is given.
 </P>
-<br><a name="SEC40" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC41" href="#TOC1">SEE ALSO</a><br>
 <P>
 <b>pcre2build</b>(3), <b>pcre2callout</b>(3), <b>pcre2demo</b>(3),
 <b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
 <b>pcre2sample</b>(3), <b>pcre2unicode</b>(3).
 </P>
-<br><a name="SEC41" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC42" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
@@ -4006,11 +4159,11 @@ Retired from University Computing Service
 Cambridge, England.
 <br>
 </P>
-<br><a name="SEC42" href="#TOC1">REVISION</a><br>
+<br><a name="SEC43" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 30 August 2021
+Last updated: 27 January 2024
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2024 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.

+ 71 - 44
regex.mod/pcre/doc/html/pcre2build.html

@@ -24,21 +24,22 @@ please consult the man page, in case the conversion went wrong.
 <li><a name="TOC9" href="#SEC9">WHAT \R MATCHES</a>
 <li><a name="TOC10" href="#SEC10">HANDLING VERY LARGE PATTERNS</a>
 <li><a name="TOC11" href="#SEC11">LIMITING PCRE2 RESOURCE USAGE</a>
-<li><a name="TOC12" href="#SEC12">CREATING CHARACTER TABLES AT BUILD TIME</a>
-<li><a name="TOC13" href="#SEC13">USING EBCDIC CODE</a>
-<li><a name="TOC14" href="#SEC14">PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS</a>
-<li><a name="TOC15" href="#SEC15">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
-<li><a name="TOC16" href="#SEC16">PCRE2GREP BUFFER SIZE</a>
-<li><a name="TOC17" href="#SEC17">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a>
-<li><a name="TOC18" href="#SEC18">INCLUDING DEBUGGING CODE</a>
-<li><a name="TOC19" href="#SEC19">DEBUGGING WITH VALGRIND SUPPORT</a>
-<li><a name="TOC20" href="#SEC20">CODE COVERAGE REPORTING</a>
-<li><a name="TOC21" href="#SEC21">DISABLING THE Z AND T FORMATTING MODIFIERS</a>
-<li><a name="TOC22" href="#SEC22">SUPPORT FOR FUZZERS</a>
-<li><a name="TOC23" href="#SEC23">OBSOLETE OPTION</a>
-<li><a name="TOC24" href="#SEC24">SEE ALSO</a>
-<li><a name="TOC25" href="#SEC25">AUTHOR</a>
-<li><a name="TOC26" href="#SEC26">REVISION</a>
+<li><a name="TOC12" href="#SEC12">LIMITING VARIABLE-LENGTH LOOKBEHIND ASSERTIONS</a>
+<li><a name="TOC13" href="#SEC13">CREATING CHARACTER TABLES AT BUILD TIME</a>
+<li><a name="TOC14" href="#SEC14">USING EBCDIC CODE</a>
+<li><a name="TOC15" href="#SEC15">PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS</a>
+<li><a name="TOC16" href="#SEC16">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
+<li><a name="TOC17" href="#SEC17">PCRE2GREP BUFFER SIZE</a>
+<li><a name="TOC18" href="#SEC18">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a>
+<li><a name="TOC19" href="#SEC19">INCLUDING DEBUGGING CODE</a>
+<li><a name="TOC20" href="#SEC20">DEBUGGING WITH VALGRIND SUPPORT</a>
+<li><a name="TOC21" href="#SEC21">CODE COVERAGE REPORTING</a>
+<li><a name="TOC22" href="#SEC22">DISABLING THE Z AND T FORMATTING MODIFIERS</a>
+<li><a name="TOC23" href="#SEC23">SUPPORT FOR FUZZERS</a>
+<li><a name="TOC24" href="#SEC24">OBSOLETE OPTION</a>
+<li><a name="TOC25" href="#SEC25">SEE ALSO</a>
+<li><a name="TOC26" href="#SEC26">AUTHOR</a>
+<li><a name="TOC27" href="#SEC27">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">BUILDING PCRE2</a><br>
 <P>
@@ -118,7 +119,19 @@ one of
   --disable-shared
   --disable-static
 </pre>
-to the <b>configure</b> command.
+to the <b>configure</b> command. Setting --disable-shared ensures that PCRE2
+libraries are built as static libraries. The binaries that are then created as
+part of the build process (for example, <b>pcre2test</b> and <b>pcre2grep</b>)
+are linked statically with one or more PCRE2 libraries, but may also be
+dynamically linked with other libraries such as <b>libc</b>. If you want these
+binaries to be fully statically linked, you can set LDFLAGS like this:
+<br>
+<br>
+LDFLAGS=--static ./configure --disable-shared
+<br>
+<br>
+Note the two hyphens in --static. Of course, this works only if static versions
+of all the relevant libraries are available for linking.
 </P>
 <br><a name="SEC5" href="#TOC1">UNICODE AND UTF SUPPORT</a><br>
 <P>
@@ -142,8 +155,9 @@ locked this out by setting PCRE2_NEVER_UTF.
 UTF support allows the libraries to process character code points up to
 0x10ffff in the strings that they handle. Unicode support also gives access to
 the Unicode properties of characters, using pattern escapes such as \P, \p,
-and \X. Only the general category properties such as <i>Lu</i> and <i>Nd</i> are
-supported. Details are given in the
+and \X. Only the general category properties such as <i>Lu</i> and <i>Nd</i>,
+script names, and some bi-directional properties are supported. Details are
+given in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 documentation.
 </P>
@@ -283,12 +297,11 @@ to the <b>configure</b> command. This setting also applies to the
 counting is done differently).
 </P>
 <P>
-The <b>pcre2_match()</b> function starts out using a 20KiB vector on the system
-stack to record backtracking points. The more nested backtracking points there
-are (that is, the deeper the search tree), the more memory is needed. If the
-initial vector is not large enough, heap memory is used, up to a certain limit,
-which is specified in kibibytes (units of 1024 bytes). The limit can be changed
-at run time, as described in the
+The <b>pcre2_match()</b> function uses heap memory to record backtracking
+points. The more nested backtracking points there are (that is, the deeper the
+search tree), the more memory is needed. There is an upper limit, specified in
+kibibytes (units of 1024 bytes). This limit can be changed at run time, as
+described in the
 <a href="pcre2api.html"><b>pcre2api</b></a>
 documentation. The default limit (in effect unlimited) is 20 million. You can
 change this by a setting such as
@@ -307,7 +320,7 @@ You can also explicitly limit the depth of nested backtracking in the
 for --with-match-limit. You can set a lower default limit by adding, for
 example,
 <pre>
-  --with-match-limit_depth=10000
+  --with-match-limit-depth=10000
 </pre>
 to the <b>configure</b> command. This value can be overridden at run time. This
 depth limit indirectly limits the amount of heap memory that is used, but
@@ -321,8 +334,22 @@ As well as applying to <b>pcre2_match()</b>, the depth limit also controls
 the depth of recursive function calls in <b>pcre2_dfa_match()</b>. These are
 used for lookaround assertions, atomic groups, and recursion within patterns.
 The limit does not apply to JIT matching.
+</P>
+<br><a name="SEC12" href="#TOC1">LIMITING VARIABLE-LENGTH LOOKBEHIND ASSERTIONS</a><br>
+<P>
+Lookbehind assertions in which one or more branches can match a variable number
+of characters are supported only if there is a maximum matching length for each
+top-level branch. There is a limit to this maximum that defaults to 255
+characters. You can alter this default by a setting such as
+<pre>
+  --with-max-varlookbehind=100
+</pre>
+The limit can be changed at runtime by calling
+<b>pcre2_set_max_varlookbehind()</b>. Lookbehind assertions in which every
+branch matches a fixed number of characters (not necessarily all the same) are
+not constrained by this limit.
 <a name="createtables"></a></P>
-<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
+<br><a name="SEC13" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
 <P>
 PCRE2 uses fixed tables for processing characters whose code points are less
 than 256. By default, PCRE2 is built with a set of tables that are distributed
@@ -361,7 +388,7 @@ just a string of bytes, independent of hardware characteristics such as
 endianness. This means they can be bundled with an application that runs in
 different environments, to ensure consistent behaviour.
 </P>
-<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
+<br><a name="SEC14" href="#TOC1">USING EBCDIC CODE</a><br>
 <P>
 PCRE2 assumes by default that it will run in an environment where the character
 code is ASCII or Unicode, which is a superset of ASCII. This is the case for
@@ -396,7 +423,7 @@ The options that select newline behaviour, such as --enable-newline-is-cr,
 and equivalent run-time options, refer to these character values in an EBCDIC
 environment.
 </P>
-<br><a name="SEC14" href="#TOC1">PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS</a><br>
+<br><a name="SEC15" href="#TOC1">PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS</a><br>
 <P>
 By default <b>pcre2grep</b> supports the use of callouts with string arguments
 within the patterns it is matching. There are two kinds: one that generates
@@ -408,7 +435,7 @@ callouts, see the
 <a href="pcre2grep.html"><b>pcre2grep</b></a>
 documentation.
 </P>
-<br><a name="SEC15" href="#TOC1">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
+<br><a name="SEC16" href="#TOC1">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
 <P>
 By default, <b>pcre2grep</b> reads all files as plain text. You can build it so
 that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
@@ -421,7 +448,7 @@ to the <b>configure</b> command. These options naturally require that the
 relevant libraries are installed on your system. Configuration will fail if
 they are not.
 </P>
-<br><a name="SEC16" href="#TOC1">PCRE2GREP BUFFER SIZE</a><br>
+<br><a name="SEC17" href="#TOC1">PCRE2GREP BUFFER SIZE</a><br>
 <P>
 <b>pcre2grep</b> uses an internal buffer to hold a "window" on the file it is
 scanning, in order to be able to output "before" and "after" lines when it
@@ -439,7 +466,7 @@ default parameter values by adding, for example,
 to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override
 these values by using --buffer-size and --max-buffer-size on the command line.
 </P>
-<br><a name="SEC17" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
+<br><a name="SEC18" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
 <P>
 If you add one of
 <pre>
@@ -457,7 +484,7 @@ with <b>libedit</b>, which has a BSD licence.
 <P>
 Setting --enable-pcre2test-libreadline causes the <b>-lreadline</b> option to be
 added to the <b>pcre2test</b> build. In many operating environments with a
-sytem-installed readline library this is sufficient. However, in some
+system-installed readline library this is sufficient. However, in some
 environments (e.g. if an unmodified distribution version of readline is in
 use), some extra configuration may be necessary. The INSTALL file for
 <b>libreadline</b> says this:
@@ -473,7 +500,7 @@ automatically included, you may need to add something like
 </pre>
 immediately before the <b>configure</b> command.
 </P>
-<br><a name="SEC18" href="#TOC1">INCLUDING DEBUGGING CODE</a><br>
+<br><a name="SEC19" href="#TOC1">INCLUDING DEBUGGING CODE</a><br>
 <P>
 If you add
 <pre>
@@ -482,7 +509,7 @@ If you add
 to the <b>configure</b> command, additional debugging code is included in the
 build. This feature is intended for use by the PCRE2 maintainers.
 </P>
-<br><a name="SEC19" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
+<br><a name="SEC20" href="#TOC1">DEBUGGING WITH VALGRIND SUPPORT</a><br>
 <P>
 If you add
 <pre>
@@ -492,7 +519,7 @@ to the <b>configure</b> command, PCRE2 will use valgrind annotations to mark
 certain memory regions as unaddressable. This allows it to detect invalid
 memory accesses, and is mostly useful for debugging PCRE2 itself.
 </P>
-<br><a name="SEC20" href="#TOC1">CODE COVERAGE REPORTING</a><br>
+<br><a name="SEC21" href="#TOC1">CODE COVERAGE REPORTING</a><br>
 <P>
 If your C compiler is gcc, you can build a version of PCRE2 that can generate a
 code coverage report for its test suite. To enable this, you must install
@@ -549,7 +576,7 @@ This cleans all coverage data including the generated coverage report. For more
 information about code coverage, see the <b>gcov</b> and <b>lcov</b>
 documentation.
 </P>
-<br><a name="SEC21" href="#TOC1">DISABLING THE Z AND T FORMATTING MODIFIERS</a><br>
+<br><a name="SEC22" href="#TOC1">DISABLING THE Z AND T FORMATTING MODIFIERS</a><br>
 <P>
 The C99 standard defines formatting modifiers z and t for size_t and
 ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
@@ -564,7 +591,7 @@ support these modifiers. If
 is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
 a suitable format is used depending on the size of long for the platform.
 </P>
-<br><a name="SEC22" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
+<br><a name="SEC23" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
 <P>
 There is a special option for use by people who want to run fuzzing tests on
 PCRE2:
@@ -588,7 +615,7 @@ arguments: if an argument starts with "=" the rest of it is a literal input
 string. Otherwise, it is assumed to be a file name, and the contents of the
 file are the test string.
 </P>
-<br><a name="SEC23" href="#TOC1">OBSOLETE OPTION</a><br>
+<br><a name="SEC24" href="#TOC1">OBSOLETE OPTION</a><br>
 <P>
 In versions of PCRE2 prior to 10.30, there were two ways of handling
 backtracking in the <b>pcre2_match()</b> function. The default was to use the
@@ -600,24 +627,24 @@ was set, memory on the heap was used. From release 10.30 onwards this has
 changed (the stack is no longer used) and this option now does nothing except
 give a warning.
 </P>
-<br><a name="SEC24" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC25" href="#TOC1">SEE ALSO</a><br>
 <P>
 <b>pcre2api</b>(3), <b>pcre2-config</b>(3).
 </P>
-<br><a name="SEC25" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC26" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
-<br><a name="SEC26" href="#TOC1">REVISION</a><br>
+<br><a name="SEC27" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 20 March 2020
+Last updated: 24 November 2023
 <br>
-Copyright &copy; 1997-2020 University of Cambridge.
+Copyright &copy; 1997-2023 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.

+ 9 - 9
regex.mod/pcre/doc/html/pcre2callout.html

@@ -350,12 +350,12 @@ The <i>next_item_length</i> field contains the length of the next item to be
 processed in the pattern string. When the callout is at the end of the pattern,
 the length is zero. When the callout precedes an opening parenthesis, the
 length includes meta characters that follow the parenthesis. For example, in a
-callout before an assertion such as (?=ab) the length is 3. For an an
-alternation bar or a closing parenthesis, the length is one, unless a closing
-parenthesis is followed by a quantifier, in which case its length is included.
-(This changed in release 10.23. In earlier releases, before an opening
-parenthesis the length was that of the entire group, and before an alternation
-bar or a closing parenthesis the length was zero.)
+callout before an assertion such as (?=ab) the length is 3. For an alternation
+bar or a closing parenthesis, the length is one, unless a closing parenthesis
+is followed by a quantifier, in which case its length is included. (This
+changed in release 10.23. In earlier releases, before an opening parenthesis
+the length was that of the entire group, and before an alternation bar or a
+closing parenthesis the length was zero.)
 </P>
 <P>
 The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
@@ -464,16 +464,16 @@ value, scanning the pattern stops, and that value is returned from
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC8" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 03 February 2019
+Last updated: 19 January 2024
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2024 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.

+ 84 - 65
regex.mod/pcre/doc/html/pcre2compat.html

@@ -16,35 +16,48 @@ please consult the man page, in case the conversion went wrong.
 DIFFERENCES BETWEEN PCRE2 AND PERL
 </b><br>
 <P>
-This document describes some of the differences in the ways that PCRE2 and Perl
-handle regular expressions. The differences described here are with respect to
-Perl version 5.32.0, but as both Perl and PCRE2 are continually changing, the
-information may at times be out of date.
+This document describes some of the known differences in the ways that PCRE2
+and Perl handle regular expressions. The differences described here are with
+respect to Perl version 5.38.0, but as both Perl and PCRE2 are continually
+changing, the information may at times be out of date.
 </P>
 <P>
-1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
+1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the
+behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the
+next character unless it is the start of a newline sequence. This means that,
+if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF
+(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using
+EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline
+indicator.
+</P>
+<P>
+2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
 have are given in the
 <a href="pcre2unicode.html"><b>pcre2unicode</b></a>
 page.
 </P>
 <P>
-2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
+3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
 they do not mean what you might think. For example, (?!a){3} does not assert
 that the next three characters are not "a". It just asserts that the next
 character is not "a" three times (in principle; PCRE2 optimizes this to run the
 assertion just once). Perl allows some repeat quantifiers on other assertions,
-for example, \b* (but not \b{3}, though oddly it does allow ^{3}), but these
-do not seem to have any use. PCRE2 does not allow any kind of quantifier on
-non-lookaround assertions.
+for example, \b*, but these do not seem to have any use. PCRE2 does not allow
+any kind of quantifier on non-lookaround assertions.
+</P>
+<P>
+4. If a braced quantifier such as {1,2} appears where there is nothing to
+repeat (for example, at the start of a branch), PCRE2 raises an error whereas
+Perl treats the quantifier characters as literal.
 </P>
 <P>
-3. Capture groups that occur inside negative lookaround assertions are counted,
+5. Capture groups that occur inside negative lookaround assertions are counted,
 but their entries in the offsets vector are set only when a negative assertion
 is a condition that has a matching branch (that is, the condition is false).
 Perl may set such capture groups in other circumstances.
 </P>
 <P>
-4. The following Perl escape sequences are not supported: \F, \l, \L, \u,
+6. The following Perl escape sequences are not supported: \F, \l, \L, \u,
 \U, and \N when followed by a character name. \N on its own, matching a
 non-newline character, and \N{U+dd..}, matching a Unicode code point, are
 supported. The escapes that modify the case of following letters are
@@ -55,26 +68,27 @@ PCRE2_EXTRA_ALT_BSUX options is set, \U and \u are interpreted as ECMAScript
 interprets them.
 </P>
 <P>
-5. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
+7. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is
 built with Unicode support (the default). The properties that can be tested
 with \p and \P are limited to the general category properties such as Lu and
-Nd, script names such as Greek or Han, and the derived properties Any and L&.
-Both PCRE2 and Perl support the Cs (surrogate) property, but in PCRE2 its use
-is limited. See the
+Nd, the derived properties Any and LC (synonym L&), script names such as Greek
+or Han, Bidi_Class, Bidi_Control, and a few binary properties. Both PCRE2 and
+Perl support the Cs (surrogate) property, but in PCRE2 its use is limited. See
+the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 documentation for details. The long synonyms for property names that Perl
 supports (such as \p{Letter}) are not supported by PCRE2, nor is it permitted
 to prefix any of these properties with "Is".
 </P>
 <P>
-6. PCRE2 supports the \Q...\E escape for quoting substrings. Characters
+8. PCRE2 supports the \Q...\E escape for quoting substrings. Characters
 in between are treated as literals. However, this is slightly different from
 Perl in that $ and @ are also handled as literals inside the quotes. In Perl,
-they cause variable interpolation (but of course PCRE2 does not have
-variables). Also, Perl does "double-quotish backslash interpolation" on any
-backslashes between \Q and \E which, its documentation says, "may lead to
-confusing results". PCRE2 treats a backslash between \Q and \E just like any
-other character. Note the following examples:
+they cause variable interpolation (PCRE2 does not have variables). Also, Perl
+does "double-quotish backslash interpolation" on any backslashes between \Q
+and \E which, its documentation says, "may lead to confusing results". PCRE2
+treats a backslash between \Q and \E just like any other character. Note the
+following examples:
 <pre>
     Pattern            PCRE2 matches     Perl matches
 
@@ -88,19 +102,19 @@ The \Q...\E sequence is recognized both inside and outside character classes
 by both PCRE2 and Perl.
 </P>
 <P>
-7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
+9. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
 constructions. However, PCRE2 does have a "callout" feature, which allows an
 external function to be called during pattern matching. See the
 <a href="pcre2callout.html"><b>pcre2callout</b></a>
 documentation for details.
 </P>
 <P>
-8. Subroutine calls (whether recursive or not) were treated as atomic groups up
-to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking
-into subroutine calls is now supported, as in Perl.
+10. Subroutine calls (whether recursive or not) were treated as atomic groups
+up to PCRE2 release 10.23, but from release 10.30 this changed, and
+backtracking into subroutine calls is now supported, as in Perl.
 </P>
 <P>
-9. In PCRE2, if any of the backtracking control verbs are used in a group that
+11. In PCRE2, if any of the backtracking control verbs are used in a group that
 is called as a subroutine (whether or not recursively), their effect is
 confined to that group; it does not extend to the surrounding pattern. This is
 not always the case in Perl. In particular, if (*THEN) is present in a group
@@ -109,20 +123,20 @@ the group does not contain any | characters. Note that such groups are
 processed as anchored at the point where they are tested.
 </P>
 <P>
-10. If a pattern contains more than one backtracking control verb, the first
+12. If a pattern contains more than one backtracking control verb, the first
 one that is backtracked onto acts. For example, in the pattern
 A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
 triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
 same as PCRE2, but there are cases where it differs.
 </P>
 <P>
-11. There are some differences that are concerned with the settings of captured
+13. There are some differences that are concerned with the settings of captured
 strings when part of a pattern is repeated. For example, matching "aba" against
 the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
 "b".
 </P>
 <P>
-12. PCRE2's handling of duplicate capture group numbers and names is not as
+14. PCRE2's handling of duplicate capture group numbers and names is not as
 general as Perl's. This is a consequence of the fact that PCRE2 works internally
 just with numbers, using an external table to translate between numbers and
 names. In particular, a pattern such as (?|(?&#60;a&#62;A)|(?&#60;b&#62;B)), where the two
@@ -132,107 +146,112 @@ to distinguish which group matched, because both names map to capture group
 number 1. To avoid this confusing situation, an error is given at compile time.
 </P>
 <P>
-13. Perl used to recognize comments in some places that PCRE2 does not, for
+15. Perl used to recognize comments in some places that PCRE2 does not, for
 example, between the ( and ? at the start of a group. If the /x modifier is
 set, Perl allowed white space between ( and ? though the latest Perls give an
 error (for a while it was just deprecated). There may still be some cases where
 Perl behaves differently.
 </P>
 <P>
-14. Perl, when in warning mode, gives warnings for character classes such as
+16. Perl, when in warning mode, gives warnings for character classes such as
 [A-\d] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no
 warning features, so it gives an error in these cases because they are almost
 certainly user mistakes.
 </P>
 <P>
-15. In PCRE2, the upper/lower case character properties Lu and Ll are not
+17. In PCRE2, the upper/lower case character properties Lu and Ll are not
 affected when case-independent matching is specified. For example, \p{Lu}
 always matches an upper case letter. I think Perl has changed in this respect;
-in the release at the time of writing (5.32), \p{Lu} and \p{Ll} match all
+in the release at the time of writing (5.38), \p{Lu} and \p{Ll} match all
 letters, regardless of case, when case independence is specified.
 </P>
 <P>
-16. From release 5.32.0, Perl locks out the use of \K in lookaround
+18. From release 5.32.0, Perl locks out the use of \K in lookaround
 assertions. From release 10.38 PCRE2 does the same by default. However, there
 is an option for re-enabling the previous behaviour. When this option is set,
 \K is acted on when it occurs in positive assertions, but is ignored in
 negative assertions.
 </P>
 <P>
-17. PCRE2 provides some extensions to the Perl regular expression facilities.
+19. PCRE2 provides some extensions to the Perl regular expression facilities.
 Perl 5.10 included new features that were not in earlier versions of Perl, some
 of which (such as named parentheses) were in PCRE2 for some time before. This
-list is with respect to Perl 5.32:
-<br>
-<br>
-(a) Although lookbehind assertions in PCRE2 must match fixed length strings,
-each alternative toplevel branch of a lookbehind assertion can match a
-different length of string. Perl requires them all to have the same length.
-<br>
-<br>
-(b) From PCRE2 10.23, backreferences to groups of fixed length are supported
-in lookbehinds, provided that there is no possibility of referencing a
-non-unique number or name. Perl does not support backreferences in lookbehinds.
+list is with respect to Perl 5.38:
 <br>
 <br>
-(c) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $
+(a) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $
 meta-character matches only at the very end of the string.
 <br>
 <br>
-(d) A backslash followed by a letter with no special meaning is faulted. (Perl
+(b) A backslash followed by a letter with no special meaning is faulted. (Perl
 can be made to issue a warning.)
 <br>
 <br>
-(e) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is
+(c) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is
 inverted, that is, by default they are not greedy, but if followed by a
 question mark they are.
 <br>
 <br>
-(f) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried
+(d) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried
 only at the first matching position in the subject string.
 <br>
 <br>
-(g) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART
+(e) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART
 options have no Perl equivalents.
 <br>
 <br>
-(h) The \R escape sequence can be restricted to match only CR, LF, or CRLF
+(f) The \R escape sequence can be restricted to match only CR, LF, or CRLF
 by the PCRE2_BSR_ANYCRLF option.
 <br>
 <br>
-(i) The callout facility is PCRE2-specific. Perl supports codeblocks and
+(g) The callout facility is PCRE2-specific. Perl supports codeblocks and
 variable interpolation, but not general hooks on every match.
 <br>
 <br>
-(j) The partial matching facility is PCRE2-specific.
+(h) The partial matching facility is PCRE2-specific.
 <br>
 <br>
-(k) The alternative matching function (<b>pcre2_dfa_match()</b> matches in a
+(i) The alternative matching function (<b>pcre2_dfa_match()</b>) matches in a
 different way and is not Perl-compatible.
 <br>
 <br>
-(l) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) at
+(j) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) at
 the start of a pattern. These set overall options that cannot be changed within
 the pattern.
 <br>
 <br>
-(m) PCRE2 supports non-atomic positive lookaround assertions. This is an
+(k) PCRE2 supports non-atomic positive lookaround assertions. This is an
 extension to the lookaround facilities. The default, Perl-compatible
 lookarounds are atomic.
+<br>
+<br>
+(l) There are three syntactical items in patterns that can refer to a capturing
+group by number: back references such as \g{2}, subroutine calls such as (?3),
+and condition references such as (?(4)...). PCRE2 supports relative group
+numbers such as +2 and -4 in all three cases. Perl supports both plus and minus
+for subroutine calls, but only minus for back references, and no relative
+numbering at all for conditions.
 </P>
 <P>
-18. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa
-modifier restricts /i case-insensitive matching to pure ascii, ignoring Unicode
-rules. This separation cannot be represented with PCRE2_UCP.
-</P>
-<P>
-19. Perl has different limits than PCRE2. See the
+20. Perl has different limits than PCRE2. See the
 <a href="pcre2limit.html"><b>pcre2limit</b></a>
 documentation for details. With release 5.10, Perl moved from recursion to
 iteration, keeping the intermediate matches on the heap, which is ~10% slower but does not
 fall into any stack-overflow limit. PCRE2 made a similar change at release
 10.30, and also has many build-time and run-time customizable limits.
 </P>
+<P>
+21. Unlike Perl, PCRE2 doesn't have character set modifiers, and in particular
+it has no way to set characters by context as Perl's "/d" does. A regular expression using
+PCRE2_UTF and PCRE2_UCP will use similar rules to Perl's "/u"; something closer
+to "/a" could be selected by adding other PCRE2_EXTRA_ASCII* options on top.
+</P>
+<P>
+22. Some recursive patterns that Perl diagnoses as infinite recursions can be
+handled by PCRE2, either by the interpreter or the JIT. An example is
+/(?:|(?0)abcd)(?(R)|\z)/, which matches a sequence of any number of repeated
+"abcd" substrings at the end of the subject.
+</P>
 <br><b>
 AUTHOR
 </b><br>
@@ -248,9 +267,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 30 August 2021
+Last updated: 30 November 2023
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2023 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.

+ 1 - 1
regex.mod/pcre/doc/html/pcre2convert.html

@@ -175,7 +175,7 @@ neither do POSIX extended patterns).
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>

+ 3 - 2
regex.mod/pcre/doc/html/pcre2demo.html

@@ -12,8 +12,9 @@ This page is part of the PCRE2 HTML documentation. It was generated
 automatically from the original man page. If there is any nonsense in it,
 please consult the man page, in case the conversion went wrong.
 <br>
-<ul>
-</ul>
+<br><b>
+SOURCE CODE
+</b><br>
 <PRE>
 /*************************************************
 *           PCRE2 DEMONSTRATION PROGRAM          *

+ 191 - 128
regex.mod/pcre/doc/html/pcre2grep.html

@@ -21,7 +21,7 @@ please consult the man page, in case the conversion went wrong.
 <li><a name="TOC6" href="#SEC6">OPTIONS</a>
 <li><a name="TOC7" href="#SEC7">ENVIRONMENT VARIABLES</a>
 <li><a name="TOC8" href="#SEC8">NEWLINES</a>
-<li><a name="TOC9" href="#SEC9">OPTIONS COMPATIBILITY</a>
+<li><a name="TOC9" href="#SEC9">OPTIONS COMPATIBILITY WITH GNU GREP</a>
 <li><a name="TOC10" href="#SEC10">OPTIONS WITH DATA</a>
 <li><a name="TOC11" href="#SEC11">USING PCRE2'S CALLOUT FACILITY</a>
 <li><a name="TOC12" href="#SEC12">MATCHING ERRORS</a>
@@ -71,13 +71,16 @@ For example:
 <pre>
   pcre2grep some-pattern file1 - file3
 </pre>
-Input files are searched line by line. By default, each line that matches a
-pattern is copied to the standard output, and if there is more than one file,
-the file name is output at the start of each line, followed by a colon.
-However, there are options that can change how <b>pcre2grep</b> behaves. In
-particular, the <b>-M</b> option makes it possible to search for strings that
-span line boundaries. What defines a line boundary is controlled by the
-<b>-N</b> (<b>--newline</b>) option.
+By default, input files are searched line by line, so pattern assertions about
+the beginning and end of a subject string (^, $, \A, \Z, and \z) match at
+the beginning and end of each line. When a line matches a pattern, it is copied
+to the standard output, and if there is more than one file, the file name is
+output at the start of each line, followed by a colon. However, there are
+options that can change how <b>pcre2grep</b> behaves. For example, the <b>-M</b>
+option makes it possible to search for strings that span line boundaries. What
+defines a line boundary is controlled by the <b>-N</b> (<b>--newline</b>) option.
+The <b>-h</b> and <b>-H</b> options control whether or not file names are shown,
+and the <b>-Z</b> option changes the file name terminator to a zero byte.
 </P>
 <P>
 The amount of memory used for buffering files that are being scanned is
@@ -97,6 +100,10 @@ allow for buffering "before" and "after" lines. If the buffer size is too
 small, fewer than requested "before" and "after" lines may be output.
 </P>
 <P>
+When matching with a multiline pattern, the size of the buffer must be at least
+half of the maximum match expected or the pattern might fail to match.
+</P>
+<P>
 Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the greater.
 BUFSIZ is defined in <b>&#60;stdio.h&#62;</b>. When there is more than one pattern
 (specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to
@@ -106,19 +113,24 @@ patterns are tried before the <b>-f</b> patterns.
 <P>
 By default, as soon as one pattern matches a line, no further patterns are
 considered. However, if <b>--colour</b> (or <b>--color</b>) is used to colour the
-matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>, or
-<b>--line-offsets</b> is used to output only the part of the line that matched
-(either shown literally, or as an offset), scanning resumes immediately
-following the match, so that further matches on the same line can be found. If
-there are multiple patterns, they are all tried on the remainder of the line,
-but patterns that follow the one that matched are not tried on the earlier
-matched part of the line.
+matching substrings, or if <b>--only-matching</b>, <b>--file-offsets</b>,
+<b>--line-offsets</b>, or <b>--output</b> is used to output only the part of the
+line that matched (either shown literally, or as an offset), the behaviour is
+different. In this situation, all the patterns are applied to the line. If
+there is more than one match, the one that begins nearest to the start of the
+subject is processed; if there is more than one match at that position, the one
+with the longest matching substring is processed; if the matching substrings
+are equal, the first match found is processed.
+</P>
+<P>
+Scanning with all the patterns resumes immediately following the match, so that
+later matches on the same line can be found. Note, however, that an overlapping
+match that starts in the middle of another match will not be processed.
 </P>
 <P>
-This behaviour means that the order in which multiple patterns are specified
-can affect the output when one of the above options is used. This is no longer
-the same behaviour as GNU grep, which now manages to display earlier matches
-for later patterns (as long as there is no overlap).
+The above behaviour was changed at release 10.41 to be more compatible with GNU
+grep. In earlier releases, <b>pcre2grep</b> did not recognize matches from
+later patterns that were earlier in the subject.
 </P>
 <P>
 Patterns that can match an empty string are accepted, but empty string
@@ -134,14 +146,15 @@ The <b>--locale</b> option can be used to override this.
 </P>
 <br><a name="SEC3" href="#TOC1">SUPPORT FOR COMPRESSED FILES</a><br>
 <P>
-It is possible to compile <b>pcre2grep</b> so that it uses <b>libz</b> or
-<b>libbz2</b> to read compressed files whose names end in <b>.gz</b> or
+Compile-time options for <b>pcre2grep</b> can set it up to use <b>libz</b> or
+<b>libbz2</b> for reading compressed files whose names end in <b>.gz</b> or
 <b>.bz2</b>, respectively. You can find out whether your <b>pcre2grep</b> binary
 has support for one or both of these file types by running it with the
 <b>--help</b> option. If the appropriate support is not present, all files are
-treated as plain text. The standard input is always so treated. When input is
-from a compressed .gz or .bz2 file, the <b>--line-buffered</b> option is
-ignored.
+treated as plain text. The standard input is always so treated. If a file with
+a <b>.gz</b> or <b>.bz2</b> extension is not in fact compressed, it is read as a
+plain text file. When input is from a compressed .gz or .bz2 file, the
+<b>--line-buffered</b> option is ignored.
 </P>
 <br><a name="SEC4" href="#TOC1">BINARY FILES</a><br>
 <P>
@@ -178,9 +191,11 @@ Output up to <i>number</i> lines of context after each matching line. Fewer
 lines are output if the next match or the end of the file is reached, or if the
 processing buffer size has been set too small. If file names and/or line
 numbers are being output, a hyphen separator is used instead of a colon for the
-context lines. A line containing "--" is output between each group of lines,
-unless they are in fact contiguous in the input file. The value of <i>number</i>
-is expected to be relatively small. When <b>-c</b> is used, <b>-A</b> is ignored.
+context lines (the <b>-Z</b> option can be used to change the file name
+terminator to a zero byte). A line containing "--" is output between each group
+of lines, unless they are in fact contiguous in the input file. The value of
+<i>number</i> is expected to be relatively small. When <b>-c</b> is used,
+<b>-A</b> is ignored.
 </P>
 <P>
 <b>-a</b>, <b>--text</b>
@@ -199,9 +214,10 @@ Output up to <i>number</i> lines of context before each matching line. Fewer
 lines are output if the previous match or the start of the file is within
 <i>number</i> lines, or if the processing buffer size has been set too small. If
 file names and/or line numbers are being output, a hyphen separator is used
-instead of a colon for the context lines. A line containing "--" is output
-between each group of lines, unless they are in fact contiguous in the input
-file. The value of <i>number</i> is expected to be relatively small. When
+instead of a colon for the context lines (the <b>-Z</b> option can be used to
+change the file name terminator to a zero byte). A line containing "--" is
+output between each group of lines, unless they are in fact contiguous in the
+input file. The value of <i>number</i> is expected to be relatively small. When
 <b>-c</b> is used, <b>-B</b> is ignored.
 </P>
 <P>
@@ -238,7 +254,7 @@ exactly the same as the number of lines that would have been output, but if the
 suppressed lines than the count (that is, the number of matches).
 <br>
 <br>
-If no lines are selected, the number zero is output. If several files are are
+If no lines are selected, the number zero is output. If several files are
 being scanned, a count is output for each of them and the <b>-t</b> option can
 be used to cause a total to be output at the end. However, if the
 <b>--files-with-matches</b> option is also used, only those files whose counts
@@ -254,12 +270,14 @@ equals sign.
 <P>
 <b>--colour=</b><i>value</i>, <b>--color=</b><i>value</i>
 This option specifies under what circumstances the parts of a line that matched
-a pattern should be coloured in the output. By default, the output is not
-coloured. The value (which is optional, see above) may be "never", "always", or
-"auto". In the latter case, colouring happens only if the standard output is
-connected to a terminal. More resources are used when colouring is enabled,
-because <b>pcre2grep</b> has to search for all possible matches in a line, not
-just one, in order to colour them all.
+a pattern should be coloured in the output. It is ignored if
+<b>--file-offsets</b>, <b>--line-offsets</b>, or <b>--output</b> is set. By
+default, output is not coloured. The value for the <b>--colour</b> option (which
+is optional, see above) may be "never", "always", or "auto". In the latter
+case, colouring happens only if the standard output is connected to a terminal.
+More resources are used when colouring is enabled, because <b>pcre2grep</b> has
+to search for all possible matches in a line, not just one, in order to colour
+them all.
 <br>
 <br>
 The colour that is used can be specified by setting one of the environment
@@ -301,24 +319,26 @@ end-of-file; in others it may provoke an error.
 See <b>--match-limit</b> below.
 </P>
 <P>
+<b>-E</b>, <b>--case-restrict</b>
+When case distinctions are being ignored in Unicode mode, two ASCII letters (K
+and S) will by default match Unicode characters U+212A (Kelvin sign) and U+017F
+(long S) respectively, as well as their lower case ASCII counterparts. When
+this option is set, case equivalences are restricted such that no ASCII
+character matches a non-ASCII character, and vice versa.
+</P>
+<P>
 <b>-e</b> <i>pattern</i>, <b>--regex=</b><i>pattern</i>, <b>--regexp=</b><i>pattern</i>
 Specify a pattern to be matched. This option can be used multiple times in
 order to specify several patterns. It can also be used as a way of specifying a
 single pattern that starts with a hyphen. When <b>-e</b> is used, no argument
 pattern is taken from the command line; all arguments are treated as file
 names. There is no limit to the number of patterns. They are applied to each
-line in the order in which they are defined until one matches.
+line in the order in which they are defined.
 <br>
 <br>
 If <b>-f</b> is used with <b>-e</b>, the command line patterns are matched first,
 followed by the patterns from the file(s), independent of the order in which
-these options are specified. Note that multiple use of <b>-e</b> is not the same
-as a single pattern with alternatives. For example, X|Y finds the first
-character in a line that is X or Y, whereas if the two patterns are given
-separately, with X first, <b>pcre2grep</b> finds X if it is present, even if it
-follows Y in the line. It finds Y only if there is no X in the line. This
-matters only if you are using <b>-o</b> or <b>--colo(u)r</b> to show the part(s)
-of the line that matched.
+these options are specified.
 </P>
 <P>
 <b>--exclude</b>=<i>pattern</i>
@@ -367,23 +387,20 @@ files; it does not apply to patterns specified by any of the <b>--include</b> or
 </P>
 <P>
 <b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
-Read patterns from the file, one per line, and match them against each line of
-input. As is the case with patterns on the command line, no delimiters should
-be used. What constitutes a newline when reading the file is the operating
-system's default interpretation of \n. The <b>--newline</b> option has no
-effect on this option. Trailing white space is removed from each line, and
-blank lines are ignored. An empty file contains no patterns and therefore
-matches nothing. Patterns read from a file in this way may contain binary
-zeros, which are treated as ordinary data characters. See also the comments
-about multiple patterns versus a single pattern with alternatives in the
-description of <b>-e</b> above.
+Read patterns from the file, one per line. As is the case with patterns on the
+command line, no delimiters should be used. What constitutes a newline when
+reading the file is the operating system's default interpretation of \n. The
+<b>--newline</b> option has no effect on this option. Trailing white space is
+removed from each line, and blank lines are ignored. An empty file contains no
+patterns and therefore matches nothing. Patterns read from a file in this way
+may contain binary zeros, which are treated as ordinary data characters.
 <br>
 <br>
 If this option is given more than once, all the specified files are read. A
 data line is output if any of the patterns match it. A file name can be given
 as "-" to refer to the standard input. When <b>-f</b> is used, patterns
 specified on the command line using <b>-e</b> may also be present; they are
-tested before the file's patterns. However, no other pattern is taken from the
+matched before the file's patterns. However, no pattern is taken from the
 command line; all arguments are treated as the names of paths to be searched.
 </P>
 <P>
@@ -403,28 +420,35 @@ specified files are read.
 <b>--file-offsets</b>
 Instead of showing lines or parts of lines that match, show each match as an
 offset from the start of the file and a length, separated by a comma. In this
-mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
-options are ignored. If there is more than one match in a line, each of them is
-shown separately. This option is mutually exclusive with <b>--output</b>,
-<b>--line-offsets</b>, and <b>--only-matching</b>.
+mode, <b>--colour</b> has no effect, and no context is shown. That is, the
+<b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is more than one
+match in a line, each of them is shown separately. This option is mutually
+exclusive with <b>--output</b>, <b>--line-offsets</b>, and <b>--only-matching</b>.
+</P>
+<P>
+<b>--group-separator</b>=<i>text</i>
+Output this text string instead of two hyphens between groups of lines when
+<b>-A</b>, <b>-B</b>, or <b>-C</b> is in use. See also <b>--no-group-separator</b>.
 </P>
 <P>
 <b>-H</b>, <b>--with-filename</b>
 Force the inclusion of the file name at the start of output lines when
-searching a single file. By default, the file name is not shown in this case.
-For matching lines, the file name is followed by a colon; for context lines, a
-hyphen separator is used. If a line number is also being output, it follows the
-file name. When the <b>-M</b> option causes a pattern to match more than one
-line, only the first is preceded by the file name. This option overrides any
-previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
+searching a single file. The file name is not normally shown in this case.
+By default, for matching lines, the file name is followed by a colon; for
+context lines, a hyphen separator is used. The <b>-Z</b> option can be used to
+change the terminator to a zero byte. If a line number is also being output,
+it follows the file name. When the <b>-M</b> option causes a pattern to match
+more than one line, only the first is preceded by the file name. This option
+overrides any previous <b>-h</b>, <b>-l</b>, or <b>-L</b> options.
 </P>
 <P>
 <b>-h</b>, <b>--no-filename</b>
-Suppress the output file names when searching multiple files. By default,
-file names are shown when multiple files are searched. For matching lines, the
-file name is followed by a colon; for context lines, a hyphen separator is used.
-If a line number is also being output, it follows the file name. This option
-overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
+Suppress the output of file names when searching multiple files. File names are
+normally shown when multiple files are searched. By default, for matching
+lines, the file name is followed by a colon; for context lines, a hyphen
+separator is used. The <b>-Z</b> option can be used to change the terminator to
+a zero byte. If a line number is also being output, it follows the file name.
+This option overrides any previous <b>-H</b>, <b>-L</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>--heap-limit</b>=<i>number</i>
@@ -443,7 +467,9 @@ Ignore binary files. This is equivalent to
 </P>
 <P>
 <b>-i</b>, <b>--ignore-case</b>
-Ignore upper/lower case distinctions during comparisons.
+Ignore upper/lower case distinctions when pattern matching. This applies when
+matching path names for inclusion or exclusion as well as when matching lines
+in files.
 </P>
 <P>
 <b>--include</b>=<i>pattern</i>
@@ -481,18 +507,20 @@ given any number of times. If a directory matches both <b>--include-dir</b> and
 <b>-L</b>, <b>--files-without-match</b>
 Instead of outputting lines from the files, just output the names of the files
 that do not contain any lines that would have been output. Each file name is
-output once, on a separate line. This option overrides any previous <b>-H</b>,
-<b>-h</b>, or <b>-l</b> options.
+output once, on a separate line by default, but if the <b>-Z</b> option is set,
+they are separated by zero bytes instead of newlines. This option overrides any
+previous <b>-H</b>, <b>-h</b>, or <b>-l</b> options.
 </P>
 <P>
 <b>-l</b>, <b>--files-with-matches</b>
 Instead of outputting lines from the files, just output the names of the files
 containing lines that would have been output. Each file name is output once, on
-a separate line. Searching normally stops as soon as a matching line is found
-in a file. However, if the <b>-c</b> (count) option is also used, matching
-continues in order to obtain the correct count, and those files that have at
-least one match are listed along with their counts. Using this option with
-<b>-c</b> is a way of suppressing the listing of files with no matches that
+a separate line, but if the <b>-Z</b> option is set, they are separated by zero
+bytes instead of newlines. Searching normally stops as soon as a matching line
+is found in a file. However, if the <b>-c</b> (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with <b>-c</b> is a way of suppressing the listing of files with no matches that
 occurs with <b>-c</b> on its own. This option overrides any previous <b>-H</b>,
 <b>-h</b>, or <b>-L</b> options.
 </P>
@@ -520,11 +548,11 @@ ceases to work. When input is from a compressed .gz or .bz2 file,
 Instead of showing lines or parts of lines that match, show each match as a
 line number, the offset from the start of the line, and a length. The line
 number is terminated by a colon (as usual; see the <b>-n</b> option), and the
-offset and length are separated by a comma. In this mode, no context is shown.
-That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is
-more than one match in a line, each of them is shown separately. This option is
-mutually exclusive with <b>--output</b>, <b>--file-offsets</b>, and
-<b>--only-matching</b>.
+offset and length are separated by a comma. In this mode, <b>--colour</b> has no
+effect, and no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
+options are ignored. If there is more than one match in a line, each of them is
+shown separately. This option is mutually exclusive with <b>--output</b>,
+<b>--file-offsets</b>, and <b>--only-matching</b>.
 </P>
 <P>
 <b>--locale</b>=<i>locale-name</i>
@@ -536,16 +564,24 @@ used. There is no short form for this option.
 <P>
 <b>-M</b>, <b>--multiline</b>
 Allow patterns to match more than one line. When this option is set, the PCRE2
-library is called in "multiline" mode. This allows a matched string to extend
-past the end of a line and continue on one or more subsequent lines. Patterns
-used with <b>-M</b> may usefully contain literal newline characters and internal
-occurrences of ^ and $ characters. The output for a successful match may
-consist of more than one line. The first line is the line in which the match
-started, and the last line is the line in which the match ended. If the matched
-string ends with a newline sequence, the output ends at the end of that line.
-If <b>-v</b> is set, none of the lines in a multi-line match are output. Once a
-match has been handled, scanning restarts at the beginning of the line after
-the one in which the match ended.
+library is called in "multiline" mode, and a match is allowed to continue past
+the end of the initial line and onto one or more subsequent lines.
+<br>
+<br>
+Patterns used with <b>-M</b> may usefully contain literal newline characters and
+internal occurrences of ^ and $ characters, because in multiline mode these can
+match at internal newlines. Because <b>pcre2grep</b> is scanning multiple lines,
+the \Z and \z assertions match only at the end of the last line in the file.
+The \A assertion matches at the start of the first line of a match. This can
+be any line in the file; it is not anchored to the first line.
+<br>
+<br>
+The output for a successful match may consist of more than one line. The first
+line is the line in which the match started, and the last line is the line in
+which the match ended. If the matched string ends with a newline sequence, the
+output ends at the end of that line. If <b>-v</b> is set, none of the lines in a
+multi-line match are output. Once a match has been handled, scanning restarts
+at the beginning of the line after the one in which the match ended.
 <br>
 <br>
 The newline sequence that separates multiple lines must be matched as part of
@@ -562,8 +598,11 @@ well as possibly handling a two-character newline sequence.
 <br>
 There is a limit to the number of lines that can be matched, imposed by the way
 that <b>pcre2grep</b> buffers the input file as it scans it. With a sufficiently
-large processing buffer, this should not be a problem, but the <b>-M</b> option
-does not work when input is read line by line (see <b>--line-buffered</b>.)
+large processing buffer, this should not be a problem.
+<br>
+<br>
+The <b>-M</b> option does not work when input is read line by line (see
+<b>--line-buffered</b>).
 </P>
 <P>
 <b>-m</b> <i>number</i>, <b>--max-count</b>=<i>number</i>
@@ -592,10 +631,7 @@ value set by <b>--match-limit</b> is reached, an error occurs.
 <br>
 <br>
 The <b>--heap-limit</b> option specifies, as a number of kibibytes (units of
-1024 bytes), the amount of heap memory that may be used for matching. Heap
-memory is needed only if matching the pattern requires a significant number of
-nested backtracking points to be remembered. This parameter can be set to zero
-to forbid the use of heap memory altogether.
+1024 bytes), the maximum amount of heap memory that may be used for matching.
 <br>
 <br>
 The <b>--depth-limit</b> option limits the depth of nested backtracking points,
@@ -656,23 +692,29 @@ pattern to match more than one line, only the first is preceded by its line
 number. This option is forced if <b>--line-offsets</b> is used.
 </P>
 <P>
+<b>--no-group-separator</b>
+Do not output a separator between groups of lines when <b>-A</b>, <b>-B</b>, or
+<b>-C</b> is in use. The default is to output a line containing two hyphens. See
+also <b>--group-separator</b>.
+</P>
+<P>
 <b>--no-jit</b>
 If the PCRE2 library is built with support for just-in-time compiling (which
 speeds up matching), <b>pcre2grep</b> automatically makes use of this, unless it
 was explicitly disabled at build time. This option can be used to disable the
-use of JIT at run time. It is provided for testing and working round problems.
+use of JIT at run time. It is provided for testing and working around problems.
 It should never be needed in normal use.
 </P>
 <P>
 <b>-O</b> <i>text</i>, <b>--output</b>=<i>text</i>
 When there is a match, instead of outputting the line that matched, output just
 the text specified in this option, followed by an operating-system standard
-newline. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>,
-and <b>-C</b> options are ignored. The <b>--newline</b> option has no effect on
-this option, which is mutually exclusive with <b>--only-matching</b>,
-<b>--file-offsets</b>, and <b>--line-offsets</b>. However, like
-<b>--only-matching</b>, if there is more than one match in a line, each of them
-causes a line of output.
+newline. In this mode, <b>--colour</b> has no effect, and no context is shown.
+That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. The
+<b>--newline</b> option has no effect on this option, which is mutually
+exclusive with <b>--only-matching</b>, <b>--file-offsets</b>, and
+<b>--line-offsets</b>. However, like <b>--only-matching</b>, if there is more
+than one match in a line, each of them causes a line of output.
 <br>
 <br>
 Escape sequences starting with a dollar character may be used to insert the
@@ -754,6 +796,18 @@ Specify a separating string for multiple occurrences of <b>-o</b>. The default
 is an empty string. Separating strings are never coloured.
 </P>
 <P>
+<b>-P</b>, <b>--no-ucp</b>
+Starting from release 10.43, when UTF/Unicode mode is specified with <b>-u</b>
+or <b>-U</b>, the PCRE2_UCP option is used by default. This means that the
+POSIX classes in patterns match more than just ASCII characters. For example,
+[:digit:] matches any Unicode decimal digit. The <b>--no-ucp</b> option
+suppresses PCRE2_UCP, thus restricting the POSIX classes to ASCII characters,
+as was the case in earlier releases. Note that there are now more fine-grained
+option settings within patterns that affect individual classes. For example,
+when in UCP mode, the sequence (?aP) restricts [:word:] to ASCII letters, while
+allowing \w to match Unicode letters and digits.
+</P>
+<P>
 <b>-q</b>, <b>--quiet</b>
 Work quietly, that is, display nothing except error messages. The exit
 status indicates whether or not any matches were found.
@@ -791,11 +845,11 @@ total would always be zero.
 </P>
 <P>
 <b>-u</b>, <b>--utf</b>
-Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
-with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
-<b>--include</b> options) and all lines that are scanned must be valid strings
-of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
-occurs.
+Operate in UTF/Unicode mode. This option is available only if PCRE2 has been
+compiled with UTF-8 support. All patterns (including those for any
+<b>--exclude</b> and <b>--include</b> options) and all lines that are scanned
+must be valid strings of UTF-8 characters. If an invalid UTF-8 string is
+encountered, an error occurs.
 </P>
 <P>
 <b>-U</b>, <b>--utf-allow-invalid</b>
@@ -839,6 +893,13 @@ pattern and ")$" at the end. This option applies only to the patterns that are
 matched against the contents of files; it does not apply to patterns specified
 by any of the <b>--include</b> or <b>--exclude</b> options.
 </P>
+<P>
+<b>-Z</b>, <b>--null</b>
+Terminate file names in the regular output with a zero byte (the NUL
+character) instead of what would normally appear. This is useful when file
+names contain unusual characters such as colons, hyphens, or even newlines. The
+option does not apply to file names in error messages.
+</P>
 <br><a name="SEC7" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
 <P>
 The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
@@ -871,25 +932,27 @@ ends of output lines that are copied from the input is not converted to
 standard output must end with "\r\n". For all other operating systems, and
 for all messages to the standard error stream, "\n" is used.
 </P>
-<br><a name="SEC9" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
+<br><a name="SEC9" href="#TOC1">OPTIONS COMPATIBILITY WITH GNU GREP</a><br>
 <P>
-Many of the short and long forms of <b>pcre2grep</b>'s options are the same
-as in the GNU <b>grep</b> program. Any long option of the form
-<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
-(PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>,
-<b>--file-offsets</b>, <b>--heap-limit</b>, <b>--include-dir</b>,
-<b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>, <b>-M</b>,
-<b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
-<b>--output</b>, <b>-u</b>, <b>--utf</b>, <b>-U</b>, and <b>--utf-allow-invalid</b>
-options are specific to <b>pcre2grep</b>, as is the use of the
-<b>--only-matching</b> option with a capturing parentheses number.
+Many of the short and long forms of <b>pcre2grep</b>'s options are the same as
+in the GNU <b>grep</b> program. Any long option of the form <b>--xxx-regexp</b>
+(GNU terminology) is also available as <b>--xxx-regex</b> (PCRE2 terminology).
+However, the <b>--case-restrict</b>, <b>--depth-limit</b>, <b>-E</b>,
+<b>--file-list</b>, <b>--file-offsets</b>, <b>--heap-limit</b>,
+<b>--include-dir</b>, <b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>,
+<b>-M</b>, <b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--no-ucp</b>,
+<b>--om-separator</b>, <b>--output</b>, <b>-P</b>, <b>-u</b>, <b>--utf</b>,
+<b>-U</b>, and <b>--utf-allow-invalid</b> options are specific to
+<b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option with a
+capturing parentheses number.
 </P>
 <P>
 Although most of the common options work the same way, a few are different in
 <b>pcre2grep</b>. For example, the <b>--include</b> option's argument is a glob
-for GNU <b>grep</b>, but a regular expression for <b>pcre2grep</b>. If both the
-<b>-c</b> and <b>-l</b> options are given, GNU grep lists only file names,
-without counts, but <b>pcre2grep</b> gives the counts as well.
+for GNU <b>grep</b>, but in <b>pcre2grep</b> it is a regular expression to which
+the <b>-i</b> option applies. If both the <b>-c</b> and <b>-l</b> options are
+given, GNU grep lists only file names, without counts, but <b>pcre2grep</b>
+gives the counts as well.
 </P>
 <br><a name="SEC10" href="#TOC1">OPTIONS WITH DATA</a><br>
 <P>
@@ -1053,9 +1116,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC16" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 31 August 2021
+Last updated: 22 December 2023
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2023 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.

+ 67 - 46
regex.mod/pcre/doc/html/pcre2jit.html

@@ -43,7 +43,7 @@ one-off matches. JIT support is available for all of the 8-bit, 16-bit and
 <P>
 JIT support applies only to the traditional Perl-compatible matching function.
 It does not apply when the DFA matching function is being used. The code for
-this support was written by Zoltan Herczeg.
+JIT support was written by Zoltan Herczeg.
 </P>
 <br><a name="SEC2" href="#TOC1">AVAILABILITY OF JIT SUPPORT</a><br>
 <P>
@@ -56,19 +56,33 @@ platforms:
   ARM 64-bit
   IBM s390x 64 bit
   Intel x86 32-bit and 64-bit
+  LoongArch 64-bit
   MIPS 32-bit and 64-bit
   Power PC 32-bit and 64-bit
-  SPARC 32-bit
+  RISC-V 32-bit and 64-bit
 </pre>
 If --enable-jit is set on an unsupported platform, compilation fails.
 </P>
 <P>
-A program can tell if JIT support is available by calling <b>pcre2_config()</b>
-with the PCRE2_CONFIG_JIT option. The result is 1 when JIT is available, and 0
-otherwise. However, a simple program does not need to check this in order to
-use JIT. The API is implemented in a way that falls back to the interpretive
-code if JIT is not available. For programs that need the best possible
-performance, there is also a "fast path" API that is JIT-specific.
+A client program can tell if JIT support is available by calling
+<b>pcre2_config()</b> with the PCRE2_CONFIG_JIT option. The result is one if
+PCRE2 was built with JIT support, and zero otherwise. However, having the JIT
+code available does not guarantee that it will be used for any particular
+match. One reason for this is that there are a number of options and pattern
+items that are
+<a href="#unsupported">not supported by JIT</a>
+(see below). Another reason is that in some environments JIT is unable to get
+memory in which to build its compiled code. The only guarantee from
+<b>pcre2_config()</b> is that if it returns zero, JIT will definitely <i>not</i>
+be used.
+</P>
+<P>
+A simple program does not need to check availability in order to use JIT when
+possible. The API is implemented in a way that falls back to the interpretive
+code if JIT is not available or cannot be used for a given match. For programs
+that need the best possible performance, there is a
+<a href="#fastpath">"fast path"</a>
+API that is JIT-specific.
 </P>
 <br><a name="SEC3" href="#TOC1">SIMPLE USE OF JIT</a><br>
 <P>
@@ -127,9 +141,10 @@ below.
 <P>
 There are some <b>pcre2_match()</b> options that are not supported by JIT, and
 there are also some pattern items that JIT cannot handle. Details are given
-below. In both cases, matching automatically falls back to the interpretive
-code. If you want to know whether JIT was actually used for a particular match,
-you should arrange for a JIT callback function to be set up as described in the
+<a href="#unsupported">below.</a>
+In both cases, matching automatically falls back to the interpretive code. If
+you want to know whether JIT was actually used for a particular match, you
+should arrange for a JIT callback function to be set up as described in the
 section entitled
 <a href="#stackcontrol">"Controlling the JIT stack"</a>
 below, even if you do not need to supply a non-default JIT stack. Such a
@@ -139,12 +154,14 @@ not obeyed.
 </P>
 <P>
 If the JIT compiler finds an unsupported item, no JIT data is generated. You
-can find out if JIT matching is available after compiling a pattern by calling
-<b>pcre2_pattern_info()</b> with the PCRE2_INFO_JITSIZE option. A non-zero
-result means that JIT compilation was successful. A result of 0 means that JIT
-support is not available, or the pattern was not processed by
+can find out if JIT compilation was successful for a compiled pattern by
+calling <b>pcre2_pattern_info()</b> with the PCRE2_INFO_JITSIZE option. A
+non-zero result means that JIT compilation was successful. A result of 0 means
+that JIT support is not available, or the pattern was not processed by
 <b>pcre2_jit_compile()</b>, or the JIT compiler was not able to handle the
-pattern.
+pattern. Successful JIT compilation does not, however, guarantee the use of JIT
+at match time because there are some match time options that are not supported
+by JIT.
 </P>
 <br><a name="SEC4" href="#TOC1">MATCHING SUBJECTS CONTAINING INVALID UTF</a><br>
 <P>
@@ -154,15 +171,16 @@ checked at the start of matching and an error is generated if invalid UTF is
 detected. The PCRE2_NO_UTF_CHECK option can be passed to <b>pcre2_match()</b> to
 skip the check (for improved performance) if you are sure that a subject string
 is valid. If this option is used with an invalid string, the result is
-undefined.
+undefined. The calling program may crash or loop or otherwise misbehave.
 </P>
 <P>
 However, a way of running matches on strings that may contain invalid UTF
 sequences is available. Calling <b>pcre2_compile()</b> with the
 PCRE2_MATCH_INVALID_UTF option has two effects: it tells the interpreter in
 <b>pcre2_match()</b> to support invalid UTF, and, if <b>pcre2_jit_compile()</b>
-is called, the compiled JIT code also supports invalid UTF. Details of how this
-support works, in both the JIT and the interpretive cases, is given in the
+is subsequently called, the compiled JIT code also supports invalid UTF.
+Details of how this support works, in both the JIT and the interpretive cases,
+are given in the
 <a href="pcre2unicode.html"><b>pcre2unicode</b></a>
 documentation.
 </P>
@@ -171,7 +189,7 @@ There is also an obsolete option for <b>pcre2_jit_compile()</b> called
 PCRE2_JIT_INVALID_UTF, which currently exists only for backward compatibility.
 It is superseded by the <b>pcre2_compile()</b> option PCRE2_MATCH_INVALID_UTF
 and should no longer be used. It may be removed in future.
-</P>
+<a name="unsupported"></a></P>
 <br><a name="SEC5" href="#TOC1">UNSUPPORTED OPTIONS AND PATTERN ITEMS</a><br>
 <P>
 The <b>pcre2_match()</b> options that are supported for JIT matching are
@@ -191,10 +209,10 @@ in a conditional group.
 </P>
 <br><a name="SEC6" href="#TOC1">RETURN VALUES FROM JIT MATCHING</a><br>
 <P>
-When a pattern is matched using JIT matching, the return values are the same
-as those given by the interpretive <b>pcre2_match()</b> code, with the addition
-of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the memory
-used for the JIT stack was insufficient. See
+When a pattern is matched using JIT, the return values are the same as those
+given by the interpretive <b>pcre2_match()</b> code, with the addition of one
+new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the memory used for
+the JIT stack was insufficient. See
 <a href="#stackcontrol">"Controlling the JIT stack"</a>
 below for a discussion of JIT stack usage.
 </P>
@@ -269,11 +287,11 @@ starts another match, that match must use a different JIT stack to the one used
 for currently suspended match(es).
 </P>
 <P>
-In a multithread application, if you do not
-specify a JIT stack, or if you assign or pass back NULL from a callback, that
-is thread-safe, because each thread has its own machine stack. However, if you
-assign or pass back a non-NULL JIT stack, this must be a different stack for
-each thread so that the application is thread-safe.
+In a multithread application, if you do not specify a JIT stack, or if you
+assign or pass back NULL from a callback, that is thread-safe, because each
+thread has its own machine stack. However, if you assign or pass back a
+non-NULL JIT stack, this must be a different stack for each thread so that the
+application is thread-safe.
 </P>
 <P>
 Strictly speaking, even more is allowed. You can assign the same non-NULL stack
@@ -363,7 +381,7 @@ pattern causes stack overflow with a stack of 1MiB? Is that 1MiB kept until the
 stack is freed?
 <br>
 <br>
-Especially on embedded sytems, it might be a good idea to release memory
+Especially on embedded systems, it might be a good idea to release memory
 sometimes without freeing the stack. There is no API for this at the moment.
 Probably a function call which returns with the currently allocated memory for
 any stack and another which allows releasing memory (shrinking the stack) would
@@ -382,8 +400,8 @@ out this complicated API.
 <b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
 </P>
 <P>
-The JIT executable allocator does not free all memory when it is possible.
-It expects new allocations, and keeps some free memory around to improve
+The JIT executable allocator does not free all memory when it is possible. It
+expects new allocations, and keeps some free memory around to improve
 allocation speed. However, in low memory conditions, it might be better to free
 all possible memory. You can cause this to happen by calling
 pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
@@ -416,7 +434,7 @@ calls.
   pcre2_match_context_free(mcontext);
   pcre2_jit_stack_free(jit_stack);
 
-</PRE>
+<a name="fastpath"></a></PRE>
 </P>
 <br><a name="SEC11" href="#TOC1">JIT FAST PATH API</a><br>
 <P>
@@ -433,19 +451,22 @@ processed by <b>pcre2_jit_compile()</b>).
 The fast path function is called <b>pcre2_jit_match()</b>, and it takes exactly
 the same arguments as <b>pcre2_match()</b>. However, the subject string must be
 specified with a length; PCRE2_ZERO_TERMINATED is not supported. Unsupported
-option bits (for example, PCRE2_ANCHORED, PCRE2_ENDANCHORED and
-PCRE2_COPY_MATCHED_SUBJECT) are ignored, as is the PCRE2_NO_JIT option. The
-return values are also the same as for <b>pcre2_match()</b>, plus
-PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested
-that was not compiled.
+option bits (for example, PCRE2_ANCHORED and PCRE2_ENDANCHORED) are ignored, as
+is the PCRE2_NO_JIT option. The return values are also the same as for
+<b>pcre2_match()</b>, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial
+or complete) is requested that was not compiled.
 </P>
 <P>
 When you call <b>pcre2_match()</b>, as well as testing for invalid options, a
 number of other sanity checks are performed on the arguments. For example, if
-the subject pointer is NULL, an immediate error is given. Also, unless
-PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the
-interests of speed, these checks do not happen on the JIT fast path, and if
-invalid data is passed, the result is undefined.
+the subject pointer is NULL but the length is non-zero, an immediate error is
+given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested
+for validity. In the interests of speed, these checks do not happen on the JIT
+fast path. If invalid UTF data is passed when PCRE2_MATCH_INVALID_UTF was not
+set for <b>pcre2_compile()</b>, the result is undefined. The program may crash
+or loop or give wrong results. In the absence of PCRE2_MATCH_INVALID_UTF you
+should call <b>pcre2_jit_match()</b> in UTF mode only if you are sure the
+subject is valid.
 </P>
 <P>
 Bypassing the sanity checks and the <b>pcre2_match()</b> wrapping can give
@@ -453,22 +474,22 @@ speedups of more than 10%.
 </P>
 <br><a name="SEC12" href="#TOC1">SEE ALSO</a><br>
 <P>
-<b>pcre2api</b>(3)
+<b>pcre2api</b>(3), <b>pcre2unicode</b>(3)
 </P>
 <br><a name="SEC13" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel (FAQ by Zoltan Herczeg)
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC14" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 23 May 2019
+Last updated: 23 January 2023
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2023 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.

+ 14 - 4
regex.mod/pcre/doc/html/pcre2limits.html

@@ -47,7 +47,12 @@ and unset offsets.
 All values in repeating quantifiers must be less than 65536.
 </P>
 <P>
-The maximum length of a lookbehind assertion is 65535 characters.
+There are two different limits that apply to branches of lookbehind assertions.
+If every branch in such an assertion matches a fixed number of characters,
+the maximum length of any branch is 65535 characters. If any branch matches a
+variable number of characters, then the maximum matching length for every
+branch is limited. The default limit is set at compile time, defaulting to 255,
+but can be changed by the calling program.
 </P>
 <P>
 There is no limit to the number of parenthesized groups, but there can be no
@@ -71,13 +76,18 @@ is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
 The maximum length of a string argument to a callout is the largest number a
 32-bit unsigned integer can hold.
 </P>
+<P>
+The maximum amount of heap memory used for matching is controlled by the heap
+limit, which can be set in a pattern or in a match context. The default is a
+very large number, effectively unlimited.
+</P>
 <br><b>
 AUTHOR
 </b><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
@@ -86,9 +96,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 02 February 2019
+Last updated: August 2023
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2023 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.

+ 3 - 3
regex.mod/pcre/doc/html/pcre2matching.html

@@ -27,7 +27,7 @@ please consult the man page, in case the conversion went wrong.
 This document describes the two different algorithms that are available in
 PCRE2 for matching a compiled regular expression against a given subject
 string. The "standard" algorithm is the one provided by the <b>pcre2_match()</b>
-function. This works in the same as as Perl's matching function, and provide a
+function. This works in the same way as Perl's matching function, and provides a
 Perl-compatible matching operation. The just-in-time (JIT) optimization that is
 described in the
 <a href="pcre2jit.html"><b>pcre2jit</b></a>
@@ -244,9 +244,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC8" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 28 August 2021
+Last updated: 19 January 2024
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2024 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.

+ 1 - 1
regex.mod/pcre/doc/html/pcre2partial.html

@@ -392,7 +392,7 @@ can then try a new match starting at offset <i>n+1</i> in the first buffer.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>

File diff suppressed because it is too large
+ 327 - 342
regex.mod/pcre/doc/html/pcre2pattern.html


+ 28 - 9
regex.mod/pcre/doc/html/pcre2perform.html

@@ -83,12 +83,31 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
 uses very little system stack at run time. In earlier releases recursive
 function calls could use a great deal of stack, and this could cause problems,
 but this usage has been eliminated. Backtracking positions are now explicitly
-remembered in memory frames controlled by the code. An initial 20KiB vector of
-frames is allocated on the system stack (enough for about 100 frames for small
-patterns), but if this is insufficient, heap memory is used. The amount of heap
-memory can be limited; if the limit is set to zero, only the initial stack
-vector is used. Rewriting patterns to be time-efficient, as described below,
-may also reduce the memory requirements.
+remembered in memory frames controlled by the code.
+</P>
+<P>
+The size of each frame depends on the size of pointer variables and the number
+of capturing parenthesized groups in the pattern being matched. On a 64-bit
+system the frame size for a pattern with no captures is 128 bytes. For each
+capturing group the size increases by 16 bytes.
+</P>
+<P>
+Until release 10.41, an initial 20KiB frames vector was allocated on the system
+stack, but this still caused some issues for multi-thread applications where
+each thread has a very small stack. From release 10.41 backtracking memory
+frames are always held in heap memory. An initial heap allocation is obtained
+the first time any match data block is passed to <b>pcre2_match()</b>. This is
+remembered with the match data block and re-used if that block is used for
+another match. It is freed when the match data block itself is freed.
+</P>
+<P>
+The size of the initial block is the larger of 20KiB or ten times the pattern's
+frame size, unless the heap limit is less than this, in which case the heap
+limit is used. If the initial block proves to be too small during matching, it
+is replaced by a larger block, subject to the heap limit. The heap limit is
+checked only when a new block is to be allocated. Reducing the heap limit
+between calls to <b>pcre2_match()</b> with the same match data block does not
+affect the saved block.
 </P>
 <P>
 In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
@@ -245,16 +264,16 @@ pattern to match. This is done by repeatedly matching with different limits.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC6" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 03 February 2019
+Last updated: 27 July 2022
 <br>
-Copyright &copy; 1997-2019 University of Cambridge.
+Copyright &copy; 1997-2022 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.

+ 34 - 11
regex.mod/pcre/doc/html/pcre2posix.html

@@ -53,9 +53,14 @@ documentation for a description of PCRE2's native API, which contains much
 additional functionality.
 </P>
 <P>
-The functions described here are wrapper functions that ultimately call the
-PCRE2 native API. Their prototypes are defined in the <b>pcre2posix.h</b> header
-file, and they all have unique names starting with <b>pcre2_</b>. However, the
+<b>IMPORTANT NOTE</b>: The functions described here are NOT thread-safe, and
+should not be used in multi-threaded applications. They are also limited to
+processing subjects that are not bigger than 2GB. Use the native API instead.
+</P>
+<P>
+These functions are wrapper functions that ultimately call the PCRE2 native
+API. Their prototypes are defined in the <b>pcre2posix.h</b> header file, and
+they all have unique names starting with <b>pcre2_</b>. However, the
 <b>pcre2posix.h</b> header also contains macro definitions that convert the
 standard POSIX names such as <b>regcomp()</b> into <b>pcre2_regcomp()</b> etc. This
 means that a program can use the usual POSIX names without running the risk of
@@ -68,7 +73,13 @@ application. Because the POSIX functions call the native ones, it is also
 necessary to add <b>-lpcre2-8</b>.
 </P>
 <P>
-Although they were not defined as protypes in <b>pcre2posix.h</b>, releases
+On Windows systems, if you are linking to a DLL version of the library, it is
+recommended that <b>PCRE2POSIX_SHARED</b> is defined before including the
+<b>pcre2posix.h</b> header, as it will allow for a more efficient way to
+invoke the functions by adding the <b>__declspec(dllimport)</b> decorator.
+</P>
+<P>
+Although they were not defined as prototypes in <b>pcre2posix.h</b>, releases
 10.33 to 10.36 of the library contained functions with the POSIX names
 <b>regcomp()</b> etc. These simply passed their arguments to the PCRE2
 functions. These functions were provided for backwards compatibility with
@@ -87,6 +98,11 @@ captured substrings. It also defines some constants whose names start with
 </P>
 <br><a name="SEC3" href="#TOC1">USING THE POSIX FUNCTIONS</a><br>
 <P>
+Note that these functions are just POSIX-style wrappers for PCRE2's native API.
+They do not give POSIX regular expression behaviour, and they are not
+thread-safe or even POSIX compatible.
+</P>
+<P>
 Those POSIX option bits that can reasonably be mapped to PCRE2 native options
 have been implemented. In addition, the option REG_EXTENDED is defined with the
 value zero. This has no effect, but since programs that are written to the
@@ -117,8 +133,10 @@ The function <b>pcre2_regcomp()</b> is called to compile a pattern into an
 internal form. By default, the pattern is a C string terminated by a binary
 zero (but see REG_PEND below). The <i>preg</i> argument is a pointer to a
 <b>regex_t</b> structure that is used as a base for storing information about
-the compiled regular expression. (It is also used for input when REG_PEND is
-set.)
+the compiled regular expression. It is also used for input when REG_PEND is
+set. The <b>regex_t</b> structure used by <b>pcre2_regcomp()</b> is defined in
+<b>pcre2posix.h</b> and is not the same as the structure used by other libraries
+that provide POSIX-style matching.
 </P>
 <P>
 The argument <i>cflags</i> is either zero, or contains one or more of the bits
@@ -171,7 +189,7 @@ caution in software intended to be portable to other systems.
 </pre>
 The PCRE2_UCP option is set when the regular expression is passed for
 compilation to the native function. This causes PCRE2 to use Unicode properties
-when matchine \d, \w, etc., instead of just recognizing ASCII values. Note
+when matching \d, \w, etc., instead of just recognizing ASCII values. Note
 that REG_UCP is not part of the POSIX standard.
 <pre>
   REG_UNGREEDY
@@ -189,7 +207,7 @@ is not part of the POSIX standard.
 </P>
 <P>
 In the absence of these flags, no options are passed to the native function.
-This means the the regex is compiled with PCRE2 default semantics. In
+This means that the regex is compiled with PCRE2 default semantics. In
 particular, the way it handles newline characters in the subject string is the
 Perl way, not the POSIX way. Note that setting PCRE2_MULTILINE has only
 <i>some</i> of the effects specified for REG_NEWLINE. It does not affect the way
@@ -315,6 +333,11 @@ the capturing subpatterns of the regular expression. Unused entries in the
 array have both structure members set to -1.
 </P>
 <P>
+<i>regmatch_t</i> as well as the <i>regoff_t</i> typedef it uses are defined in
+<b>pcre2posix.h</b> and are not warranted to have the same size or layout as other
+similarly named types from other libraries that provide POSIX-style matching.
+</P>
+<P>
 A successful match yields a zero return; various error codes are defined in the
 header file, of which REG_NOMATCH is the "expected" failure code.
 </P>
@@ -340,16 +363,16 @@ expression.
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
 </P>
 <br><a name="SEC10" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 26 April 2021
+Last updated: 19 January 2024
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2024 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.

+ 1 - 1
regex.mod/pcre/doc/html/pcre2sample.html

@@ -92,7 +92,7 @@ AUTHOR
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>

+ 7 - 8
regex.mod/pcre/doc/html/pcre2serialize.html

@@ -23,12 +23,12 @@ please consult the man page, in case the conversion went wrong.
 <br><a name="SEC1" href="#TOC1">SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS</a><br>
 <P>
 <b>int32_t pcre2_serialize_decode(pcre2_code **<i>codes</i>,</b>
-<b>  int32_t <i>number_of_codes</i>, const uint32_t *<i>bytes</i>,</b>
+<b>  int32_t <i>number_of_codes</i>, const uint8_t *<i>bytes</i>,</b>
 <b>  pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
-<b>int32_t pcre2_serialize_encode(pcre2_code **<i>codes</i>,</b>
-<b>  int32_t <i>number_of_codes</i>, uint32_t **<i>serialized_bytes</i>,</b>
+<b>int32_t pcre2_serialize_encode(const pcre2_code **<i>codes</i>,</b>
+<b>  int32_t <i>number_of_codes</i>, uint8_t **<i>serialized_bytes</i>,</b>
 <b>  PCRE2_SIZE *<i>serialized_size</i>, pcre2_general_context *<i>gcontext</i>);</b>
 <br>
 <br>
@@ -88,13 +88,13 @@ being a pointer to a vector of pointers to compiled patterns, and the length of
 the vector. The third and fourth arguments point to variables which are set to
 point to the created byte stream and its length, respectively. The final
 argument is a pointer to a general context, which can be used to specify custom
-memory mangagement functions. If this argument is NULL, <b>malloc()</b> is used
+memory management functions. If this argument is NULL, <b>malloc()</b> is used
 to obtain memory for the byte stream. The yield of the function is the number
 of serialized patterns, or one of the following negative error codes:
 <pre>
   PCRE2_ERROR_BADDATA      the number of patterns is zero or less
   PCRE2_ERROR_BADMAGIC     mismatch of id bytes in one of the patterns
-  PCRE2_ERROR_MEMORY       memory allocation failed
+  PCRE2_ERROR_NOMEMORY     memory allocation failed
   PCRE2_ERROR_MIXEDTABLES  the patterns do not all use the same tables
   PCRE2_ERROR_NULL         the 1st, 3rd, or 4th argument is NULL
 </pre>
@@ -150,11 +150,10 @@ the compiled patterns in new memory blocks, setting pointers to them in a
 vector. The first two arguments are a pointer to a suitable vector and its
 length, and the third argument points to a byte stream. The final argument is a
 pointer to a general context, which can be used to specify custom memory
-mangagement functions for the decoded patterns. If this argument is NULL,
+management functions for the decoded patterns. If this argument is NULL,
 <b>malloc()</b> and <b>free()</b> are used. After deserialization, the byte
 stream is no longer needed and can be discarded.
 <pre>
-  int32_t number_of_codes;
   pcre2_code *list_of_codes[2];
   uint8_t *bytes = &#60;serialized data&#62;;
   int32_t number_of_codes =
@@ -197,7 +196,7 @@ save/restore cycle. You can, however, process a restored pattern with
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>

+ 158 - 229
regex.mod/pcre/doc/html/pcre2syntax.html

@@ -15,33 +15,36 @@ please consult the man page, in case the conversion went wrong.
 <ul>
 <li><a name="TOC1" href="#SEC1">PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY</a>
 <li><a name="TOC2" href="#SEC2">QUOTING</a>
-<li><a name="TOC3" href="#SEC3">ESCAPED CHARACTERS</a>
-<li><a name="TOC4" href="#SEC4">CHARACTER TYPES</a>
-<li><a name="TOC5" href="#SEC5">GENERAL CATEGORY PROPERTIES FOR \p and \P</a>
-<li><a name="TOC6" href="#SEC6">PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P</a>
-<li><a name="TOC7" href="#SEC7">SCRIPT NAMES FOR \p AND \P</a>
-<li><a name="TOC8" href="#SEC8">CHARACTER CLASSES</a>
-<li><a name="TOC9" href="#SEC9">QUANTIFIERS</a>
-<li><a name="TOC10" href="#SEC10">ANCHORS AND SIMPLE ASSERTIONS</a>
-<li><a name="TOC11" href="#SEC11">REPORTED MATCH POINT SETTING</a>
-<li><a name="TOC12" href="#SEC12">ALTERNATION</a>
-<li><a name="TOC13" href="#SEC13">CAPTURING</a>
-<li><a name="TOC14" href="#SEC14">ATOMIC GROUPS</a>
-<li><a name="TOC15" href="#SEC15">COMMENT</a>
-<li><a name="TOC16" href="#SEC16">OPTION SETTING</a>
-<li><a name="TOC17" href="#SEC17">NEWLINE CONVENTION</a>
-<li><a name="TOC18" href="#SEC18">WHAT \R MATCHES</a>
-<li><a name="TOC19" href="#SEC19">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
-<li><a name="TOC20" href="#SEC20">NON-ATOMIC LOOKAROUND ASSERTIONS</a>
-<li><a name="TOC21" href="#SEC21">SCRIPT RUNS</a>
-<li><a name="TOC22" href="#SEC22">BACKREFERENCES</a>
-<li><a name="TOC23" href="#SEC23">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
-<li><a name="TOC24" href="#SEC24">CONDITIONAL PATTERNS</a>
-<li><a name="TOC25" href="#SEC25">BACKTRACKING CONTROL</a>
-<li><a name="TOC26" href="#SEC26">CALLOUTS</a>
-<li><a name="TOC27" href="#SEC27">SEE ALSO</a>
-<li><a name="TOC28" href="#SEC28">AUTHOR</a>
-<li><a name="TOC29" href="#SEC29">REVISION</a>
+<li><a name="TOC3" href="#SEC3">BRACED ITEMS</a>
+<li><a name="TOC4" href="#SEC4">ESCAPED CHARACTERS</a>
+<li><a name="TOC5" href="#SEC5">CHARACTER TYPES</a>
+<li><a name="TOC6" href="#SEC6">GENERAL CATEGORY PROPERTIES FOR \p and \P</a>
+<li><a name="TOC7" href="#SEC7">PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P</a>
+<li><a name="TOC8" href="#SEC8">BINARY PROPERTIES FOR \p AND \P</a>
+<li><a name="TOC9" href="#SEC9">SCRIPT MATCHING WITH \p AND \P</a>
+<li><a name="TOC10" href="#SEC10">THE BIDI_CLASS PROPERTY FOR \p AND \P</a>
+<li><a name="TOC11" href="#SEC11">CHARACTER CLASSES</a>
+<li><a name="TOC12" href="#SEC12">QUANTIFIERS</a>
+<li><a name="TOC13" href="#SEC13">ANCHORS AND SIMPLE ASSERTIONS</a>
+<li><a name="TOC14" href="#SEC14">REPORTED MATCH POINT SETTING</a>
+<li><a name="TOC15" href="#SEC15">ALTERNATION</a>
+<li><a name="TOC16" href="#SEC16">CAPTURING</a>
+<li><a name="TOC17" href="#SEC17">ATOMIC GROUPS</a>
+<li><a name="TOC18" href="#SEC18">COMMENT</a>
+<li><a name="TOC19" href="#SEC19">OPTION SETTING</a>
+<li><a name="TOC20" href="#SEC20">NEWLINE CONVENTION</a>
+<li><a name="TOC21" href="#SEC21">WHAT \R MATCHES</a>
+<li><a name="TOC22" href="#SEC22">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
+<li><a name="TOC23" href="#SEC23">NON-ATOMIC LOOKAROUND ASSERTIONS</a>
+<li><a name="TOC24" href="#SEC24">SCRIPT RUNS</a>
+<li><a name="TOC25" href="#SEC25">BACKREFERENCES</a>
+<li><a name="TOC26" href="#SEC26">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
+<li><a name="TOC27" href="#SEC27">CONDITIONAL PATTERNS</a>
+<li><a name="TOC28" href="#SEC28">BACKTRACKING CONTROL</a>
+<li><a name="TOC29" href="#SEC29">CALLOUTS</a>
+<li><a name="TOC30" href="#SEC30">SEE ALSO</a>
+<li><a name="TOC31" href="#SEC31">AUTHOR</a>
+<li><a name="TOC32" href="#SEC32">REVISION</a>
 </ul>
 <br><a name="SEC1" href="#TOC1">PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY</a><br>
 <P>
@@ -55,15 +58,27 @@ documentation. This document contains a quick-reference summary of the syntax.
 <pre>
   \x         where x is non-alphanumeric is a literal x
   \Q...\E    treat enclosed characters as literal
-</PRE>
+</pre>
+Note that white space inside \Q...\E is always treated as literal, even if
+PCRE2_EXTENDED is set, causing most other white space to be ignored.
+</P>
+<br><a name="SEC3" href="#TOC1">BRACED ITEMS</a><br>
+<P>
+With one exception, wherever brace characters { and } are required to enclose
+data for constructions such as \g{2} or \k{name}, space and/or horizontal tab
+characters that follow { or precede } are allowed and are ignored. In the case
+of quantifiers, they may also appear before or after the comma. The exception
+is \u{...} which is not Perl-compatible and is recognized only when
+PCRE2_EXTRA_ALT_BSUX is set. This is an ECMAScript compatibility feature, and
+follows ECMAScript's behaviour.
 </P>
-<br><a name="SEC3" href="#TOC1">ESCAPED CHARACTERS</a><br>
+<br><a name="SEC4" href="#TOC1">ESCAPED CHARACTERS</a><br>
 <P>
 This table applies to ASCII and Unicode environments. An unrecognized escape
 sequence causes an error.
 <pre>
   \a         alarm, that is, the BEL character (hex 07)
-  \cx        "control-x", where x is any ASCII printing character
+  \cx        "control-x", where x is a non-control ASCII character
   \e         escape (hex 1B)
   \f         form feed (hex 0C)
   \n         newline (hex 0A)
@@ -101,7 +116,7 @@ also given. \N{U+hh..} is synonymous with \x{hh..} in PCRE2 but is not
 supported in EBCDIC environments. Note that \N not followed by an opening
 curly bracket has a different meaning (see below).
 </P>
-<br><a name="SEC4" href="#TOC1">CHARACTER TYPES</a><br>
+<br><a name="SEC5" href="#TOC1">CHARACTER TYPES</a><br>
 <P>
 <pre>
   .          any character except newline;
@@ -134,9 +149,15 @@ or in the 16-bit and 32-bit libraries. However, if locale-specific matching is
 happening, \s and \w may also match characters with code points in the range
 128-255. If the PCRE2_UCP option is set, the behaviour of these escape
 sequences is changed to use Unicode properties and they match many more
-characters.
+characters, but there are some option settings that can restrict individual
+sequences to matching only ASCII characters.
 </P>
-<br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
+<P>
+Property descriptions in \p and \P are matched caselessly; hyphens,
+underscores, and white space are ignored, in accordance with Unicode's "loose
+matching" rules.
+</P>
+<br><a name="SEC6" href="#TOC1">GENERAL CATEGORY PROPERTIES FOR \p and \P</a><br>
 <P>
 <pre>
   C          Other
@@ -152,6 +173,7 @@ characters.
   Lo         Other letter
   Lt         Title case letter
   Lu         Upper case letter
+  Lc         Ll, Lu, or Lt
   L&         Ll, Lu, or Lt
 
   M          Mark
@@ -185,184 +207,71 @@ characters.
   Zs         Space separator
 </PRE>
 </P>
-<br><a name="SEC6" href="#TOC1">PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P</a><br>
+<br><a name="SEC7" href="#TOC1">PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P</a><br>
 <P>
 <pre>
   Xan        Alphanumeric: union of properties L and N
   Xps        POSIX space: property Z or tab, NL, VT, FF, CR
   Xsp        Perl space: property Z or tab, NL, VT, FF, CR
-  Xuc        Univerally-named character: one that can be
+  Xuc        Universally-named character: one that can be
                represented by a Universal Character Name
   Xwd        Perl word: property Xan or underscore
 </pre>
 Perl and POSIX space are now the same. Perl added VT to its space character set
 at release 5.18.
 </P>
-<br><a name="SEC7" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
-<P>
-Adlam,
-Ahom,
-Anatolian_Hieroglyphs,
-Arabic,
-Armenian,
-Avestan,
-Balinese,
-Bamum,
-Bassa_Vah,
-Batak,
-Bengali,
-Bhaiksuki,
-Bopomofo,
-Brahmi,
-Braille,
-Buginese,
-Buhid,
-Canadian_Aboriginal,
-Carian,
-Caucasian_Albanian,
-Chakma,
-Cham,
-Cherokee,
-Chorasmian,
-Common,
-Coptic,
-Cuneiform,
-Cypriot,
-Cypro_Minoan,
-Cyrillic,
-Deseret,
-Devanagari,
-Dives_Akuru,
-Dogra,
-Duployan,
-Egyptian_Hieroglyphs,
-Elbasan,
-Elymaic,
-Ethiopic,
-Georgian,
-Glagolitic,
-Gothic,
-Grantha,
-Greek,
-Gujarati,
-Gunjala_Gondi,
-Gurmukhi,
-Han,
-Hangul,
-Hanifi_Rohingya,
-Hanunoo,
-Hatran,
-Hebrew,
-Hiragana,
-Imperial_Aramaic,
-Inherited,
-Inscriptional_Pahlavi,
-Inscriptional_Parthian,
-Javanese,
-Kaithi,
-Kannada,
-Katakana,
-Kayah_Li,
-Kharoshthi,
-Khitan_Small_Script,
-Khmer,
-Khojki,
-Khudawadi,
-Lao,
-Latin,
-Lepcha,
-Limbu,
-Linear_A,
-Linear_B,
-Lisu,
-Lycian,
-Lydian,
-Mahajani,
-Makasar,
-Malayalam,
-Mandaic,
-Manichaean,
-Marchen,
-Masaram_Gondi,
-Medefaidrin,
-Meetei_Mayek,
-Mende_Kikakui,
-Meroitic_Cursive,
-Meroitic_Hieroglyphs,
-Miao,
-Modi,
-Mongolian,
-Mro,
-Multani,
-Myanmar,
-Nabataean,
-Nandinagari,
-New_Tai_Lue,
-Newa,
-Nko,
-Nushu,
-Nyakeng_Puachue_Hmong,
-Ogham,
-Ol_Chiki,
-Old_Hungarian,
-Old_Italic,
-Old_North_Arabian,
-Old_Permic,
-Old_Persian,
-Old_Sogdian,
-Old_South_Arabian,
-Old_Turkic,
-Old_Uyghur,
-Oriya,
-Osage,
-Osmanya,
-Pahawh_Hmong,
-Palmyrene,
-Pau_Cin_Hau,
-Phags_Pa,
-Phoenician,
-Psalter_Pahlavi,
-Rejang,
-Runic,
-Samaritan,
-Saurashtra,
-Sharada,
-Shavian,
-Siddham,
-SignWriting,
-Sinhala,
-Sogdian,
-Sora_Sompeng,
-Soyombo,
-Sundanese,
-Syloti_Nagri,
-Syriac,
-Tagalog,
-Tagbanwa,
-Tai_Le,
-Tai_Tham,
-Tai_Viet,
-Takri,
-Tamil,
-Tangsa,
-Tangut,
-Telugu,
-Thaana,
-Thai,
-Tibetan,
-Tifinagh,
-Tirhuta,
-Toto,
-Ugaritic,
-Vai,
-Vithkuqi,
-Wancho,
-Warang_Citi,
-Yezidi,
-Yi,
-Zanabazar_Square.
-</P>
-<br><a name="SEC8" href="#TOC1">CHARACTER CLASSES</a><br>
+<br><a name="SEC8" href="#TOC1">BINARY PROPERTIES FOR \p AND \P</a><br>
+<P>
+Unicode defines a number of binary properties, that is, properties whose only
+values are true or false. You can obtain a list of those that are recognized by
+\p and \P, along with their abbreviations, by running this command:
+<pre>
+  pcre2test -LP
+</PRE>
+</P>
+<br><a name="SEC9" href="#TOC1">SCRIPT MATCHING WITH \p AND \P</a><br>
+<P>
+Many script names and their 4-letter abbreviations are recognized in
+\p{sc:...} or \p{scx:...} items, or on their own with \p (and also \P of
+course). You can obtain a list of these scripts by running this command:
+<pre>
+  pcre2test -LS
+</PRE>
+</P>
+<br><a name="SEC10" href="#TOC1">THE BIDI_CLASS PROPERTY FOR \p AND \P</a><br>
+<P>
+<pre>
+  \p{Bidi_Class:&#60;class&#62;}   matches a character with the given class
+  \p{BC:&#60;class&#62;}           matches a character with the given class
+</pre>
+The recognized classes are:
+<pre>
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
+</PRE>
+</P>
+<br><a name="SEC11" href="#TOC1">CHARACTER CLASSES</a><br>
 <P>
 <pre>
   [...]       positive character class
@@ -390,7 +299,7 @@ In PCRE2, POSIX character set names recognize only ASCII characters by default,
 but some of them use Unicode properties if PCRE2_UCP is set. You can use
 \Q...\E inside a character class.
 </P>
-<br><a name="SEC9" href="#TOC1">QUANTIFIERS</a><br>
+<br><a name="SEC12" href="#TOC1">QUANTIFIERS</a><br>
 <P>
 <pre>
   ?           0 or 1, greedy
@@ -409,9 +318,12 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
   {n,}        n or more, greedy
   {n,}+       n or more, possessive
   {n,}?       n or more, lazy
+  {,m}        zero up to m, greedy
+  {,m}+       zero up to m, possessive
+  {,m}?       zero up to m, lazy
 </PRE>
 </P>
-<br><a name="SEC10" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
+<br><a name="SEC13" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
 <P>
 <pre>
   \b          word boundary
@@ -429,7 +341,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
   \G          first matching position in subject
 </PRE>
 </P>
-<br><a name="SEC11" href="#TOC1">REPORTED MATCH POINT SETTING</a><br>
+<br><a name="SEC14" href="#TOC1">REPORTED MATCH POINT SETTING</a><br>
 <P>
 <pre>
   \K          set reported start of match
@@ -439,13 +351,13 @@ for compatibility with Perl. However, if the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
 option is set, the previous behaviour is re-enabled. When this option is set,
 \K is honoured in positive assertions, but ignored in negative ones.
 </P>
-<br><a name="SEC12" href="#TOC1">ALTERNATION</a><br>
+<br><a name="SEC15" href="#TOC1">ALTERNATION</a><br>
 <P>
 <pre>
   expr|expr|expr...
 </PRE>
 </P>
-<br><a name="SEC13" href="#TOC1">CAPTURING</a><br>
+<br><a name="SEC16" href="#TOC1">CAPTURING</a><br>
 <P>
 <pre>
   (...)           capture group
@@ -460,35 +372,47 @@ In non-UTF modes, names may contain underscores and ASCII letters and digits;
 in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In
 both cases, a name must not start with a digit.
 </P>
-<br><a name="SEC14" href="#TOC1">ATOMIC GROUPS</a><br>
+<br><a name="SEC17" href="#TOC1">ATOMIC GROUPS</a><br>
 <P>
 <pre>
   (?&#62;...)         atomic non-capture group
   (*atomic:...)   atomic non-capture group
 </PRE>
 </P>
-<br><a name="SEC15" href="#TOC1">COMMENT</a><br>
+<br><a name="SEC18" href="#TOC1">COMMENT</a><br>
 <P>
 <pre>
   (?#....)        comment (not nestable)
 </PRE>
 </P>
-<br><a name="SEC16" href="#TOC1">OPTION SETTING</a><br>
+<br><a name="SEC19" href="#TOC1">OPTION SETTING</a><br>
 <P>
 Changes of these options within a group are automatically cancelled at the end
 of the group.
 <pre>
+  (?a)            all ASCII options
+  (?aD)           restrict \d to ASCII in UCP mode
+  (?aS)           restrict \s to ASCII in UCP mode
+  (?aW)           restrict \w to ASCII in UCP mode
+  (?aP)           restrict all POSIX classes to ASCII in UCP mode
+  (?aT)           restrict POSIX digit classes to ASCII in UCP mode
   (?i)            caseless
   (?J)            allow duplicate named groups
   (?m)            multiline
   (?n)            no auto capture
+  (?r)            restrict caseless to either ASCII or non-ASCII
   (?s)            single line (dotall)
   (?U)            default ungreedy (lazy)
-  (?x)            extended: ignore white space except in classes
+  (?x)            ignore white space except in classes or \Q...\E
   (?xx)           as (?x) but also ignore space and tab in classes
-  (?-...)         unset option(s)
-  (?^)            unset imnsx options
+  (?-...)         unset the given option(s)
+  (?^)            unset imnrsx options
 </pre>
+(?aP) implies (?aT) as well, though this has no additional effect. However, it
+means that (?-aP) is really (?-PT) which disables all ASCII restrictions for
+POSIX classes.
+</P>
+<P>
 Unsetting x or xx unsets both. Several options may be set at once, and a
 mixture of setting and unsetting such as (?i-x) is allowed, but there may be
 only one hyphen. Setting (but no unsetting) is allowed after (?^ for example
@@ -518,7 +442,7 @@ not increase them. LIMIT_RECURSION is an obsolete synonym for LIMIT_DEPTH. The
 application can lock out the use of (*UTF) and (*UCP) by setting the
 PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at compile time.
 </P>
-<br><a name="SEC17" href="#TOC1">NEWLINE CONVENTION</a><br>
+<br><a name="SEC20" href="#TOC1">NEWLINE CONVENTION</a><br>
 <P>
 These are recognized only at the very start of the pattern or after option
 settings with a similar syntax.
@@ -531,7 +455,7 @@ settings with a similar syntax.
   (*NUL)          the NUL character (binary zero)
 </PRE>
 </P>
-<br><a name="SEC18" href="#TOC1">WHAT \R MATCHES</a><br>
+<br><a name="SEC21" href="#TOC1">WHAT \R MATCHES</a><br>
 <P>
 These are recognized only at the very start of the pattern or after option
 setting with a similar syntax.
@@ -540,7 +464,7 @@ setting with a similar syntax.
   (*BSR_UNICODE)  any Unicode newline sequence
 </PRE>
 </P>
-<br><a name="SEC19" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
+<br><a name="SEC22" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
 <P>
 <pre>
   (?=...)                     )
@@ -559,9 +483,14 @@ setting with a similar syntax.
   (*nlb:...)                  ) negative lookbehind
   (*negative_lookbehind:...)  )
 </pre>
-Each top-level branch of a lookbehind must be of a fixed length.
+Each top-level branch of a lookbehind must have a limit for the number of
+characters it matches. If any branch can match a variable number of characters,
+the maximum for each branch is limited to a value set by the caller of
+<b>pcre2_compile()</b> or defaulted. The default is set when PCRE2 is built
+(ultimate default 255). If every branch matches a fixed number of characters,
+the limit for each branch is 65535 characters.
 </P>
-<br><a name="SEC20" href="#TOC1">NON-ATOMIC LOOKAROUND ASSERTIONS</a><br>
+<br><a name="SEC23" href="#TOC1">NON-ATOMIC LOOKAROUND ASSERTIONS</a><br>
 <P>
 These assertions are specific to PCRE2 and are not Perl-compatible.
 <pre>
@@ -574,7 +503,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
   (*non_atomic_positive_lookbehind:...)  )
 </PRE>
 </P>
-<br><a name="SEC21" href="#TOC1">SCRIPT RUNS</a><br>
+<br><a name="SEC24" href="#TOC1">SCRIPT RUNS</a><br>
 <P>
 <pre>
   (*script_run:...)           ) script run, can be backtracked into
@@ -584,7 +513,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
   (*asr:...)                  )
 </PRE>
 </P>
-<br><a name="SEC22" href="#TOC1">BACKREFERENCES</a><br>
+<br><a name="SEC25" href="#TOC1">BACKREFERENCES</a><br>
 <P>
 <pre>
   \n              reference by number (can be ambiguous)
@@ -601,7 +530,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
   (?P=name)       reference by name (Python)
 </PRE>
 </P>
-<br><a name="SEC23" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
+<br><a name="SEC26" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
 <P>
 <pre>
   (?R)            recurse whole pattern
@@ -620,15 +549,15 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
   \g'-n'          call subroutine by relative number (PCRE2 extension)
 </PRE>
 </P>
-<br><a name="SEC24" href="#TOC1">CONDITIONAL PATTERNS</a><br>
+<br><a name="SEC27" href="#TOC1">CONDITIONAL PATTERNS</a><br>
 <P>
 <pre>
   (?(condition)yes-pattern)
   (?(condition)yes-pattern|no-pattern)
 
   (?(n)               absolute reference condition
-  (?(+n)              relative reference condition
-  (?(-n)              relative reference condition
+  (?(+n)              relative reference condition (PCRE2 extension)
+  (?(-n)              relative reference condition (PCRE2 extension)
   (?(&#60;name&#62;)          named reference condition (Perl)
   (?('name')          named reference condition (Perl)
   (?(name)            named reference condition (PCRE2, deprecated)
@@ -643,7 +572,7 @@ Note the ambiguity of (?(R) and (?(Rn) which might be named reference
 conditions or recursion tests. Such a condition is interpreted as a reference
 condition if the relevant named group exists.
 </P>
-<br><a name="SEC25" href="#TOC1">BACKTRACKING CONTROL</a><br>
+<br><a name="SEC28" href="#TOC1">BACKTRACKING CONTROL</a><br>
 <P>
 All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the
 name is mandatory, for the others it is optional. (*SKIP) changes its behaviour
@@ -670,7 +599,7 @@ pattern is not anchored.
 The effect of one of these verbs in a group called as a subroutine is confined
 to the subroutine call.
 </P>
-<br><a name="SEC26" href="#TOC1">CALLOUTS</a><br>
+<br><a name="SEC29" href="#TOC1">CALLOUTS</a><br>
 <P>
 <pre>
   (?C)            callout (assumed number 0)
@@ -681,12 +610,12 @@ The allowed string delimiters are ` ' " ^ % # $ (which are the same for the
 start and the end), and the starting delimiter { matched with the ending
 delimiter }. To encode the ending delimiter within the string, double it.
 </P>
-<br><a name="SEC27" href="#TOC1">SEE ALSO</a><br>
+<br><a name="SEC30" href="#TOC1">SEE ALSO</a><br>
 <P>
 <b>pcre2pattern</b>(3), <b>pcre2api</b>(3), <b>pcre2callout</b>(3),
 <b>pcre2matching</b>(3), <b>pcre2</b>(3).
 </P>
-<br><a name="SEC28" href="#TOC1">AUTHOR</a><br>
+<br><a name="SEC31" href="#TOC1">AUTHOR</a><br>
 <P>
 Philip Hazel
 <br>
@@ -695,11 +624,11 @@ Retired from University Computing Service
 Cambridge, England.
 <br>
 </P>
-<br><a name="SEC29" href="#TOC1">REVISION</a><br>
+<br><a name="SEC32" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 30 August 2021
+Last updated: 12 October 2023
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2023 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.

+ 135 - 64
regex.mod/pcre/doc/html/pcre2test.html

@@ -78,7 +78,7 @@ to 8-bit code units for output.
 </P>
 <P>
 In the rest of this document, the names of library functions and structures
-are given in generic form, for example, <b>pcre_compile()</b>. The actual
+are given in generic form, for example, <b>pcre2_compile()</b>. The actual
 names used in the libraries have a suffix _8, _16, or _32, as appropriate.
 <a name="inputencoding"></a></P>
 <br><a name="SEC3" href="#TOC1">INPUT ENCODING</a><br>
@@ -90,14 +90,14 @@ end of file, and no further data is read, so this character should be avoided
 unless you really want that action.
 </P>
 <P>
-The input is processed using using C's string functions, so must not
-contain binary zeros, even though in Unix-like environments, <b>fgets()</b>
-treats any bytes other than newline as data characters. An error is generated
-if a binary zero is encountered. By default subject lines are processed for
-backslash escapes, which makes it possible to include any data value in strings
-that are passed to the library for matching. For patterns, there is a facility
-for specifying some or all of the 8-bit input characters as hexadecimal pairs,
-which makes it possible to include binary zeros.
+The input is processed using C's string functions, so must not contain binary
+zeros, even though in Unix-like environments, <b>fgets()</b> treats any bytes
+other than newline as data characters. An error is generated if a binary zero
+is encountered. By default subject lines are processed for backslash escapes,
+which makes it possible to include any data value in strings that are passed to
+the library for matching. For patterns, there is a facility for specifying some
+or all of the 8-bit input characters as hexadecimal pairs, which makes it
+possible to include binary zeros.
 </P>
 <br><b>
 Input for the 16-bit and 32-bit libraries
@@ -138,15 +138,15 @@ error.
 </P>
 <P>
 <b>-16</b>
-If the 16-bit library has been built, this option causes it to be used. If only
-the 16-bit library has been built, this is the default. If the 16-bit library
+If the 16-bit library has been built, this option causes it to be used. If the
+8-bit library has not been built, this is the default. If the 16-bit library
 has not been built, this option causes an error.
 </P>
 <P>
 <b>-32</b>
-If the 32-bit library has been built, this option causes it to be used. If only
-the 32-bit library has been built, this is the default. If the 32-bit library
-has not been built, this option causes an error.
+If the 32-bit library has been built, this option causes it to be used. If no
+other library has been built, this is the default. If the 32-bit library has
+not been built, this option causes an error.
 </P>
 <P>
 <b>-ac</b>
@@ -253,7 +253,19 @@ available, and the use of JIT for matching is verified.
 <b>-LM</b>
 List modifiers: write a list of available pattern and subject modifiers to the
 standard output, then exit with zero exit code. All other options are ignored.
-If both -C and -LM are present, whichever is first is recognized.
+If both -C and any -Lx options are present, whichever is first is recognized.
+</P>
+<P>
+<b>-LP</b>
+List properties: write a list of recognized Unicode properties to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
+</P>
+<P>
+<b>-LS</b>
+List scripts: write a list of recognized Unicode script names to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
 </P>
 <P>
 <b>-pattern</b> <i>modifier-list</i>
@@ -495,8 +507,8 @@ followed by a backslash, for example,
 <pre>
   /abc/\
 </pre>
-then a backslash is added to the end of the pattern. This is done to provide a
-way of testing the error condition that arises if a pattern finishes with a
+a backslash is added to the end of the pattern. This is done to provide a way
+of testing the error condition that arises if a pattern finishes with a
 backslash, because
 <pre>
   /abc\/
@@ -600,12 +612,11 @@ Setting compilation options
 <P>
 The following modifiers set options for <b>pcre2_compile()</b>. Most of them set
 bits in the options argument of that function, but those whose names start with
-PCRE2_EXTRA are additional options that are set in the compile context. For the
-main options, there are some single-letter abbreviations that are the same as
-Perl options. There is special handling for /x: if a second x is present,
-PCRE2_EXTENDED is converted into PCRE2_EXTENDED_MORE as in Perl. A third
-appearance adds PCRE2_EXTENDED as well, though this makes no difference to the
-way <b>pcre2_compile()</b> behaves. See
+PCRE2_EXTRA are additional options that are set in the compile context.
+Some of these options have single-letter abbreviations. There is special
+handling for /x: if a second x is present, PCRE2_EXTENDED is converted into
+PCRE2_EXTENDED_MORE as in Perl. A third appearance adds PCRE2_EXTENDED as well,
+though this makes no difference to the way <b>pcre2_compile()</b> behaves. See
 <a href="pcre2api.html"><b>pcre2api</b></a>
 for a description of the effects of these options.
 <pre>
@@ -616,9 +627,16 @@ for a description of the effects of these options.
       alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
       alt_verbnames             set PCRE2_ALT_VERBNAMES
       anchored                  set PCRE2_ANCHORED
+  /a  ascii_all                 set all ASCII options
+      ascii_bsd                 set PCRE2_EXTRA_ASCII_BSD
+      ascii_bss                 set PCRE2_EXTRA_ASCII_BSS
+      ascii_bsw                 set PCRE2_EXTRA_ASCII_BSW
+      ascii_digit               set PCRE2_EXTRA_ASCII_DIGIT
+      ascii_posix               set PCRE2_EXTRA_ASCII_POSIX
       auto_callout              set PCRE2_AUTO_CALLOUT
       bad_escape_is_literal     set PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
   /i  caseless                  set PCRE2_CASELESS
+  /r  caseless_restrict         set PCRE2_EXTRA_CASELESS_RESTRICT
       dollar_endonly            set PCRE2_DOLLAR_ENDONLY
   /s  dotall                    set PCRE2_DOTALL
       dupnames                  set PCRE2_DUPNAMES
@@ -678,10 +696,12 @@ heavily used in the test files.
       jitfast                   use JIT fast path
       jitverify                 verify JIT use
       locale=&#60;name&#62;             use this locale
-      max_pattern_length=&#60;n&#62;    set the maximum pattern length
+      max_pattern_length=&#60;n&#62;    set maximum pattern length
+      max_varlookbehind=&#60;n&#62;     set maximum variable lookbehind length
       memory                    show memory used
       newline=&#60;type&#62;            set newline type
       null_context              compile with a NULL context
+      null_pattern              pass pattern as NULL
       parens_nest_limit=&#60;n&#62;     set maximum parentheses depth
       posix                     use the POSIX API
       posix_nosub               use the POSIX API with REG_NOSUB
@@ -761,9 +781,11 @@ ending code units are recorded. The subject length line is omitted when
 when it can never be used.
 </P>
 <P>
-The <b>framesize</b> modifier shows the size, in bytes, of the storage frames
+The <b>framesize</b> modifier shows the size, in bytes, of each storage frame
 used by <b>pcre2_match()</b> for handling backtracking. The size depends on the
-number of capturing parentheses in the pattern.
+number of capturing parentheses in the pattern. A vector of these frames is
+used at matching time; its overall size is shown when the <b>heapframes_size</b>
+subject modifier is set.
 </P>
 <P>
 The <b>callout_info</b> modifier requests information about all the callouts in
@@ -781,6 +803,15 @@ testing that <b>pcre2_compile()</b> behaves correctly in this case (it uses
 default values).
 </P>
 <br><b>
+Passing a NULL pattern
+</b><br>
+<P>
+The <b>null_pattern</b> modifier is for testing the behaviour of
+<b>pcre2_compile()</b> when the pattern argument is NULL. The length value
+passed is the default PCRE2_ZERO_TERMINATED unless <b>use_length</b> is set.
+Any length other than zero causes an error.
+</P>
+<br><b>
 Specifying pattern characters in hexadecimal
 </b><br>
 <P>
@@ -818,6 +849,17 @@ If <b>hex</b> or <b>use_length</b> is used with the POSIX wrapper API (see
 below), the REG_PEND extension is used to pass the pattern's length.
 </P>
 <br><b>
+Specifying a maximum for variable lookbehinds
+</b><br>
+<P>
+Variable lookbehind assertions are supported only if, for each one, there is a
+maximum length (in characters) that it can match. There is a limit on this,
+whose default can be set at build time, with an ultimate default of 255. The
+<b>max_varlookbehind</b> modifier uses the <b>pcre2_set_max_varlookbehind()</b>
+function to change the limit. Lookbehinds whose branches each match a fixed
+length are limited to 65535 characters per branch.
+</P>
+<br><b>
 Specifying wide characters in 16-bit and 32-bit modes
 </b><br>
 <P>
@@ -1069,6 +1111,7 @@ process.
       allusedtext                 show all consulted text
       altglobal                   alternative global matching
   /g  global                      global matching
+      heapframes_size             show match data heapframes size
       jitstack=&#60;n&#62;                set size of JIT stack
       mark                        show mark values
       replace=&#60;string&#62;            specify a replacement string
@@ -1166,18 +1209,19 @@ The following modifiers set options for <b>pcre2_match()</b> or
 <a href="pcre2api.html"><b>pcre2api</b></a>
 for a description of their effects.
 <pre>
-      anchored                  set PCRE2_ANCHORED
-      endanchored               set PCRE2_ENDANCHORED
-      dfa_restart               set PCRE2_DFA_RESTART
-      dfa_shortest              set PCRE2_DFA_SHORTEST
-      no_jit                    set PCRE2_NO_JIT
-      no_utf_check              set PCRE2_NO_UTF_CHECK
-      notbol                    set PCRE2_NOTBOL
-      notempty                  set PCRE2_NOTEMPTY
-      notempty_atstart          set PCRE2_NOTEMPTY_ATSTART
-      noteol                    set PCRE2_NOTEOL
-      partial_hard (or ph)      set PCRE2_PARTIAL_HARD
-      partial_soft (or ps)      set PCRE2_PARTIAL_SOFT
+      anchored                   set PCRE2_ANCHORED
+      endanchored                set PCRE2_ENDANCHORED
+      dfa_restart                set PCRE2_DFA_RESTART
+      dfa_shortest               set PCRE2_DFA_SHORTEST
+      disable_recurseloop_check  set PCRE2_DISABLE_RECURSELOOP_CHECK
+      no_jit                     set PCRE2_NO_JIT
+      no_utf_check               set PCRE2_NO_UTF_CHECK
+      notbol                     set PCRE2_NOTBOL
+      notempty                   set PCRE2_NOTEMPTY
+      notempty_atstart           set PCRE2_NOTEMPTY_ATSTART
+      noteol                     set PCRE2_NOTEOL
+      partial_hard (or ph)       set PCRE2_PARTIAL_HARD
+      partial_soft (or ps)       set PCRE2_PARTIAL_SOFT
 </pre>
 The partial matching modifiers are provided with abbreviations because they
 appear frequently in tests.
@@ -1229,16 +1273,20 @@ pattern, but can be overridden by modifiers on the subject.
       copy=&#60;number or name&#62;      copy captured substring
       depth_limit=&#60;n&#62;            set a depth limit
       dfa                        use <b>pcre2_dfa_match()</b>
-      find_limits                find match and depth limits
+      find_limits                find heap, match and depth limits
+      find_limits_noheap         find match and depth limits
       get=&#60;number or name&#62;       extract captured substring
       getall                     extract all captured substrings
   /g  global                     global matching
+      heapframes_size            show match data heapframes size
       heap_limit=&#60;n&#62;             set a limit on heap memory (Kbytes)
       jitstack=&#60;n&#62;               set size of JIT stack
       mark                       show mark values
       match_limit=&#60;n&#62;            set a match limit
       memory                     show heap memory usage
       null_context               match with a NULL context
+      null_replacement           substitute with NULL replacement
+      null_subject               match with NULL subject
       offset=&#60;n&#62;                 set starting offset
       offset_limit=&#60;n&#62;           set offset limit
       ovector=&#60;n&#62;                set size of output vector
@@ -1353,7 +1401,7 @@ functions, unless <b>callout_none</b> is specified. Its behaviour can be
 controlled by various modifiers listed above whose names begin with
 <b>callout_</b>. Details are given in the section entitled "Callouts"
 <a href="#callouts">below.</a>
-Testing callouts from <b>pcre2_substitute()</b> is decribed separately in
+Testing callouts from <b>pcre2_substitute()</b> is described separately in
 "Testing the substitution function"
 <a href="#substitution">below.</a>
 </P>
@@ -1496,7 +1544,7 @@ Testing substitute callouts
 If the <b>substitute_callout</b> modifier is set, a substitution callout
 function is set up. The <b>null_context</b> modifier must not be set, because
 the address of the callout function is passed in a match context. When the
-callout function is called (after each substitution), details of the the input
+callout function is called (after each substitution), details of the input
 and output strings are output. For example:
 <pre>
   /abc/g,replace=&#60;$0&#62;,substitute_callout
@@ -1550,7 +1598,7 @@ Setting heap, match, and depth limits
 <P>
 The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
 the appropriate limits in the match context. These values are ignored when the
-<b>find_limits</b> modifier is specified.
+<b>find_limits</b> or <b>find_limits_noheap</b> modifier is specified.
 </P>
 <br><b>
 Finding minimum limits
@@ -1560,8 +1608,12 @@ If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b
 calls the relevant matching function several times, setting different values in
 the match context via <b>pcre2_set_heap_limit()</b>,
 <b>pcre2_set_match_limit()</b>, or <b>pcre2_set_depth_limit()</b> until it finds
-the minimum values for each parameter that allows the match to complete without
-error. If JIT is being used, only the match limit is relevant.
+the smallest value for each parameter that allows the match to complete without
+a "limit exceeded" error. The match itself may succeed or fail. An alternative
+modifier, <b>find_limits_noheap</b>, omits the heap limit. This is used in the
+standard tests, because the minimum heap limit varies between systems. If JIT
+is being used, only the match limit is relevant, and the other two are
+automatically omitted.
 </P>
 <P>
 When using this modifier, the pattern should not contain any limit settings
@@ -1589,9 +1641,7 @@ overall amount of computing resource that is used.
 </P>
 <P>
 For both kinds of matching, the <i>heap_limit</i> number, which is in kibibytes
-(units of 1024 bytes), limits the amount of heap memory used for matching. A
-value of zero disables the use of any heap memory; many simple pattern matches
-can be done without using the heap, so zero is not an unreasonable setting.
+(units of 1024 bytes), limits the amount of heap memory used for matching.
 </P>
 <br><b>
 Showing MARK names
@@ -1609,16 +1659,32 @@ Showing memory usage
 <P>
 The <b>memory</b> modifier causes <b>pcre2test</b> to log the sizes of all heap
 memory allocation and freeing calls that occur during a call to
-<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. These occur only when a match
-requires a bigger vector than the default for remembering backtracking points
-(<b>pcre2_match()</b>) or for internal workspace (<b>pcre2_dfa_match()</b>). In
-many cases there will be no heap memory used and therefore no additional
-output. No heap memory is allocated during matching with JIT, so in that case
-the <b>memory</b> modifier never has any effect. For this modifier to work, the
+<b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>. In the latter case, heap memory
+is used only when a match requires more internal workspace than the default
+allocation on the stack, so in many cases there will be no output. No heap
+memory is allocated during matching with JIT. For this modifier to work, the
 <b>null_context</b> modifier must not be set on both the pattern and the
 subject, though it can be set on one or the other.
 </P>
 <br><b>
+Showing the heap frame overall vector size
+</b><br>
+<P>
+The <b>heapframes_size</b> modifier is relevant for matches using
+<b>pcre2_match()</b> without JIT. After a match has run (whether successful or
+not) the size, in bytes, of the allocated heap frames vector that is left
+attached to the match data block is shown. If the matching action involved
+several calls to <b>pcre2_match()</b> (for example, global matching or for
+timing) only the final value is shown.
+</P>
+<P>
+This modifier is ignored, with a warning, for POSIX or DFA matching. JIT
+matching does not use the heap frames vector, so the size is always zero,
+unless there was a previous non-JIT match. Note that specifying a size of zero
+for the output vector (see below) causes <b>pcre2test</b> to free its match data
+block (and associated heap frames vector) and allocate a new one.
+</P>
+<br><b>
 Setting a starting offset
 </b><br>
 <P>
@@ -1649,9 +1715,9 @@ A value of zero is useful when testing the POSIX API because it causes
 <b>regexec()</b> to be called with a NULL capture vector. When not testing the
 POSIX API, a value of zero is used to cause
 <b>pcre2_match_data_create_from_pattern()</b> to be called, in order to create a
-match block of exactly the right size for the pattern. (It is not possible to
-create a match block with a zero-length ovector; there is always at least one
-pair of offsets.)
+new match block of exactly the right size for the pattern. (It is not possible
+to create a match block with a zero-length ovector; there is always at least
+one pair of offsets.) The old match data block is freed.
 </P>
 <br><b>
 Passing the subject as zero-terminated
@@ -1668,7 +1734,7 @@ When testing <b>pcre2_substitute()</b>, this modifier also has the effect of
 passing the replacement string as zero-terminated.
 </P>
 <br><b>
-Passing a NULL context
+Passing a NULL context, subject, or replacement
 </b><br>
 <P>
 Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
@@ -1676,7 +1742,13 @@ Normally, <b>pcre2test</b> passes a context block to <b>pcre2_match()</b>,
 If the <b>null_context</b> modifier is set, however, NULL is passed. This is for
 testing that the matching and substitution functions behave correctly in this
 case (they use default values). This modifier cannot be used with the
-<b>find_limits</b> or <b>substitute_callout</b> modifiers.
+<b>find_limits</b>, <b>find_limits_noheap</b>, or <b>substitute_callout</b>
+modifiers.
+</P>
+<P>
+Similarly, for testing purposes, if the <b>null_subject</b> or
+<b>null_replacement</b> modifier is set, the subject or replacement string
+pointers are passed as NULL, respectively, to the relevant functions.
 </P>
 <br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
 <P>
@@ -1743,9 +1815,8 @@ unset substring is shown as "&#60;unset&#62;", as for the second data line.
 If the strings contain any non-printing characters, they are output as \xhh
 escapes if the value is less than 256 and UTF mode is not set. Otherwise they
 are output as \x{hh...} escapes. See below for the definition of non-printing
-characters. If the <b>aftertext</b> modifier is set, the output for substring
-0 is followed by the the rest of the subject string, identified by "0+" like
-this:
+characters. If the <b>aftertext</b> modifier is set, the output for substring 0
+is followed by the rest of the subject string, identified by "0+" like this:
 <pre>
     re&#62; /cat/aftertext
   data&#62; cataract
@@ -2101,7 +2172,7 @@ If <b>jitverify</b> is used with #pop, it does not automatically imply
 <b>jit</b>, which is different behaviour from when it is used on a pattern.
 </P>
 <P>
-The #popcopy command is analagous to the <b>pushcopy</b> modifier in that it
+The #popcopy command is analogous to the <b>pushcopy</b> modifier in that it
 makes current a copy of the topmost stack pattern, leaving the original still
 on the stack.
 </P>
@@ -2122,9 +2193,9 @@ Cambridge, England.
 </P>
 <br><a name="SEC21" href="#TOC1">REVISION</a><br>
 <P>
-Last updated: 30 August 2021
+Last updated: 27 January 2024
 <br>
-Copyright &copy; 1997-2021 University of Cambridge.
+Copyright &copy; 1997-2024 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.

+ 45 - 18
regex.mod/pcre/doc/html/pcre2unicode.html

@@ -50,17 +50,21 @@ UNICODE PROPERTY SUPPORT
 <P>
 When PCRE2 is built with Unicode support, the escape sequences \p{..},
 \P{..}, and \X can be used. This is not dependent on the PCRE2_UTF setting.
-The Unicode properties that can be tested are limited to the general category
-properties such as Lu for an upper case letter or Nd for a decimal number, the
-Unicode script names such as Arabic or Han, and the derived properties Any and
-L&. Full lists are given in the
+The Unicode properties that can be tested are a subset of those that Perl
+supports. Currently they are limited to the general category properties such as
+Lu for an upper case letter or Nd for a decimal number, the derived properties
+Any and LC (synonym L&), the Unicode script names such as Arabic or Han,
+Bidi_Class, Bidi_Control, and a few binary properties.
+</P>
+<P>
+The full lists are given in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 and
 <a href="pcre2syntax.html"><b>pcre2syntax</b></a>
-documentation. Only the short names for properties are supported. For example,
-\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
-Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
-compatibility with Perl 5.6. PCRE2 does not support this.
+documentation. In general, only the short names for properties are supported.
+For example, \p{L} matches a letter. Its longer synonym, \p{Letter}, is not
+supported. Furthermore, in Perl, many properties may optionally be prefixed by
+"Is", for compatibility with Perl 5.6. PCRE2 does not support this.
 </P>
 <br><b>
 WIDE CHARACTERS AND UTF MODES
@@ -117,21 +121,22 @@ and \B, because they are defined in terms of \w and \W. If you want
 to test for a wider sense of, say, "digit", you can use explicit Unicode
 property tests such as \p{Nd}. Alternatively, if you set the PCRE2_UCP option,
 the way that the character escapes work is changed so that Unicode properties
-are used to determine which characters match. There are more details in the
-section on
+are used to determine which characters match, though there are some options
+that suppress this for individual escapes. For details see the section on
 <a href="pcre2pattern.html#genericchartypes">generic character types</a>
 in the
 <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
 documentation.
 </P>
 <P>
-Similarly, characters that match the POSIX named character classes are all
-low-valued characters, unless the PCRE2_UCP option is set.
+Like the escapes, characters that match the POSIX named character classes are
+all low-valued characters unless the PCRE2_UCP option is set, but there is an
+option to override this.
 </P>
 <P>
-However, the special horizontal and vertical white space matching escapes (\h,
-\H, \v, and \V) do match all the appropriate Unicode characters, whether or
-not PCRE2_UCP is set.
+In contrast to the character escapes and character classes, the special
+horizontal and vertical white space escapes (\h, \H, \v, and \V) do match
+all the appropriate Unicode characters, whether or not PCRE2_UCP is set.
 </P>
 <br><b>
 UNICODE CASE-EQUIVALENCE
@@ -144,6 +149,14 @@ lookup is used for speed. A few Unicode characters such as Greek sigma have
 more than two code points that are case-equivalent, and these are treated
 specially. Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case
 processing for non-UTF character encodings such as UCS-2.
+</P>
+<P>
+There are two ASCII characters (S and K) that, in addition to their ASCII lower
+case equivalents, have a non-ASCII one as well (long S and Kelvin sign).
+Recognition of these non-ASCII characters as case-equivalent to their ASCII
+counterparts can be disabled by setting the PCRE2_EXTRA_CASELESS_RESTRICT
+option. When this is set, all characters in a case equivalence must either be
+ASCII or non-ASCII; there can be no mixing.
 <a name="scriptruns"></a></P>
 <br><b>
 SCRIPT RUNS
@@ -431,6 +444,14 @@ PCRE2_UTF to be set as well. Note, however, that the pattern itself must be a
 valid UTF string.
 </P>
 <P>
+If you do not set PCRE2_MATCH_INVALID_UTF when calling <b>pcre2_compile</b>, and
+you are not certain that your subject strings are valid UTF sequences, you
+should not make use of the JIT "fast path" function <b>pcre2_jit_match()</b>
+because it bypasses sanity checks, including the one for UTF validity. An
+invalid string may cause undefined behaviour, including looping, crashing, or
+giving the wrong answer.
+</P>
+<P>
 Setting PCRE2_MATCH_INVALID_UTF does not affect what <b>pcre2_compile()</b>
 generates, but if <b>pcre2_jit_compile()</b> is subsequently called, it does
 generate different code. If JIT is not used, the option affects the behaviour
@@ -471,13 +492,19 @@ Using PCRE2_MATCH_INVALID_UTF, an application can run matches on arbitrary
 data, knowing that any matched strings that are returned are valid UTF. This
 can be useful when searching for UTF text in executable or other binary files.
 </P>
+<P>
+Note, however, that the 16-bit and 32-bit PCRE2 libraries process strings as
+sequences of uint16_t or uint32_t code points. They cannot find valid UTF
+sequences within an arbitrary string of bytes unless such sequences are
+suitably aligned.
+</P>
 <br><b>
 AUTHOR
 </b><br>
 <P>
 Philip Hazel
 <br>
-University Computing Service
+Retired from University Computing Service
 <br>
 Cambridge, England.
 <br>
@@ -486,9 +513,9 @@ Cambridge, England.
 REVISION
 </b><br>
 <P>
-Last updated: 23 February 2020
+Last updated: 12 October 2023
 <br>
-Copyright &copy; 1997-2020 University of Cambridge.
+Copyright &copy; 1997-2023 University of Cambridge.
 <br>
 <p>
 Return to the <a href="index.html">PCRE2 index page</a>.

+ 3 - 0
regex.mod/pcre/doc/index.html.src

@@ -255,6 +255,9 @@ in the library.
 <tr><td><a href="pcre2_set_max_pattern_length.html">pcre2_set_max_pattern_length</a></td>
     <td>&nbsp;&nbsp;Set the maximum length of pattern</td></tr>
 
+<tr><td><a href="pcre2_set_max_varlookbehind.html">pcre2_set_max_varlookbehind</a></td>
+    <td>&nbsp;&nbsp;Set the maximum match length for a variable-length lookbehind</td></tr>
+
 <tr><td><a href="pcre2_set_newline.html">pcre2_set_newline</a></td>
     <td>&nbsp;&nbsp;Set the newline convention</td></tr>
 

+ 13 - 9
regex.mod/pcre/doc/pcre2-config.txt

@@ -1,10 +1,11 @@
-PCRE2-CONFIG(1)             General Commands Manual            PCRE2-CONFIG(1)
 
+PCRE2-CONFIG(1)             General Commands Manual            PCRE2-CONFIG(1)
 
 
 NAME
        pcre2-config - program to return PCRE2 configuration
 
+
 SYNOPSIS
 
        pcre2-config [--prefix] [--exec-prefix] [--version]
@@ -15,9 +16,9 @@ SYNOPSIS
 DESCRIPTION
 
        pcre2-config returns the configuration of the installed PCRE2 libraries
-       and the options required to compile a program to use them. Some of  the
-       options  apply  only  to the 8-bit, or 16-bit, or 32-bit libraries, re-
-       spectively, and are not available for  libraries  that  have  not  been
+       and  the options required to compile a program to use them. Some of the
+       options apply only to the 8-bit, or 16-bit, or  32-bit  libraries,  re-
+       spectively,  and  are  not  available  for libraries that have not been
        built. If an unavailable option is encountered, the "usage" information
        is output.
 
@@ -58,9 +59,9 @@ OPTIONS
                  -I options, but is blank on many systems).
 
        --cflags-posix
-                 Writes  to  the  standard output the command line options re-
-                 quired to compile files that use PCRE2's  POSIX  API  wrapper
-                 library  (this  may  include some -I options, but is blank on
+                 Writes to the standard output the command  line  options  re-
+                 quired  to  compile  files that use PCRE2's POSIX API wrapper
+                 library (this may include some -I options, but  is  blank  on
                  many systems).
 
 
@@ -71,11 +72,14 @@ SEE ALSO
 
 AUTHOR
 
-       This manual page was originally written by Mark Baker  for  the  Debian
-       GNU/Linux  system.  It has been subsequently revised as a generic PCRE2
+       This  manual  page  was originally written by Mark Baker for the Debian
+       GNU/Linux system. It has been subsequently revised as a  generic  PCRE2
        man page.
 
 
 REVISION
 
        Last updated: 28 September 2014
+
+
+PCRE2 10.00                    28 September 2014               PCRE2-CONFIG(1)

File diff suppressed because it is too large
+ 294 - 267
regex.mod/pcre/doc/pcre2.txt


+ 21 - 10
regex.mod/pcre/doc/pcre2_compile.3

@@ -1,4 +1,4 @@
-.TH PCRE2_COMPILE 3 "23 May 2019" "PCRE2 10.34"
+.TH PCRE2_COMPILE 3 "19 January 2024" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@@ -20,24 +20,26 @@ arguments are:
 .sp
   \fIpattern\fP       A string containing the expression to be compiled
   \fIlength\fP        The length of the string or PCRE2_ZERO_TERMINATED
-  \fIoptions\fP       Option bits
+  \fIoptions\fP       Primary option bits
   \fIerrorcode\fP     Where to put an error code
   \fIerroroffset\fP   Where to put an error offset
   \fIccontext\fP      Pointer to a compile context or NULL
 .sp
 The length of the pattern and any error offset that is returned are in code
-units, not characters. A compile context is needed only if you want to provide
-custom memory allocation functions, or to provide an external function for
-system stack size checking, or to change one or more of these parameters:
+units, not characters. A NULL pattern with zero length is treated as an empty
+string. A compile context is needed only if you want to provide custom memory
+allocation functions, or to provide an external function for system stack size
+checking (see \fBpcre2_set_compile_recursion_guard()\fP), or to change one or
+more of these parameters:
 .sp
   What \eR matches (Unicode newlines, or CR, LF, CRLF only);
   PCRE2's character tables;
   The newline character sequence;
   The compile time nested parentheses limit;
-  The maximum pattern length (in code units) that is allowed.
-  The additional options bits (see pcre2_set_compile_extra_options())
+  The maximum pattern length (in code units) that is allowed;
+  The additional options bits.
 .sp
-The option bits are:
+The primary option bits are:
 .sp
   PCRE2_ANCHORED           Force pattern anchoring
   PCRE2_ALLOW_EMPTY_CLASS  Allow empty classes
@@ -80,8 +82,17 @@ Additional options may be set in the compile context via the
 .\"
 function.
 .P
-The yield of this function is a pointer to a private data structure that
-contains the compiled pattern, or NULL if an error was detected.
+If either of \fIerrorcode\fP or \fIerroroffset\fP is NULL, the function returns
+NULL immediately. Otherwise, the yield of this function is a pointer to a
+private data structure that contains the compiled pattern, or NULL if an error
+was detected. In the error case, a text error message can be obtained by
+passing the value returned via the \fIerrorcode\fP argument to the
+\fBpcre2_get_error_message()\fP function. The offset (in code units) where the
+error was encountered is returned via the \fIerroroffset\fP argument.
+.P
+If there is no error, the value passed via \fIerrorcode\fP returns the message
+"no error" if passed to \fBpcre2_get_error_message()\fP, and the value passed
+via \fIerroroffset\fP is zero.
 .P
 There is a complete description of the PCRE2 native API, with more detail on
 each option, in the

+ 1 - 1
regex.mod/pcre/doc/pcre2_general_context_create.3

@@ -8,7 +8,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .PP
 .nf
 .B pcre2_general_context *pcre2_general_context_create(
-.B "  void *(*\fIprivate_malloc\fP)(PCRE2_SIZE, void *),"
+.B "  void *(*\fIprivate_malloc\fP)(size_t, void *),"
 .B "  void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);"
 .fi
 .

+ 28 - 0
regex.mod/pcre/doc/pcre2_get_match_data_heapframes_size.3

@@ -0,0 +1,28 @@
+.TH PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE 3 "13 January 2023" "PCRE2 10.43"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre2.h>
+.PP
+.nf
+.B PCRE2_SIZE pcre2_get_match_data_heapframes_size(
+.B "  pcre2_match_data *\fImatch_data\fP);"
+.fi
+.
+.SH DESCRIPTION
+.rs
+.sp
+This function returns the size, in bytes, of the heapframes data block that is
+owned by its argument.
+.P
+There is a complete description of the PCRE2 native API in the
+.\" HREF
+\fBpcre2api\fP
+.\"
+page and a description of the POSIX API in the
+.\" HREF
+\fBpcre2posix\fP
+.\"
+page.

+ 11 - 3
regex.mod/pcre/doc/pcre2_jit_match.3

@@ -1,4 +1,4 @@
-.TH PCRE2_JIT_MATCH 3 "11 February 2020" "PCRE2 10.35"
+.TH PCRE2_JIT_MATCH 3 "20 January 2023" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@@ -20,7 +20,15 @@ This function matches a compiled regular expression that has been successfully
 processed by the JIT compiler against a given subject string, using a matching
 algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and
 it bypasses some of the sanity checks that \fBpcre2_match()\fP applies.
-Its arguments are exactly the same as for
+.P
+In UTF mode, the subject string is not checked for UTF validity. Unless
+PCRE2_MATCH_INVALID_UTF was set when the pattern was compiled, passing an
+invalid UTF string results in undefined behaviour. Your program may crash or
+loop or give wrong results. In the absence of PCRE2_MATCH_INVALID_UTF you
+should only call \fBpcre2_jit_match()\fP in UTF mode if you are sure the
+subject is valid.
+.P
+The arguments for \fBpcre2_jit_match()\fP are exactly the same as for
 .\" HREF
 \fBpcre2_match()\fP,
 .\"
@@ -29,7 +37,7 @@ PCRE2_ZERO_TERMINATED is not supported.
 .P
 The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
 PCRE2_NOTEMPTY_ATSTART, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Unsupported
-options are ignored. The subject string is not checked for UTF validity.
+options are ignored.
 .P
 The return values are the same as for \fBpcre2_match()\fP plus
 PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested

+ 4 - 3
regex.mod/pcre/doc/pcre2_jit_stack_create.3

@@ -7,8 +7,8 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .B #include <pcre2.h>
 .PP
 .nf
-.B pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE \fIstartsize\fP,
-.B "  PCRE2_SIZE \fImaxsize\fP, pcre2_general_context *\fIgcontext\fP);"
+.B pcre2_jit_stack *pcre2_jit_stack_create(size_t \fIstartsize\fP,
+.B "  size_t \fImaxsize\fP, pcre2_general_context *\fIgcontext\fP);"
 .fi
 .
 .SH DESCRIPTION
@@ -22,7 +22,8 @@ allocation. The result can be passed to the JIT run-time code by calling
 \fBpcre2_jit_stack_assign()\fP to associate the stack with a compiled pattern,
 which can then be processed by \fBpcre2_match()\fP or \fBpcre2_jit_match()\fP.
 A maximum stack size of 512KiB to 1MiB should be more than enough for any
-pattern. For more details, see the
+pattern. If the stack couldn't be allocated or the values passed were not
+reasonable, NULL will be returned. For more details, see the
 .\" HREF
 \fBpcre2jit\fP
 .\"

+ 3 - 1
regex.mod/pcre/doc/pcre2_match.3

@@ -1,4 +1,4 @@
-.TH PCRE2_MATCH 3 "16 October 2018" "PCRE2 10.33"
+.TH PCRE2_MATCH 3 "27 January 2024" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@@ -50,6 +50,8 @@ terminated by a binary zero code unit. The options are:
   PCRE2_ANCHORED          Match only at the first position
   PCRE2_COPY_MATCHED_SUBJECT
                           On success, make a private subject copy
+  PCRE2_DISABLE_RECURSELOOP_CHECK
+                          Only useful in rare cases; use with care
   PCRE2_ENDANCHORED       Pattern can match only at end of subject
   PCRE2_NOTBOL            Subject string is not the beginning of a line
   PCRE2_NOTEOL            Subject string is not the end of a line

+ 1 - 1
regex.mod/pcre/doc/pcre2_match_data_create_from_pattern.3

@@ -21,7 +21,7 @@ offsets that are required in the match data block. These form the "output
 vector" (ovector) within the match data block, and are used to identify the
 matched string and any captured substrings when matching with
 \fBpcre2_match()\fP. If you are using \fBpcre2_dfa_match()\fP, which uses the
-outut vector in a different way, you should use \fBpcre2_match_data_create()\fP
+output vector in a different way, you should use \fBpcre2_match_data_create()\fP
 instead of this function.
 .P
 The second argument points to a general context, for custom memory management,

+ 6 - 4
regex.mod/pcre/doc/pcre2_match_data_free.3

@@ -1,4 +1,4 @@
-.TH PCRE2_MATCH_DATA_FREE 3 "16 October 2018" "PCRE2 10.33"
+.TH PCRE2_MATCH_DATA_FREE 3 "18 January 2023" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@@ -16,11 +16,13 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 If \fImatch_data\fP is NULL, this function does nothing. Otherwise,
 \fImatch_data\fP must point to a match data block, which this function frees,
 using the memory freeing function from the general context or compiled pattern
-with which it was created, or \fBfree()\fP if that was not set.
+with which it was created, or \fBfree()\fP if that was not set. If the match
+data block was previously passed to \fBpcre2_match()\fP, it will have an
+attached heapframe vector; this is also freed.
 .P
 If the PCRE2_COPY_MATCHED_SUBJECT was used for a successful match using this
-match data block, the copy of the subject that was remembered with the block is
-also freed.
+match data block, the copy of the subject that was referenced within the block
+is also freed.
 .P
 There is a complete description of the PCRE2 native API in the
 .\" HREF

+ 1 - 1
regex.mod/pcre/doc/pcre2_serialize_decode.3

@@ -36,7 +36,7 @@ the following negative error codes:
   PCRE2_ERROR_BADDATA   \fInumber_of_codes\fP is zero or less
   PCRE2_ERROR_BADMAGIC  mismatch of id bytes in \fIbytes\fP
   PCRE2_ERROR_BADMODE   mismatch of variable unit size or PCRE version
-  PCRE2_ERROR_MEMORY    memory allocation failed
+  PCRE2_ERROR_NOMEMORY  memory allocation failed
   PCRE2_ERROR_NULL      \fIcodes\fP or \fIbytes\fP is NULL
 .sp
 PCRE2_ERROR_BADMAGIC may mean that the data is corrupt, or that it was compiled

+ 15 - 3
regex.mod/pcre/doc/pcre2_set_compile_extra_options.3

@@ -1,4 +1,4 @@
-.TH PCRE2_SET_COMPILE_EXTRA_OPTIONS 3 "31 August 2021" "PCRE2 10.38"
+.TH PCRE2_SET_COMPILE_EXTRA_OPTIONS 3 "03 February 2023" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@@ -18,16 +18,28 @@ This function sets additional option bits for \fBpcre2_compile()\fP that are
 housed in a compile context. It completely replaces all the bits. The extra
 options are:
 .sp
-.\" JOIN
   PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     Allow \eK in lookarounds
-  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \ex{df800} to \ex{dfff}
+.\" JOIN
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \ex{d800} to \ex{dfff}
                                          in UTF-8 and UTF-32 modes
 .\" JOIN
   PCRE2_EXTRA_ALT_BSUX                 Extended alternate \eu, \eU, and
                                          \ex handling
+  PCRE2_EXTRA_ASCII_BSD                \ed remains ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_BSS                \es remains ASCII in UCP mode
+  PCRE2_EXTRA_ASCII_BSW                \ew remains ASCII in UCP mode
+.\" JOIN
+  PCRE2_EXTRA_ASCII_DIGIT              [:digit:] and [:xdigit:] POSIX classes
+                                         remain ASCII in UCP mode
+.\" JOIN
+  PCRE2_EXTRA_ASCII_POSIX              POSIX classes remain ASCII in
+                                         UCP mode
 .\" JOIN
   PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL    Treat all invalid escapes as
                                          a literal following character
+.\" JOIN
+  PCRE2_EXTRA_CASELESS_RESTRICT        Disable mixed ASCII/non-ASCII
+                                         case folding
   PCRE2_EXTRA_ESCAPED_CR_IS_LF         Interpret \er as \en
   PCRE2_EXTRA_MATCH_LINE               Pattern matches whole lines
   PCRE2_EXTRA_MATCH_WORD               Pattern matches "words"

+ 30 - 0
regex.mod/pcre/doc/pcre2_set_max_varlookbehind.3

@@ -0,0 +1,30 @@
+.TH PCRE2_SET_MAX_VARLOOKBEHIND 3 "09 August 2023" "PCRE2 10.43"
+.SH NAME
+PCRE2 - Perl-compatible regular expressions (revised API)
+.SH SYNOPSIS
+.rs
+.sp
+.B #include <pcre2.h>
+.PP
+.nf
+.B int pcre2_set_max_varlookbehind(pcre2_compile_context *\fIccontext\fP,
+.B "  uint32_t \fIvalue\fP);"
+.fi
+.
+.SH DESCRIPTION
+.rs
+.sp
+This sets a maximum length for the number of characters matched by a
+variable-length lookbehind assertion. The default is set when PCRE2 is built,
+with the ultimate default being 255, the same as Perl. Lookbehind assertions
+without a bounding length are not supported. The result is always zero.
+.P
+There is a complete description of the PCRE2 native API in the
+.\" HREF
+\fBpcre2api\fP
+.\"
+page and a description of the POSIX API in the
+.\" HREF
+\fBpcre2posix\fP
+.\"
+page.

+ 1 - 1
regex.mod/pcre/doc/pcre2_set_recursion_memory_management.3

@@ -9,7 +9,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .nf
 .B int pcre2_set_recursion_memory_management(
 .B "  pcre2_match_context *\fImcontext\fP,"
-.B "  void *(*\fIprivate_malloc\fP)(PCRE2_SIZE, void *),"
+.B "  void *(*\fIprivate_malloc\fP)(size_t, void *),"
 .B "  void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);"
 .fi
 .

+ 29 - 19
regex.mod/pcre/doc/pcre2_substitute.3

@@ -55,32 +55,42 @@ automatically added.
 The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
 zero-terminated strings. The options are:
 .sp
-  PCRE2_ANCHORED             Match only at the first position
-  PCRE2_ENDANCHORED          Pattern can match only at end of subject
-  PCRE2_NOTBOL               Subject is not the beginning of a line
-  PCRE2_NOTEOL               Subject is not the end of a line
-  PCRE2_NOTEMPTY             An empty string is not a valid match
+  PCRE2_ANCHORED                     Match only at the first position
+  PCRE2_ENDANCHORED                  Match only at end of subject
 .\" JOIN
-  PCRE2_NOTEMPTY_ATSTART     An empty string at the start of the
-                              subject is not a valid match
-  PCRE2_NO_JIT               Do not use JIT matching
+  PCRE2_NOTBOL                       Subject is not the beginning of a
+                                      line
+  PCRE2_NOTEOL                       Subject is not the end of a line
 .\" JOIN
-  PCRE2_NO_UTF_CHECK         Do not check the subject or replacement
-                              for UTF validity (only relevant if
-                              PCRE2_UTF was set at compile time)
-  PCRE2_SUBSTITUTE_EXTENDED  Do extended replacement processing
-  PCRE2_SUBSTITUTE_GLOBAL    Replace all occurrences in the subject
-  PCRE2_SUBSTITUTE_LITERAL   The replacement string is literal
-  PCRE2_SUBSTITUTE_MATCHED   Use pre-existing match data for 1st match
-  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  If overflow, compute needed length
+  PCRE2_NOTEMPTY                     An empty string is not a
+                                      valid match
+.\" JOIN
+  PCRE2_NOTEMPTY_ATSTART             An empty string at the start of
+                                      the subject is not a valid match
+  PCRE2_NO_JIT                       Do not use JIT matching
+.\" JOIN
+  PCRE2_NO_UTF_CHECK                 Do not check for UTF validity in
+                                      the subject or replacement
+.\" JOIN
+                                      (only relevant if PCRE2_UTF was
+                                      set at compile time)
+  PCRE2_SUBSTITUTE_EXTENDED          Do extended replacement processing
+.\" JOIN
+  PCRE2_SUBSTITUTE_GLOBAL            Replace all occurrences in the
+                                      subject
+  PCRE2_SUBSTITUTE_LITERAL           The replacement string is literal
+.\" JOIN
+  PCRE2_SUBSTITUTE_MATCHED           Use pre-existing match data for
+                                      first match
+  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH   If overflow, compute needed length
   PCRE2_SUBSTITUTE_REPLACEMENT_ONLY  Return only replacement string(s)
-  PCRE2_SUBSTITUTE_UNKNOWN_UNSET  Treat unknown group as unset
-  PCRE2_SUBSTITUTE_UNSET_EMPTY  Simple unset insert = empty string
+  PCRE2_SUBSTITUTE_UNKNOWN_UNSET     Treat unknown group as unset
+  PCRE2_SUBSTITUTE_UNSET_EMPTY       Simple unset insert = empty string
 .sp
 If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED,
 PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored.
 .P
-If PCRE2_SUBSTITUTE_MATCHED is set, \fImatch_data\fP must be non-zero; its
+If PCRE2_SUBSTITUTE_MATCHED is set, \fImatch_data\fP must be non-NULL; its
 contents must be the result of a call to \fBpcre2_match()\fP using the same
 pattern and subject.
 .P

+ 2 - 2
regex.mod/pcre/doc/pcre2_substring_list_free.3

@@ -1,4 +1,4 @@
-.TH PCRE2_SUBSTRING_LIST_FREE 3 "28 June 2018" "PCRE2 10.32"
+.TH PCRE2_SUBSTRING_LIST_FREE 3 "02 December 2023" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@@ -7,7 +7,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .B #include <pcre2.h>
 .PP
 .SM
-.B void pcre2_substring_list_free(PCRE2_SPTR *\fIlist\fP);
+.B void pcre2_substring_list_free(PCRE2_UCHAR **\fIlist\fP);
 .
 .SH DESCRIPTION
 .rs

+ 267 - 116
regex.mod/pcre/doc/pcre2api.3

@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "30 August 2021" "PCRE2 10.38"
+.TH PCRE2API 3 "27 January 2024" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@@ -49,6 +49,11 @@ document for an overview of all the PCRE2 documentation.
 .nf
 .B PCRE2_SPTR pcre2_get_mark(pcre2_match_data *\fImatch_data\fP);
 .sp
+.B PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *\fImatch_data\fP);
+.sp
+.B PCRE2_SIZE pcre2_get_match_data_heapframes_size(
+.B "  pcre2_match_data *\fImatch_data\fP);"
+.sp
 .B uint32_t pcre2_get_ovector_count(pcre2_match_data *\fImatch_data\fP);
 .sp
 .B PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *\fImatch_data\fP);
@@ -96,6 +101,9 @@ document for an overview of all the PCRE2 documentation.
 .B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP,
 .B "  PCRE2_SIZE \fIvalue\fP);"
 .sp
+.B int pcre2_set_max_varlookbehind(pcre2_compile_context *\fIccontext\fP,
+.B "  uint32_t \fIvalue\fP);"
+.sp
 .B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP,
 .B "  uint32_t \fIvalue\fP);"
 .sp
@@ -173,7 +181,7 @@ document for an overview of all the PCRE2 documentation.
 .B int pcre2_substring_number_from_name(const pcre2_code *\fIcode\fP,
 .B "  PCRE2_SPTR \fIname\fP);"
 .sp
-.B void pcre2_substring_list_free(PCRE2_SPTR *\fIlist\fP);
+.B void pcre2_substring_list_free(PCRE2_UCHAR **\fIlist\fP);
 .sp
 .B int pcre2_substring_list_get(pcre2_match_data *\fImatch_data\fP,
 .B "  PCRE2_UCHAR ***\fIlistptr\fP, PCRE2_SIZE **\fIlengthsptr\fP);"
@@ -206,8 +214,8 @@ document for an overview of all the PCRE2 documentation.
 .sp
 .B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP);
 .sp
-.B pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE \fIstartsize\fP,
-.B "  PCRE2_SIZE \fImaxsize\fP, pcre2_general_context *\fIgcontext\fP);"
+.B pcre2_jit_stack *pcre2_jit_stack_create(size_t \fIstartsize\fP,
+.B "  size_t \fImaxsize\fP, pcre2_general_context *\fIgcontext\fP);"
 .sp
 .B void pcre2_jit_stack_assign(pcre2_match_context *\fImcontext\fP,
 .B "  pcre2_jit_callback \fIcallback_function\fP, void *\fIcallback_data\fP);"
@@ -270,7 +278,7 @@ document for an overview of all the PCRE2 documentation.
 .sp
 .B int pcre2_set_recursion_memory_management(
 .B "  pcre2_match_context *\fImcontext\fP,"
-.B "  void *(*\fIprivate_malloc\fP)(PCRE2_SIZE, void *),"
+.B "  void *(*\fIprivate_malloc\fP)(size_t, void *),"
 .B "  void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);"
 .fi
 .sp
@@ -324,10 +332,8 @@ This contains the function prototypes and other definitions for all three
 libraries. One, two, or all three can be installed simultaneously. On Unix-like
 systems the libraries are called \fBlibpcre2-8\fP, \fBlibpcre2-16\fP, and
 \fBlibpcre2-32\fP, and they can also co-exist with the original PCRE libraries.
-.P
-Character strings are passed to and from a PCRE2 library as a sequence of
-unsigned integers in code units of the appropriate width. Every PCRE2 function
-comes in three different forms, one for each library, for example:
+Every PCRE2 function comes in three different forms, one for each library, for
+example:
 .sp
   \fBpcre2_compile_8()\fP
   \fBpcre2_compile_16()\fP
@@ -338,10 +344,15 @@ There are also three different sets of data types:
   \fBPCRE2_UCHAR8, PCRE2_UCHAR16, PCRE2_UCHAR32\fP
   \fBPCRE2_SPTR8,  PCRE2_SPTR16,  PCRE2_SPTR32\fP
 .sp
-The UCHAR types define unsigned code units of the appropriate widths. For
-example, PCRE2_UCHAR16 is usually defined as `uint16_t'. The SPTR types are
-constant pointers to the equivalent UCHAR types, that is, they are pointers to
-vectors of unsigned code units.
+The UCHAR types define unsigned code units of the appropriate widths.
+For example, PCRE2_UCHAR16 is usually defined as `uint16_t'.
+The SPTR types are pointers to constants of the equivalent UCHAR types,
+that is, they are pointers to vectors of unsigned code units.
+.P
+Character strings are passed to a PCRE2 library as sequences of unsigned
+integers in code units of the appropriate width. The length of a string may
+be given as a number of code units, or the string may be specified as
+zero-terminated.
 .P
 Many applications use only one code unit width. For their convenience, macros
 are defined whose names are the generic forms such as \fBpcre2_compile()\fP and
@@ -378,7 +389,7 @@ names, without the _8, _16, or _32 suffix.
 PCRE2 has its own native API, which is described in this document. There are
 also some wrapper functions for the 8-bit library that correspond to the
 POSIX regular expression API, but they do not give access to all the
-functionality of PCRE2. They are described in the
+functionality of PCRE2 and they are not thread-safe. They are described in the
 .\" HREF
 \fBpcre2posix\fP
 .\"
@@ -490,7 +501,8 @@ unsigned integer type, currently always defined as \fIsize_t\fP. The largest
 value that can be stored in such a type (that is ~(PCRE2_SIZE)0) is reserved
 as a special indicator for zero-terminated strings and unset offsets.
 Therefore, the longest string that can be handled is one less than this
-maximum.
+maximum. Note that string lengths are always given in code units. Only in the
+8-bit library is such a length the same as the number of bytes in the string.
 .
 .
 .\" HTML <a name="newlines"></a>
@@ -793,6 +805,16 @@ external sources can limit their size. The default is the largest number that a
 PCRE2_SIZE variable can hold, which is effectively unlimited.
 .sp
 .nf
+.B int pcre2_set_max_varlookbehind(pcre2_compile_context *\fIccontext\fP,
+.B "  uint32_t \fIvalue\fP);"
+.fi
+.sp
+This sets the maximum number of characters that can be matched by a
+variable-length lookbehind assertion. The default is set when PCRE2 is built,
+with the ultimate default being 255, the same as Perl. Lookbehind assertions
+without a bounding length are not supported.
+.sp
+.nf
 .B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP,
 .B "  uint32_t \fIvalue\fP);"
 .fi
@@ -953,7 +975,7 @@ has its own memory control arrangements (see the
 documentation for more details). If the limit is reached, the negative error
 code PCRE2_ERROR_HEAPLIMIT is returned. The default limit can be set when PCRE2
 is built; if it is not, the default is set very large and is essentially
-"unlimited".
+unlimited.
 .P
 A value for the heap limit may also be supplied by an item at the start of a
 pattern of the form
@@ -964,18 +986,18 @@ where ddd is a decimal number. However, such a setting is ignored unless ddd is
 less than the limit set by the caller of \fBpcre2_match()\fP or, if no such
 limit is set, less than the default.
 .P
-The \fBpcre2_match()\fP function starts out using a 20KiB vector on the system
-stack for recording backtracking points. The more nested backtracking points
-there are (that is, the deeper the search tree), the more memory is needed.
-Heap memory is used only if the initial vector is too small. If the heap limit
-is set to a value less than 21 (in particular, zero) no heap memory will be
-used. In this case, only patterns that do not have a lot of nested backtracking
-can be successfully processed.
+The \fBpcre2_match()\fP function always needs some heap memory, so setting a
+value of zero guarantees a "heap limit exceeded" error. Details of how
+\fBpcre2_match()\fP uses the heap are given in the
+.\" HREF
+\fBpcre2perform\fP
+.\"
+documentation.
 .P
-Similarly, for \fBpcre2_dfa_match()\fP, a vector on the system stack is used
-when processing pattern recursions, lookarounds, or atomic groups, and only if
-this is not big enough is heap memory used. In this case, too, setting a value
-of zero disables the use of the heap.
+For \fBpcre2_dfa_match()\fP, a vector on the system stack is used when
+processing pattern recursions, lookarounds, or atomic groups, and only if this
+is not big enough is heap memory used. In this case, setting a value of zero
+disables the use of the heap.
 .sp
 .nf
 .B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
@@ -1002,10 +1024,9 @@ matching that goes on for a very long time, and so the \fImatch_limit\fP value
 is also used in this case (but in a different way) to limit how long the
 matching can continue.
 .P
-The default value for the limit can be set when PCRE2 is built; the default
-default is 10 million, which handles all but the most extreme cases. A value
-for the match limit may also be supplied by an item at the start of a pattern
-of the form
+The default value for the limit can be set when PCRE2 is built; the default is
+10 million, which handles all but the most extreme cases. A value for the match
+limit may also be supplied by an item at the start of a pattern of the form
 .sp
   (*LIMIT_MATCH=ddd)
 .sp
@@ -1019,10 +1040,10 @@ less than the limit set by the caller of \fBpcre2_match()\fP or
 .fi
 .sp
 This parameter limits the depth of nested backtracking in \fBpcre2_match()\fP.
-Each time a nested backtracking point is passed, a new memory "frame" is used
+Each time a nested backtracking point is passed, a new memory frame is used
 to remember the state of matching at that point. Thus, this parameter
 indirectly limits the amount of memory that is used in a match. However,
-because the size of each memory "frame" depends on the number of capturing
+because the size of each memory frame depends on the number of capturing
 parentheses, the actual memory limit varies from pattern to pattern. This limit
 was more useful in versions before 10.30, where function recursion was used for
 backtracking.
@@ -1115,7 +1136,13 @@ for the amount of heap memory used by \fBpcre2_match()\fP or
   PCRE2_CONFIG_JIT
 .sp
 The output is a uint32_t integer that is set to one if support for just-in-time
-compiling is available; otherwise it is set to zero.
+compiling is included in the library; otherwise it is set to zero. Note that
+having the support in the library does not guarantee that JIT will be used for
+any given match. See the
+.\" HREF
+\fBpcre2jit\fP
+.\"
+documentation for more details.
 .sp
   PCRE2_CONFIG_JITTARGET
 .sp
@@ -1235,10 +1262,12 @@ zero.
 .fi
 .P
 The \fBpcre2_compile()\fP function compiles a pattern into an internal form.
-The pattern is defined by a pointer to a string of code units and a length (in
-code units). If the pattern is zero-terminated, the length can be specified as
-PCRE2_ZERO_TERMINATED. The function returns a pointer to a block of memory that
-contains the compiled pattern and related data, or NULL if an error occurred.
+The pattern is defined by a pointer to a string of code units and a length in
+code units. If the pattern is zero-terminated, the length can be specified as
+PCRE2_ZERO_TERMINATED. A NULL pattern pointer with a length of zero is treated
+as an empty string (NULL with a non-zero length causes an error return). The
+function returns a pointer to a block of memory that contains the compiled
+pattern and related data, or NULL if an error occurred.
 .P
 If the compile context argument \fIccontext\fP is NULL, memory for the compiled
 pattern is obtained by calling \fBmalloc()\fP. Otherwise, it is obtained from
@@ -1323,8 +1352,7 @@ If \fIerrorcode\fP or \fIerroroffset\fP is NULL, \fBpcre2_compile()\fP returns
 NULL immediately. Otherwise, the variables to which these point are set to an
 error code and an offset (number of code units) within the pattern,
 respectively, when \fBpcre2_compile()\fP returns NULL because a compilation
-error has occurred. The values are not defined when compilation is successful
-and \fBpcre2_compile()\fP returns a non-NULL value.
+error has occurred.
 .P
 There are nearly 100 positive error codes that \fBpcre2_compile()\fP may return
 if it finds an error in the pattern. There are also some negative error codes
@@ -1343,14 +1371,17 @@ message"
 below)
 .\"
 should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined
-for both positive and negative error codes in \fBpcre2.h\fP.
+for both positive and negative error codes in \fBpcre2.h\fP. When compilation
+is successful \fIerrorcode\fP is set to a value that returns the message "no
+error" if passed to \fBpcre2_get_error_message()\fP.
 .P
 The value returned in \fIerroroffset\fP is an indication of where in the
-pattern the error occurred. It is not necessarily the furthest point in the
-pattern that was read. For example, after the error "lookbehind assertion is
-not fixed length", the error offset points to the start of the failing
-assertion. For an invalid UTF-8 or UTF-16 string, the offset is that of the
-first code unit of the failing character.
+pattern an error occurred. When there is no error, zero is returned. A non-zero
+value is not necessarily the furthest point in the pattern that was read. For
+example, after the error "lookbehind assertion is not fixed length", the error
+offset points to the start of the failing assertion. For an invalid UTF-8 or
+UTF-16 string, the offset is that of the first code unit of the failing
+character.
 .P
 Some errors are not detected until the whole pattern has been scanned; in these
 cases, the offset passed back is the length of the pattern. Note that the
@@ -1465,11 +1496,13 @@ PCRE2_UCP is set, Unicode properties are used for all characters with more than
 one other case, and for all characters whose code points are greater than
 U+007F. Note that there are two ASCII characters, K and S, that, in addition to
 their lower case ASCII equivalents, are case-equivalent with U+212A (Kelvin
-sign) and U+017F (long S) respectively. For lower valued characters with only
-one other case, a lookup table is used for speed. When neither PCRE2_UTF nor
-PCRE2_UCP is set, a lookup table is used for all code points less than 256, and
-higher code points (available only in 16-bit or 32-bit mode) are treated as not
-having another case.
+sign) and U+017F (long S) respectively. If you do not want this case
+equivalence, you can suppress it by setting PCRE2_EXTRA_CASELESS_RESTRICT.
+.P
+For lower valued characters with only one other case, a lookup table is used
+for speed. When neither PCRE2_UTF nor PCRE2_UCP is set, a lookup table is used
+for all code points less than 256, and higher code points (available only in
+16-bit or 32-bit mode) are treated as not having another case.
 .sp
   PCRE2_DOLLAR_ENDONLY
 .sp
@@ -1528,13 +1561,13 @@ the end of the subject.
   PCRE2_EXTENDED
 .sp
 If this bit is set, most white space characters in the pattern are totally
-ignored except when escaped or inside a character class. However, white space
-is not allowed within sequences such as (?> that introduce various
-parenthesized groups, nor within numerical quantifiers such as {1,3}. Ignorable
-white space is permitted between an item and a following quantifier and between
-a quantifier and a following + that indicates possessiveness. PCRE2_EXTENDED is
-equivalent to Perl's /x option, and it can be changed within a pattern by a
-(?x) option setting.
+ignored except when escaped, inside a character class, or inside a \eQ...\eE
+sequence. However, white space is not allowed within sequences such as (?> that
+introduce various parenthesized groups, nor within numerical quantifiers such
+as {1,3}. Ignorable white space is permitted between an item and a following
+quantifier and between a quantifier and a following + that indicates
+possessiveness. PCRE2_EXTENDED is equivalent to Perl's /x option, and it can be
+changed within a pattern by a (?x) option setting.
 .P
 When PCRE2 is compiled without Unicode support, PCRE2_EXTENDED recognizes as
 white space only those characters with code points less than 256 that are
@@ -1594,7 +1627,7 @@ PCRE2_FIRSTLINE if \fIstartoffset\fP is greater than 3. See also
 PCRE2_USE_OFFSET_LIMIT, which provides a more general limiting facility. If
 PCRE2_FIRSTLINE is set with an offset limit, a match must occur in the first
 line and also within the offset limit. In other words, whichever limit comes
-first is used.
+first is used. This option has no effect for anchored patterns.
 .sp
   PCRE2_LITERAL
 .sp
@@ -1613,7 +1646,11 @@ PCRE2_EXTRA_MATCH_WORD are also supported. Any other options cause an error.
 .sp
 This option forces PCRE2_UTF (see below) and also enables support for matching
 by \fBpcre2_match()\fP in subject strings that contain invalid UTF sequences.
-This facility is not supported for DFA matching. For details, see the
+Note, however, that the 16-bit and 32-bit PCRE2 libraries process strings as
+sequences of uint16_t or uint32_t code points. They cannot find valid UTF
+sequences within an arbitrary string of bytes unless such sequences are
+suitably aligned. This facility is not supported for DFA matching. For details,
+see the
 .\" HREF
 \fBpcre2unicode\fP
 .\"
@@ -1794,7 +1831,7 @@ it is set, the effect of passing an invalid UTF string as a pattern is
 undefined. It may cause your program to crash or loop.
 .P
 Note that this option can also be passed to \fBpcre2_match()\fP and
-\fBpcre_dfa_match()\fP, to suppress UTF validity checking of the subject
+\fBpcre2_dfa_match()\fP, to suppress UTF validity checking of the subject
 string.
 .P
 Note also that setting PCRE2_NO_UTF_CHECK at compile time does not disable the
@@ -1815,8 +1852,9 @@ are not representable in UTF-16.
 This option has two effects. Firstly, it changes the way PCRE2 processes \eB,
 \eb, \eD, \ed, \eS, \es, \eW, \ew, and some of the POSIX character classes. By
 default, only ASCII characters are recognized, but if PCRE2_UCP is set, Unicode
-properties are used instead to classify characters. More details are given in
-the section on
+properties are used to classify characters. There are some PCRE2_EXTRA
+options (see below) that add finer control to this behaviour. More details are
+given in the section on
 .\" HTML <a href="pcre2pattern.html#genericchartypes">
 .\" </a>
 generic character types
@@ -1825,14 +1863,15 @@ in the
 .\" HREF
 \fBpcre2pattern\fP
 .\"
-page. If you set PCRE2_UCP, matching one of the items it affects takes much
-longer.
+page.
 .P
 The second effect of PCRE2_UCP is to force the use of Unicode properties for
-upper/lower casing operations on characters with code points greater than 127,
-even when PCRE2_UTF is not set. This makes it possible, for example, to process
-strings in the 16-bit UCS-2 code. This option is available only if PCRE2 has
-been compiled with Unicode support (which is the default).
+upper/lower casing operations, even when PCRE2_UTF is not set. This makes it
+possible to process strings in the 16-bit UCS-2 code. This option is available
+only if PCRE2 has been compiled with Unicode support (which is the default).
+The PCRE2_EXTRA_CASELESS_RESTRICT option (see below) restricts caseless
+matching such that ASCII characters match only ASCII characters and non-ASCII
+characters match only non-ASCII characters.
 .sp
   PCRE2_UNGREEDY
 .sp
@@ -1865,8 +1904,7 @@ behaviour of PCRE2 are given in the
 .\" HREF
 \fBpcre2unicode\fP
 .\"
-page. In particular, note that it changes the way PCRE2_CASELESS handles
-characters with code points greater than 127.
+page. In particular, note that it changes the way PCRE2_CASELESS works.
 .
 .
 .\" HTML <a name="extracompileoptions"></a>
@@ -1912,6 +1950,37 @@ the way that ECMAscript (aka JavaScript) does. Additional functionality was
 defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of
 PCRE2_ALT_BSUX, but in addition it recognizes \eu{hhh..} as a hexadecimal
 character code, where hhh.. is any number of hexadecimal digits.
+.sp
+  PCRE2_EXTRA_ASCII_BSD
+.sp
+This option forces \ed to match only ASCII digits, even when PCRE2_UCP is set.
+It can be changed within a pattern by means of the (?aD) option setting.
+.sp
+  PCRE2_EXTRA_ASCII_BSS
+.sp
+This option forces \es to match only ASCII space characters, even when
+PCRE2_UCP is set. It can be changed within a pattern by means of the (?aS)
+option setting.
+.sp
+  PCRE2_EXTRA_ASCII_BSW
+.sp
+This option forces \ew to match only ASCII word characters, even when PCRE2_UCP
+is set. It can be changed within a pattern by means of the (?aW) option
+setting.
+.sp
+  PCRE2_EXTRA_ASCII_DIGIT
+.sp
+This option forces the POSIX character classes [:digit:] and [:xdigit:] to
+match only ASCII digits, even when PCRE2_UCP is set. It can be changed within
+a pattern by means of the (?aT) option setting.
+.sp
+  PCRE2_EXTRA_ASCII_POSIX
+.sp
+This option forces all the POSIX character classes, including [:digit:] and
+[:xdigit:], to match only ASCII characters, even when PCRE2_UCP is set. It can
+be changed within a pattern by means of the (?aP) option setting, but note that
+this also sets PCRE2_EXTRA_ASCII_DIGIT in order to ensure that (?-aP) unsets
+all ASCII restrictions for POSIX classes.
 .sp
   PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
 .sp
@@ -1932,6 +2001,17 @@ that a sequence such as [\eN{] is interpreted as a malformed attempt at
 [\eN{...}] and so is treated as [N{] whereas [\eN] gives an error because an
 unqualified \eN is a valid escape sequence but is not supported in a character
 class. To reiterate: this is a dangerous option. Use with great care.
+.sp
+  PCRE2_EXTRA_CASELESS_RESTRICT
+.sp
+When either PCRE2_UCP or PCRE2_UTF is set, caseless matching follows Unicode
+rules, which allow for more than two cases per character. There are two
+case-equivalent character sets that contain both ASCII and non-ASCII
+characters. The ASCII letter S is case-equivalent to U+017f (long S) and the
+ASCII letter K is case-equivalent to U+212a (Kelvin sign). This option disables
+recognition of case-equivalences that cross the ASCII/non-ASCII boundary. In a
+caseless match, both characters must be ASCII or both must be non-ASCII. The
+option can be changed within a pattern by the (?r) option setting.
 .sp
   PCRE2_EXTRA_ESCAPED_CR_IS_LF
 .sp
@@ -1974,8 +2054,8 @@ also set.
 .sp
 .B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP);
 .sp
-.B pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE \fIstartsize\fP,
-.B "  PCRE2_SIZE \fImaxsize\fP, pcre2_general_context *\fIgcontext\fP);"
+.B pcre2_jit_stack *pcre2_jit_stack_create(size_t \fIstartsize\fP,
+.B "  size_t \fImaxsize\fP, pcre2_general_context *\fIgcontext\fP);"
 .sp
 .B void pcre2_jit_stack_assign(pcre2_match_context *\fImcontext\fP,
 .B "  pcre2_jit_callback \fIcallback_function\fP, void *\fIcallback_data\fP);"
@@ -2015,13 +2095,14 @@ point. However, this applies only to characters whose code points are less than
 256. By default, higher-valued code points never match escapes such as \ew or
 \ed.
 .P
-When PCRE2 is built with Unicode support (the default), the Unicode properties
-of all characters can be tested with \ep and \eP, or, alternatively, the
+When PCRE2 is built with Unicode support (the default), certain Unicode
+character properties can be tested with \ep and \eP, or, alternatively, the
 PCRE2_UCP option can be set when a pattern is compiled; this causes \ew and
 friends to use Unicode property support instead of the built-in tables.
 PCRE2_UCP also causes upper/lower casing operations on characters with code
 points greater than 127 to use Unicode properties. These effects apply even
-when PCRE2_UTF is not set.
+when PCRE2_UTF is not set. There are, however, some PCRE2_EXTRA options (see
+above) that can be used to modify or suppress them.
 .P
 The use of locales with Unicode is discouraged. If you are handling characters
 with code points greater than 127, you should either use Unicode support, or
@@ -2279,7 +2360,7 @@ return zero. The third argument should point to a \fBsize_t\fP variable.
   PCRE2_INFO_LASTCODETYPE
 .sp
 Returns 1 if there is a rightmost literal code unit that must exist in any
-matched string, other than at its start. The third argument should  point to a
+matched string, other than at its start. The third argument should point to a
 \fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is
 returned, the code unit value itself can be retrieved using
 PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is
@@ -2517,7 +2598,9 @@ large enough to hold as many as are expected.
 A minimum of at least 1 pair is imposed by \fBpcre2_match_data_create()\fP, so
 it is always possible to return the overall matched string in the case of
 \fBpcre2_match()\fP or the longest match in the case of
-\fBpcre2_dfa_match()\fP.
+\fBpcre2_dfa_match()\fP. The maximum number of pairs is 65535; if the first
+argument of \fBpcre2_match_data_create()\fP is greater than this, 65535 is
+used.
 .P
 The second argument of \fBpcre2_match_data_create()\fP is a pointer to a
 general context, which can specify custom memory management for obtaining the
@@ -2569,6 +2652,45 @@ calling \fBpcre2_match_data_free()\fP. If this function is called with a NULL
 argument, it returns immediately, without doing anything.
 .
 .
+.SH "MEMORY USE FOR MATCH DATA BLOCKS"
+.rs
+.sp
+.nf
+.B PCRE2_SIZE pcre2_get_match_data_size(pcre2_match_data *\fImatch_data\fP);
+.sp
+.B PCRE2_SIZE pcre2_get_match_data_heapframes_size(
+.B "  pcre2_match_data *\fImatch_data\fP);"
+.fi
+.P
+The size of a match data block depends on the size of the ovector that it
+contains. The function \fBpcre2_get_match_data_size()\fP returns the size, in
+bytes, of the block that is its argument.
+.P
+When \fBpcre2_match()\fP runs interpretively (that is, without using JIT), it
+makes use of a vector of data frames for remembering backtracking positions.
+The size of each individual frame depends on the number of capturing
+parentheses in the pattern and can be obtained by calling
+\fBpcre2_pattern_info()\fP with the PCRE2_INFO_FRAMESIZE option (see the
+section entitled "Information about a compiled pattern"
+.\" HTML <a href="#infoaboutpattern">
+.\" </a>
+above).
+.\"
+.P
+Heap memory is used for the frames vector; if the initial memory block turns
+out to be too small during matching, it is automatically expanded. When
+\fBpcre2_match()\fP returns, the memory is not freed, but remains attached to
+the match data block, for use by any subsequent matches that use the same
+block. It is automatically freed when the match data block itself is freed.
+.P
+You can find the current size of the frames vector that a match data block owns
+by calling \fBpcre2_get_match_data_heapframes_size()\fP. For a newly created
+match data block the size will be zero. Some types of match may require a lot
+of frames and thus a large vector; applications that run in environments where
+memory is constrained can check this and free the match data block if the heap
+frames vector has become too big.
+.
+.
 .SH "MATCHING A PATTERN: THE TRADITIONAL FUNCTION"
 .rs
 .sp
@@ -2624,7 +2746,9 @@ The subject string is passed to \fBpcre2_match()\fP as a pointer in
 \fIstartoffset\fP. The length and offset are in code units, not characters.
 That is, they are in bytes for the 8-bit library, 16-bit code units for the
 16-bit library, and 32-bit code units for the 32-bit library, whether or not
-UTF processing is enabled.
+UTF processing is enabled. As a special case, if \fIsubject\fP is NULL and
+\fIlength\fP is zero, the subject is assumed to be an empty string. If
+\fIlength\fP is non-zero, an error occurs if \fIsubject\fP is NULL.
 .P
 If \fIstartoffset\fP is greater than the length of the subject,
 \fBpcre2_match()\fP returns PCRE2_ERROR_BADOFFSET. When the starting offset is
@@ -2680,13 +2804,15 @@ the use of .* with PCRE2_DOTALL, not by starting the pattern with ^ or \eA.
 .sp
 The unused bits of the \fIoptions\fP argument for \fBpcre2_match()\fP must be
 zero. The only bits that may be set are PCRE2_ANCHORED,
-PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL,
-PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK,
-PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below.
+PCRE2_COPY_MATCHED_SUBJECT, PCRE2_DISABLE_RECURSELOOP_CHECK, PCRE2_ENDANCHORED,
+PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
+PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT.
+Their action is described below.
 .P
 Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not supported by
 the just-in-time (JIT) compiler. If it is set, JIT matching is disabled and the
-interpretive code in \fBpcre2_match()\fP is run. Apart from PCRE2_NO_JIT
+interpretive code in \fBpcre2_match()\fP is run.
+PCRE2_DISABLE_RECURSELOOP_CHECK is ignored by JIT, but apart from PCRE2_NO_JIT
 (obviously), the remaining options are supported for JIT matching.
 .sp
   PCRE2_ANCHORED
@@ -2712,6 +2838,23 @@ the match block itself is used. The copy is automatically freed when
 \fBpcre2_match_data_free()\fP is called to free the match data block. It is also
 automatically freed if the match data block is re-used for another match
 operation.
+.sp
+  PCRE2_DISABLE_RECURSELOOP_CHECK
+.sp
+This option is relevant only to \fBpcre2_match()\fP for interpretive matching.
+It is ignored when JIT is used, and is forbidden for \fBpcre2_dfa_match()\fP.
+.P
+The use of recursion in patterns can lead to infinite loops. In the
+interpretive matcher these would be eventually caught by the match or heap
+limits, but this could take a long time and/or use a lot of memory if the
+limits are large. There is therefore a check at the start of each recursion.
+If the same group is still active from a previous call, and the current subject
+pointer is the same as it was at the start of that group, and the furthest
+inspected character of the subject has not changed, an error is generated.
+.P
+There are rare cases of matches that would complete, but nevertheless trigger
+this error. This option disables the check. It is provided mainly for testing
+when comparing JIT and interpretive behaviour.
 .sp
   PCRE2_ENDANCHORED
 .sp
@@ -2986,8 +3129,8 @@ Offset values that correspond to unused groups at the end of the expression are
 also set to PCRE2_UNSET. For example, if the string "abc" is matched against
 the pattern (abc)(x(yz)?)? groups 2 and 3 are not matched. The return from the
 function is 2, because the highest used capture group number is 1. The offsets
-for for the second and third capture groupss (assuming the vector is large
-enough, of course) are set to PCRE2_UNSET.
+for the second and third capture groups (assuming the vector is large enough,
+of course) are set to PCRE2_UNSET.
 .P
 Elements in the ovector that do not correspond to capturing parentheses in the
 pattern are never changed. That is, if a pattern contains \fIn\fP capturing
@@ -3158,11 +3301,11 @@ The backtracking match limit was reached.
 .sp
   PCRE2_ERROR_NOMEMORY
 .sp
-If a pattern contains many nested backtracking points, heap memory is used to
-remember them. This error is given when the memory allocation function (default
-or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
-if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
-also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
+Heap memory is used to remember backtracking points. This error is given when
+the memory allocation function (default or custom) fails. Note that a different
+error, PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
+the heap limit. PCRE2_ERROR_NOMEMORY is also returned if
+PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
 .sp
   PCRE2_ERROR_NULL
 .sp
@@ -3305,7 +3448,7 @@ capturing slots, substring number 1 is unset.
 .B int pcre2_substring_list_get(pcre2_match_data *\fImatch_data\fP,
 .B "  PCRE2_UCHAR ***\fIlistptr\fP, PCRE2_SIZE **\fIlengthsptr\fP);
 .sp
-.B void pcre2_substring_list_free(PCRE2_SPTR *\fIlist\fP);
+.B void pcre2_substring_list_free(PCRE2_UCHAR **\fIlist\fP);
 .fi
 .P
 The \fBpcre2_substring_list_get()\fP function extracts all available substrings
@@ -3413,12 +3556,16 @@ same number causes an error at compile time.
 .P
 This function optionally calls \fBpcre2_match()\fP and then makes a copy of the
 subject string in \fIoutputbuffer\fP, replacing parts that were matched with
-the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This
-can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. There is an
-option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just the
-replacement string(s). The default action is to perform just one replacement if
-the pattern matches, but there is an option that requests multiple replacements
-(see PCRE2_SUBSTITUTE_GLOBAL below).
+the \fIreplacement\fP string, whose length is supplied in \fIrlength\fP, which
+can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a
+special case, if \fIreplacement\fP is NULL and \fIrlength\fP is zero, the
+replacement is assumed to be an empty string. If \fIrlength\fP is non-zero, an
+error occurs if \fIreplacement\fP is NULL.
+.P
+There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just
+the replacement string(s). The default action is to perform just one
+replacement if the pattern matches, but there is an option that requests
+multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below).
 .P
 If successful, \fBpcre2_substitute()\fP returns the number of substitutions
 that were carried out. This may be zero if no match was found, and is never
@@ -3447,12 +3594,12 @@ block may or may not have been changed.
 As well as the usual options for \fBpcre2_match()\fP, a number of additional
 options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP.
 One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external
-\fImatch_data\fP block must be provided, and it must have been used for an
-external call to \fBpcre2_match()\fP. The data in the \fImatch_data\fP block
-(return code, offset vector) is used for the first substitution instead of
-calling \fBpcre2_match()\fP from within \fBpcre2_substitute()\fP. This allows
-an application to check for a match before choosing to substitute, without
-having to repeat the match.
+\fImatch_data\fP block must be provided, and it must have already been used for
+an external call to \fBpcre2_match()\fP with the same pattern and subject
+arguments. The data in the \fImatch_data\fP block (return code, offset vector)
+is then used for the first substitution instead of calling \fBpcre2_match()\fP
+from within \fBpcre2_substitute()\fP. This allows an application to check for a
+match before choosing to substitute, without having to repeat the match.
 .P
 The contents of the externally supplied match data block are not changed when
 PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set,
@@ -3509,7 +3656,8 @@ replacement string causes an immediate return with the relevant UTF error code.
 If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not interpreted
 in any way. By default, however, a dollar character is an escape character that
 can specify the insertion of characters from capture groups and names from
-(*MARK) or other control verbs in the pattern. The following forms are always
+(*MARK) or other control verbs in the pattern. Dollar is the only escape
+character (backslash is treated as literal). The following forms are always
 recognized:
 .sp
   $$                  insert a dollar character
@@ -3584,7 +3732,7 @@ and force lower case. The escape sequences change the current state: \eU and
 terminating a \eQ quoted sequence) reverts to no case forcing. The sequences
 \eu and \el force the next character (if it is a letter) to upper or lower
 case, respectively, and then the state automatically reverts to no case
-forcing. Case forcing applies to all inserted  characters, including those from
+forcing. Case forcing applies to all inserted characters, including those from
 capture groups and letters within \eQ...\eE quoted sequences. If either
 PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode
 properties are used for case forcing characters whose code points are greater
@@ -3649,7 +3797,9 @@ needed is returned via \fIoutlengthptr\fP. Note that this does not happen by
 default.
 .P
 PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the
-\fImatch_data\fP argument is NULL.
+\fImatch_data\fP argument is NULL or if the \fIsubject\fP or \fIreplacement\fP
+arguments are NULL. For backward compatibility reasons an exception is made for
+the \fIreplacement\fP argument if the \fIrlength\fP argument is also 0.
 .P
 PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the
 replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE
@@ -3722,7 +3872,7 @@ PCRE2_SUBSTITUTE_GLOBAL is set, processing continues with a search for the next
 match. If the value is not zero, the current replacement is not accepted. If
 the value is greater than zero, processing continues when
 PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less than zero or
-PCRE2_SUBSTITUTE_GLOBAL is not set), the the rest of the input is copied to the
+PCRE2_SUBSTITUTE_GLOBAL is not set), the rest of the input is copied to the
 output and the call to \fBpcre2_substitute()\fP exits, returning the number of
 matches so far.
 .
@@ -3811,12 +3961,13 @@ other alternatives. Ultimately, when it runs out of matches,
 .P
 The function \fBpcre2_dfa_match()\fP is called to match a subject string
 against a compiled pattern, using a matching algorithm that scans the subject
-string just once (not counting lookaround assertions), and does not backtrack.
-This has different characteristics to the normal algorithm, and is not
-compatible with Perl. Some of the features of PCRE2 patterns are not supported.
-Nevertheless, there are times when this kind of matching can be useful. For a
-discussion of the two matching algorithms, and a list of features that
-\fBpcre2_dfa_match()\fP does not support, see the
+string just once (not counting lookaround assertions), and does not backtrack
+(except when processing lookaround assertions). This has different
+characteristics to the normal algorithm, and is not compatible with Perl. Some
+of the features of PCRE2 patterns are not supported. Nevertheless, there are
+times when this kind of matching can be useful. For a discussion of the two
+matching algorithms, and a list of features that \fBpcre2_dfa_match()\fP does
+not support, see the
 .\" HREF
 \fBpcre2matching\fP
 .\"
@@ -3848,7 +3999,7 @@ Here is an example of a simple call to \fBpcre2_dfa_match()\fP:
     wspace,         /* working space vector */
     20);            /* number of elements (NOT size in bytes) */
 .
-.SS "Option bits for \fBpcre_dfa_match()\fP"
+.SS "Option bits for \fBpcre2_dfa_match()\fP"
 .rs
 .sp
 The unused bits of the \fIoptions\fP argument for \fBpcre2_dfa_match()\fP must
@@ -4016,6 +4167,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 30 August 2021
-Copyright (c) 1997-2021 University of Cambridge.
+Last updated: 27 January 2024
+Copyright (c) 1997-2024 University of Cambridge.
 .fi

+ 41 - 15
regex.mod/pcre/doc/pcre2build.3

@@ -1,4 +1,4 @@
-.TH PCRE2BUILD 3 "20 March 2020" "PCRE2 10.35"
+.TH PCRE2BUILD 3 "24 November 2023" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .
@@ -98,7 +98,17 @@ one of
   --disable-shared
   --disable-static
 .sp
-to the \fBconfigure\fP command.
+to the \fBconfigure\fP command. Setting --disable-shared ensures that PCRE2
+libraries are built as static libraries. The binaries that are then created as
+part of the build process (for example, \fBpcre2test\fP and \fBpcre2grep\fP)
+are linked statically with one or more PCRE2 libraries, but may also be
+dynamically linked with other libraries such as \fBlibc\fP. If you want these
+binaries to be fully statically linked, you can set LDFLAGS like this:
+.sp
+LDFLAGS=--static ./configure --disable-shared
+.sp
+Note the two hyphens in --static. Of course, this works only if static versions
+of all the relevant libraries are available for linking.
 .
 .
 .SH "UNICODE AND UTF SUPPORT"
@@ -122,8 +132,9 @@ locked this out by setting PCRE2_NEVER_UTF.
 UTF support allows the libraries to process character code points up to
 0x10ffff in the strings that they handle. Unicode support also gives access to
 the Unicode properties of characters, using pattern escapes such as \eP, \ep,
-and \eX. Only the general category properties such as \fILu\fP and \fINd\fP are
-supported. Details are given in the
+and \eX. Only the general category properties such as \fILu\fP and \fINd\fP,
+script names, and some bi-directional properties are supported. Details are
+given in the
 .\" HREF
 \fBpcre2pattern\fP
 .\"
@@ -277,12 +288,11 @@ to the \fBconfigure\fP command. This setting also applies to the
 \fBpcre2_dfa_match()\fP matching function, and to JIT matching (though the
 counting is done differently).
 .P
-The \fBpcre2_match()\fP function starts out using a 20KiB vector on the system
-stack to record backtracking points. The more nested backtracking points there
-are (that is, the deeper the search tree), the more memory is needed. If the
-initial vector is not large enough, heap memory is used, up to a certain limit,
-which is specified in kibibytes (units of 1024 bytes). The limit can be changed
-at run time, as described in the
+The \fBpcre2_match()\fP function uses heap memory to record backtracking
+points. The more nested backtracking points there are (that is, the deeper the
+search tree), the more memory is needed. There is an upper limit, specified in
+kibibytes (units of 1024 bytes). This limit can be changed at run time, as
+described in the
 .\" HREF
 \fBpcre2api\fP
 .\"
@@ -302,7 +312,7 @@ You can also explicitly limit the depth of nested backtracking in the
 for --with-match-limit. You can set a lower default limit by adding, for
 example,
 .sp
-  --with-match-limit_depth=10000
+  --with-match-limit-depth=10000
 .sp
 to the \fBconfigure\fP command. This value can be overridden at run time. This
 depth limit indirectly limits the amount of heap memory that is used, but
@@ -317,6 +327,22 @@ used for lookaround assertions, atomic groups, and recursion within patterns.
 The limit does not apply to JIT matching.
 .
 .
+.SH "LIMITING VARIABLE-LENGTH LOOKBEHIND ASSERTIONS"
+.rs
+.sp
+Lookbehind assertions in which one or more branches can match a variable number
+of characters are supported only if there is a maximum matching length for each
+top-level branch. There is a limit to this maximum that defaults to 255
+characters. You can alter this default by a setting such as
+.sp
+  --with-max-varlookbehind=100
+.sp
+The limit can be changed at run time by calling
+\fBpcre2_set_max_varlookbehind()\fP. Lookbehind assertions in which every
+branch matches a fixed number of characters (not necessarily all the same) are
+not constrained by this limit.
+.
+.
 .\" HTML <a name="createtables"></a>
 .SH "CREATING CHARACTER TABLES AT BUILD TIME"
 .rs
@@ -461,7 +487,7 @@ with \fBlibedit\fP, which has a BSD licence.
 .P
 Setting --enable-pcre2test-libreadline causes the \fB-lreadline\fP option to be
 added to the \fBpcre2test\fP build. In many operating environments with a
-sytem-installed readline library this is sufficient. However, in some
+system-installed readline library this is sufficient. However, in some
 environments (e.g. if an unmodified distribution version of readline is in
 use), some extra configuration may be necessary. The INSTALL file for
 \fBlibreadline\fP says this:
@@ -624,7 +650,7 @@ give a warning.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@@ -633,6 +659,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 20 March 2020
-Copyright (c) 1997-2020 University of Cambridge.
+Last updated: 24 November 2023
+Copyright (c) 1997-2023 University of Cambridge.
 .fi

+ 10 - 10
regex.mod/pcre/doc/pcre2callout.3

@@ -1,4 +1,4 @@
-.TH PCRE2CALLOUT 3 "03 February 2019" "PCRE2 10.33"
+.TH PCRE2CALLOUT 3 "19 January 2024" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH SYNOPSIS
@@ -327,12 +327,12 @@ The \fInext_item_length\fP field contains the length of the next item to be
 processed in the pattern string. When the callout is at the end of the pattern,
 the length is zero. When the callout precedes an opening parenthesis, the
 length includes meta characters that follow the parenthesis. For example, in a
-callout before an assertion such as (?=ab) the length is 3. For an an
-alternation bar or a closing parenthesis, the length is one, unless a closing
-parenthesis is followed by a quantifier, in which case its length is included.
-(This changed in release 10.23. In earlier releases, before an opening
-parenthesis the length was that of the entire group, and before an alternation
-bar or a closing parenthesis the length was zero.)
+callout before an assertion such as (?=ab) the length is 3. For an alternation
+bar or a closing parenthesis, the length is one, unless a closing parenthesis
+is followed by a quantifier, in which case its length is included. (This
+changed in release 10.23. In earlier releases, before an opening parenthesis
+the length was that of the entire group, and before an alternation bar or a
+closing parenthesis the length was zero.)
 .P
 The \fIpattern_position\fP and \fInext_item_length\fP fields are intended to
 help in distinguishing between different automatic callouts, which all have the
@@ -443,7 +443,7 @@ value, scanning the pattern stops, and that value is returned from
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@@ -452,6 +452,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 03 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 19 January 2024
+Copyright (c) 1997-2024 University of Cambridge.
 .fi

+ 80 - 63
regex.mod/pcre/doc/pcre2compat.3

@@ -1,36 +1,47 @@
-.TH PCRE2COMPAT 3 "30 August 2021" "PCRE2 10.38"
+.TH PCRE2COMPAT 3 "30 November 2023" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "DIFFERENCES BETWEEN PCRE2 AND PERL"
 .rs
 .sp
-This document describes some of the differences in the ways that PCRE2 and Perl
-handle regular expressions. The differences described here are with respect to
-Perl version 5.32.0, but as both Perl and PCRE2 are continually changing, the
-information may at times be out of date.
+This document describes some of the known differences in the ways that PCRE2
+and Perl handle regular expressions. The differences described here are with
+respect to Perl version 5.38.0, but as both Perl and PCRE2 are continually
+changing, the information may at times be out of date.
 .P
-1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
+1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the
+behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the
+next character unless it is the start of a newline sequence. This means that,
+if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF
+(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using
+EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline
+indicator.
+.P
+2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does
 have are given in the
 .\" HREF
 \fBpcre2unicode\fP
 .\"
 page.
 .P
-2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
+3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but
 they do not mean what you might think. For example, (?!a){3} does not assert
 that the next three characters are not "a". It just asserts that the next
 character is not "a" three times (in principle; PCRE2 optimizes this to run the
 assertion just once). Perl allows some repeat quantifiers on other assertions,
-for example, \eb* (but not \eb{3}, though oddly it does allow ^{3}), but these
-do not seem to have any use. PCRE2 does not allow any kind of quantifier on
-non-lookaround assertions.
+for example, \eb*, but these do not seem to have any use. PCRE2 does not allow
+any kind of quantifier on non-lookaround assertions.
+.P
+4. If a braced quantifier such as {1,2} appears where there is nothing to
+repeat (for example, at the start of a branch), PCRE2 raises an error whereas
+Perl treats the quantifier characters as literal.
 .P
-3. Capture groups that occur inside negative lookaround assertions are counted,
+5. Capture groups that occur inside negative lookaround assertions are counted,
 but their entries in the offsets vector are set only when a negative assertion
 is a condition that has a matching branch (that is, the condition is false).
 Perl may set such capture groups in other circumstances.
 .P
-4. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu,
+6. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu,
 \eU, and \eN when followed by a character name. \eN on its own, matching a
 non-newline character, and \eN{U+dd..}, matching a Unicode code point, are
 supported. The escapes that modify the case of following letters are
@@ -40,12 +51,13 @@ generated by default. However, if either of the PCRE2_ALT_BSUX or
 PCRE2_EXTRA_ALT_BSUX options is set, \eU and \eu are interpreted as ECMAScript
 interprets them.
 .P
-5. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is
+7. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is
 built with Unicode support (the default). The properties that can be tested
 with \ep and \eP are limited to the general category properties such as Lu and
-Nd, script names such as Greek or Han, and the derived properties Any and L&.
-Both PCRE2 and Perl support the Cs (surrogate) property, but in PCRE2 its use
-is limited. See the
+Nd, the derived properties Any and LC (synonym L&), script names such as Greek
+or Han, Bidi_Class, Bidi_Control, and a few binary properties. Both PCRE2 and
+Perl support the Cs (surrogate) property, but in PCRE2 its use is limited. See
+the
 .\" HREF
 \fBpcre2pattern\fP
 .\"
@@ -53,14 +65,14 @@ documentation for details. The long synonyms for property names that Perl
 supports (such as \ep{Letter}) are not supported by PCRE2, nor is it permitted
 to prefix any of these properties with "Is".
 .P
-6. PCRE2 supports the \eQ...\eE escape for quoting substrings. Characters
+8. PCRE2 supports the \eQ...\eE escape for quoting substrings. Characters
 in between are treated as literals. However, this is slightly different from
 Perl in that $ and @ are also handled as literals inside the quotes. In Perl,
-they cause variable interpolation (but of course PCRE2 does not have
-variables). Also, Perl does "double-quotish backslash interpolation" on any
-backslashes between \eQ and \eE which, its documentation says, "may lead to
-confusing results". PCRE2 treats a backslash between \eQ and \eE just like any
-other character. Note the following examples:
+they cause variable interpolation (PCRE2 does not have variables). Also, Perl
+does "double-quotish backslash interpolation" on any backslashes between \eQ
+and \eE which, its documentation says, "may lead to confusing results". PCRE2
+treats a backslash between \eQ and \eE just like any other character. Note the
+following examples:
 .sp
     Pattern            PCRE2 matches     Perl matches
 .sp
@@ -75,7 +87,7 @@ other character. Note the following examples:
 The \eQ...\eE sequence is recognized both inside and outside character classes
 by both PCRE2 and Perl.
 .P
-7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
+9. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code})
 constructions. However, PCRE2 does have a "callout" feature, which allows an
 external function to be called during pattern matching. See the
 .\" HREF
@@ -83,11 +95,11 @@ external function to be called during pattern matching. See the
 .\"
 documentation for details.
 .P
-8. Subroutine calls (whether recursive or not) were treated as atomic groups up
-to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking
-into subroutine calls is now supported, as in Perl.
+10. Subroutine calls (whether recursive or not) were treated as atomic groups
+up to PCRE2 release 10.23, but from release 10.30 this changed, and
+backtracking into subroutine calls is now supported, as in Perl.
 .P
-9. In PCRE2, if any of the backtracking control verbs are used in a group that
+11. In PCRE2, if any of the backtracking control verbs are used in a group that
 is called as a subroutine (whether or not recursively), their effect is
 confined to that group; it does not extend to the surrounding pattern. This is
 not always the case in Perl. In particular, if (*THEN) is present in a group
@@ -95,18 +107,18 @@ that is called as a subroutine, its action is limited to that group, even if
 the group does not contain any | characters. Note that such groups are
 processed as anchored at the point where they are tested.
 .P
-10. If a pattern contains more than one backtracking control verb, the first
+12. If a pattern contains more than one backtracking control verb, the first
 one that is backtracked onto acts. For example, in the pattern
 A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
 triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
 same as PCRE2, but there are cases where it differs.
 .P
-11. There are some differences that are concerned with the settings of captured
+13. There are some differences that are concerned with the settings of captured
 strings when part of a pattern is repeated. For example, matching "aba" against
 the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
 "b".
 .P
-12. PCRE2's handling of duplicate capture group numbers and names is not as
+14. PCRE2's handling of duplicate capture group numbers and names is not as
 general as Perl's. This is a consequence of the fact that PCRE2 works internally
 just with numbers, using an external table to translate between numbers and
 names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)), where the two
@@ -115,82 +127,77 @@ causes an error at compile time. If it were allowed, it would not be possible
 to distinguish which group matched, because both names map to capture group
 number 1. To avoid this confusing situation, an error is given at compile time.
 .P
-13. Perl used to recognize comments in some places that PCRE2 does not, for
+15. Perl used to recognize comments in some places that PCRE2 does not, for
 example, between the ( and ? at the start of a group. If the /x modifier is
 set, Perl allowed white space between ( and ? though the latest Perls give an
 error (for a while it was just deprecated). There may still be some cases where
 Perl behaves differently.
 .P
-14. Perl, when in warning mode, gives warnings for character classes such as
+16. Perl, when in warning mode, gives warnings for character classes such as
 [A-\ed] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no
 warning features, so it gives an error in these cases because they are almost
 certainly user mistakes.
 .P
-15. In PCRE2, the upper/lower case character properties Lu and Ll are not
+17. In PCRE2, the upper/lower case character properties Lu and Ll are not
 affected when case-independent matching is specified. For example, \ep{Lu}
 always matches an upper case letter. I think Perl has changed in this respect;
-in the release at the time of writing (5.32), \ep{Lu} and \ep{Ll} match all
+in the release at the time of writing (5.38), \ep{Lu} and \ep{Ll} match all
 letters, regardless of case, when case independence is specified.
 .P
-16. From release 5.32.0, Perl locks out the use of \eK in lookaround
+18. From release 5.32.0, Perl locks out the use of \eK in lookaround
 assertions. From release 10.38 PCRE2 does the same by default. However, there
 is an option for re-enabling the previous behaviour. When this option is set,
 \eK is acted on when it occurs in positive assertions, but is ignored in
 negative assertions.
 .P
-17. PCRE2 provides some extensions to the Perl regular expression facilities.
+19. PCRE2 provides some extensions to the Perl regular expression facilities.
 Perl 5.10 included new features that were not in earlier versions of Perl, some
 of which (such as named parentheses) were in PCRE2 for some time before. This
-list is with respect to Perl 5.32:
-.sp
-(a) Although lookbehind assertions in PCRE2 must match fixed length strings,
-each alternative toplevel branch of a lookbehind assertion can match a
-different length of string. Perl requires them all to have the same length.
-.sp
-(b) From PCRE2 10.23, backreferences to groups of fixed length are supported
-in lookbehinds, provided that there is no possibility of referencing a
-non-unique number or name. Perl does not support backreferences in lookbehinds.
+list is with respect to Perl 5.38:
 .sp
-(c) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $
+(a) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $
 meta-character matches only at the very end of the string.
 .sp
-(d) A backslash followed by a letter with no special meaning is faulted. (Perl
+(b) A backslash followed by a letter with no special meaning is faulted. (Perl
 can be made to issue a warning.)
 .sp
-(e) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is
+(c) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is
 inverted, that is, by default they are not greedy, but if followed by a
 question mark they are.
 .sp
-(f) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried
+(d) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried
 only at the first matching position in the subject string.
 .sp
-(g) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART
+(e) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART
 options have no Perl equivalents.
 .sp
-(h) The \eR escape sequence can be restricted to match only CR, LF, or CRLF
+(f) The \eR escape sequence can be restricted to match only CR, LF, or CRLF
 by the PCRE2_BSR_ANYCRLF option.
 .sp
-(i) The callout facility is PCRE2-specific. Perl supports codeblocks and
+(g) The callout facility is PCRE2-specific. Perl supports codeblocks and
 variable interpolation, but not general hooks on every match.
 .sp
-(j) The partial matching facility is PCRE2-specific.
+(h) The partial matching facility is PCRE2-specific.
 .sp
-(k) The alternative matching function (\fBpcre2_dfa_match()\fP matches in a
+(i) The alternative matching function (\fBpcre2_dfa_match()\fP) matches in a
 different way and is not Perl-compatible.
 .sp
-(l) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) at
+(j) PCRE2 recognizes some special sequences such as (*CR) or (*NO_JIT) at
 the start of a pattern. These set overall options that cannot be changed within
 the pattern.
 .sp
-(m) PCRE2 supports non-atomic positive lookaround assertions. This is an
+(k) PCRE2 supports non-atomic positive lookaround assertions. This is an
 extension to the lookaround facilities. The default, Perl-compatible
 lookarounds are atomic.
+.sp
+(l) There are three syntactical items in patterns that can refer to a capturing
+group by number: back references such as \eg{2}, subroutine calls such as (?3),
+and condition references such as (?(4)...). PCRE2 supports relative group
+numbers such as +2 and -4 in all three cases. Perl supports both plus and minus
+for subroutine calls, but only minus for back references, and no relative
+numbering at all for conditions.
 .P
-18. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa
-modifier restricts /i case-insensitive matching to pure ascii, ignoring Unicode
-rules. This separation cannot be represented with PCRE2_UCP.
-.P
-19. Perl has different limits than PCRE2. See the
+20. Perl has different limits than PCRE2. See the
 .\" HREF
 \fBpcre2limit\fP
 .\"
@@ -198,6 +205,16 @@ documentation for details. Perl went with 5.10 from recursion to iteration
 keeping the intermediate matches on the heap, which is ~10% slower but does not
 fall into any stack-overflow limit. PCRE2 made a similar change at release
 10.30, and also has many build-time and run-time customizable limits.
+.P
+21. Unlike Perl, PCRE2 doesn't have character set modifiers and, in particular,
+has no way to set characters by context as Perl's "/d" does. A regular
+expression using PCRE2_UTF and PCRE2_UCP will use similar rules to Perl's "/u";
+something closer to "/a" could be selected by adding other PCRE2_EXTRA_ASCII*
+options on top.
+.P
+22. Some recursive patterns that Perl diagnoses as infinite recursions can be
+handled by PCRE2, either by the interpreter or the JIT. An example is
+/(?:|(?0)abcd)(?(R)|\ez)/, which matches a sequence of any number of repeated
+"abcd" substrings at the end of the subject.
 .
 .
 .SH AUTHOR
@@ -214,6 +231,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 30 August 2021
-Copyright (c) 1997-2021 University of Cambridge.
+Last updated: 30 November 2023
+Copyright (c) 1997-2023 University of Cambridge.
 .fi

+ 1 - 1
regex.mod/pcre/doc/pcre2convert.3

@@ -150,7 +150,7 @@ neither do POSIX extended patterns).
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .

+ 11 - 0
regex.mod/pcre/doc/pcre2demo.3

@@ -1,19 +1,30 @@
+.TH PCRE2DEMO 3 "16 February 2024" "PCRE2 10.43-RC1"
+.\"AUTOMATICALLY GENERATED BY PrepareRelease - do not EDIT!
+.SH NAME
+PCRE2DEMO - A demonstration C program for PCRE2
+.SH "SOURCE CODE"
+.rs
+.sp
 .\" Start example.
 .de EX
+.	do ds mF \\n[.fam]
 .  nr mE \\n(.f
 .  nf
 .  nh
+.	do fam C
 .  ft CW
 ..
 .
 .
 .\" End example.
 .de EE
+.	do fam \\*(mF
 .  ft \\n(mE
 .  fi
 .  hy \\n(HY
 ..
 .
+.RS -7
 .EX
 /*************************************************
 *           PCRE2 DEMONSTRATION PROGRAM          *

+ 181 - 128
regex.mod/pcre/doc/pcre2grep.1

@@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "31 August 2021" "PCRE2 10.38"
+.TH PCRE2GREP 1 "22 December 2023" "PCRE2 10.43"
 .SH NAME
 pcre2grep - a grep with Perl-compatible regular expressions.
 .SH SYNOPSIS
@@ -43,13 +43,16 @@ For example:
 .sp
   pcre2grep some-pattern file1 - file3
 .sp
-Input files are searched line by line. By default, each line that matches a
-pattern is copied to the standard output, and if there is more than one file,
-the file name is output at the start of each line, followed by a colon.
-However, there are options that can change how \fBpcre2grep\fP behaves. In
-particular, the \fB-M\fP option makes it possible to search for strings that
-span line boundaries. What defines a line boundary is controlled by the
-\fB-N\fP (\fB--newline\fP) option.
+By default, input files are searched line by line, so pattern assertions about
+the beginning and end of a subject string (^, $, \eA, \eZ, and \ez) match at
+the beginning and end of each line. When a line matches a pattern, it is copied
+to the standard output, and if there is more than one file, the file name is
+output at the start of each line, followed by a colon. However, there are
+options that can change how \fBpcre2grep\fP behaves. For example, the \fB-M\fP
+option makes it possible to search for strings that span line boundaries. What
+defines a line boundary is controlled by the \fB-N\fP (\fB--newline\fP) option.
+The \fB-h\fP and \fB-H\fP options control whether or not file names are shown,
+and the \fB-Z\fP option changes the file name terminator to a zero byte.
 .P
 The amount of memory used for buffering files that are being scanned is
 controlled by parameters that can be set by the \fB--buffer-size\fP and
@@ -66,6 +69,9 @@ The block of memory that is actually used is three times the "buffer size", to
 allow for buffering "before" and "after" lines. If the buffer size is too
 small, fewer than requested "before" and "after" lines may be output.
 .P
+When matching with a multiline pattern, the size of the buffer must be at least
+half of the maximum match expected or the pattern might fail to match.
+.P
 Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the greater.
 BUFSIZ is defined in \fB<stdio.h>\fP. When there is more than one pattern
 (specified by the use of \fB-e\fP and/or \fB-f\fP), each pattern is applied to
@@ -74,18 +80,22 @@ patterns are tried before the \fB-f\fP patterns.
 .P
 By default, as soon as one pattern matches a line, no further patterns are
 considered. However, if \fB--colour\fP (or \fB--color\fP) is used to colour the
-matching substrings, or if \fB--only-matching\fP, \fB--file-offsets\fP, or
-\fB--line-offsets\fP is used to output only the part of the line that matched
-(either shown literally, or as an offset), scanning resumes immediately
-following the match, so that further matches on the same line can be found. If
-there are multiple patterns, they are all tried on the remainder of the line,
-but patterns that follow the one that matched are not tried on the earlier
-matched part of the line.
+matching substrings, or if \fB--only-matching\fP, \fB--file-offsets\fP,
+\fB--line-offsets\fP, or \fB--output\fP is used to output only the part of the
+line that matched (either shown literally, or as an offset), the behaviour is
+different. In this situation, all the patterns are applied to the line. If
+there is more than one match, the one that begins nearest to the start of the
+subject is processed; if there is more than one match at that position, the one
+with the longest matching substring is processed; if the matching substrings
+are equal, the first match found is processed.
+.P
+Scanning with all the patterns resumes immediately following the match, so that
+later matches on the same line can be found. Note, however, that an overlapping
+match that starts in the middle of another match will not be processed.
 .P
-This behaviour means that the order in which multiple patterns are specified
-can affect the output when one of the above options is used. This is no longer
-the same behaviour as GNU grep, which now manages to display earlier matches
-for later patterns (as long as there is no overlap).
+The above behaviour was changed at release 10.41 to be more compatible with GNU
+grep. In earlier releases, \fBpcre2grep\fP did not recognize matches from
+later patterns that were earlier in the subject.
 .P
 Patterns that can match an empty string are accepted, but empty string
 matches are never recognized. An example is the pattern "(super)?(man)?", in
@@ -101,14 +111,15 @@ The \fB--locale\fP option can be used to override this.
 .SH "SUPPORT FOR COMPRESSED FILES"
 .rs
 .sp
-It is possible to compile \fBpcre2grep\fP so that it uses \fBlibz\fP or
-\fBlibbz2\fP to read compressed files whose names end in \fB.gz\fP or
+Compile-time options for \fBpcre2grep\fP can set it up to use \fBlibz\fP or
+\fBlibbz2\fP for reading compressed files whose names end in \fB.gz\fP or
 \fB.bz2\fP, respectively. You can find out whether your \fBpcre2grep\fP binary
 has support for one or both of these file types by running it with the
 \fB--help\fP option. If the appropriate support is not present, all files are
-treated as plain text. The standard input is always so treated. When input is
-from a compressed .gz or .bz2 file, the \fB--line-buffered\fP option is
-ignored.
+treated as plain text. The standard input is always so treated. If a file with
+a \fB.gz\fP or \fB.bz2\fP extension is not in fact compressed, it is read as a
+plain text file. When input is from a compressed .gz or .bz2 file, the
+\fB--line-buffered\fP option is ignored.
 .
 .
 .SH "BINARY FILES"
@@ -149,9 +160,11 @@ Output up to \fInumber\fP lines of context after each matching line. Fewer
 lines are output if the next match or the end of the file is reached, or if the
 processing buffer size has been set too small. If file names and/or line
 numbers are being output, a hyphen separator is used instead of a colon for the
-context lines. A line containing "--" is output between each group of lines,
-unless they are in fact contiguous in the input file. The value of \fInumber\fP
-is expected to be relatively small. When \fB-c\fP is used, \fB-A\fP is ignored.
+context lines (the \fB-Z\fP option can be used to change the file name
+terminator to a zero byte). A line containing "--" is output between each group
+of lines, unless they are in fact contiguous in the input file. The value of
+\fInumber\fP is expected to be relatively small. When \fB-c\fP is used,
+\fB-A\fP is ignored.
 .TP
 \fB-a\fP, \fB--text\fP
 Treat binary files as text. This is equivalent to
@@ -167,9 +180,10 @@ Output up to \fInumber\fP lines of context before each matching line. Fewer
 lines are output if the previous match or the start of the file is within
 \fInumber\fP lines, or if the processing buffer size has been set too small. If
 file names and/or line numbers are being output, a hyphen separator is used
-instead of a colon for the context lines. A line containing "--" is output
-between each group of lines, unless they are in fact contiguous in the input
-file. The value of \fInumber\fP is expected to be relatively small. When
+instead of a colon for the context lines (the \fB-Z\fP option can be used to
+change the file name terminator to a zero byte). A line containing "--" is
+output between each group of lines, unless they are in fact contiguous in the
+input file. The value of \fInumber\fP is expected to be relatively small. When
 \fB-c\fP is used, \fB-B\fP is ignored.
 .TP
 \fB--binary-files=\fP\fIword\fP
@@ -201,7 +215,7 @@ exactly the same as the number of lines that would have been output, but if the
 \fB-M\fP (multiline) option is used (without \fB-v\fP), there may be more
 suppressed lines than the count (that is, the number of matches).
 .sp
-If no lines are selected, the number zero is output. If several files are are
+If no lines are selected, the number zero is output. If several files are
 being scanned, a count is output for each of them and the \fB-t\fP option can
 be used to cause a total to be output at the end. However, if the
 \fB--files-with-matches\fP option is also used, only those files whose counts
@@ -215,12 +229,14 @@ equals sign.
 .TP
 \fB--colour=\fP\fIvalue\fP, \fB--color=\fP\fIvalue\fP
 This option specifies under what circumstances the parts of a line that matched
-a pattern should be coloured in the output. By default, the output is not
-coloured. The value (which is optional, see above) may be "never", "always", or
-"auto". In the latter case, colouring happens only if the standard output is
-connected to a terminal. More resources are used when colouring is enabled,
-because \fBpcre2grep\fP has to search for all possible matches in a line, not
-just one, in order to colour them all.
+a pattern should be coloured in the output. It is ignored if
+\fB--file-offsets\fP, \fB--line-offsets\fP, or \fB--output\fP is set. By
+default, output is not coloured. The value for the \fB--colour\fP option (which
+is optional, see above) may be "never", "always", or "auto". In the last
+case, colouring happens only if the standard output is connected to a terminal.
+More resources are used when colouring is enabled, because \fBpcre2grep\fP has
+to search for all possible matches in a line, not just one, in order to colour
+them all.
 .sp
 The colour that is used can be specified by setting one of the environment
 variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or
@@ -256,23 +272,24 @@ end-of-file; in others it may provoke an error.
 \fB--depth-limit\fP=\fInumber\fP
 See \fB--match-limit\fP below.
 .TP
+\fB-E\fP, \fB--case-restrict\fP
+When case distinctions are being ignored in Unicode mode, two ASCII letters (K
+and S) will by default match Unicode characters U+212A (Kelvin sign) and U+017F
+(long S) respectively, as well as their lower case ASCII counterparts. When
+this option is set, case equivalences are restricted such that no ASCII
+character matches a non-ASCII character, and vice versa.
+.TP
 \fB-e\fP \fIpattern\fP, \fB--regex=\fP\fIpattern\fP, \fB--regexp=\fP\fIpattern\fP
 Specify a pattern to be matched. This option can be used multiple times in
 order to specify several patterns. It can also be used as a way of specifying a
 single pattern that starts with a hyphen. When \fB-e\fP is used, no argument
 pattern is taken from the command line; all arguments are treated as file
 names. There is no limit to the number of patterns. They are applied to each
-line in the order in which they are defined until one matches.
+line in the order in which they are defined.
 .sp
 If \fB-f\fP is used with \fB-e\fP, the command line patterns are matched first,
 followed by the patterns from the file(s), independent of the order in which
-these options are specified. Note that multiple use of \fB-e\fP is not the same
-as a single pattern with alternatives. For example, X|Y finds the first
-character in a line that is X or Y, whereas if the two patterns are given
-separately, with X first, \fBpcre2grep\fP finds X if it is present, even if it
-follows Y in the line. It finds Y only if there is no X in the line. This
-matters only if you are using \fB-o\fP or \fB--colo(u)r\fP to show the part(s)
-of the line that matched.
+these options are specified.
 .TP
 \fB--exclude\fP=\fIpattern\fP
 Files (but not directories) whose names match the pattern are skipped without
@@ -316,22 +333,19 @@ files; it does not apply to patterns specified by any of the \fB--include\fP or
 \fB--exclude\fP options.
 .TP
 \fB-f\fP \fIfilename\fP, \fB--file=\fP\fIfilename\fP
-Read patterns from the file, one per line, and match them against each line of
-input. As is the case with patterns on the command line, no delimiters should
-be used. What constitutes a newline when reading the file is the operating
-system's default interpretation of \en. The \fB--newline\fP option has no
-effect on this option. Trailing white space is removed from each line, and
-blank lines are ignored. An empty file contains no patterns and therefore
-matches nothing. Patterns read from a file in this way may contain binary
-zeros, which are treated as ordinary data characters. See also the comments
-about multiple patterns versus a single pattern with alternatives in the
-description of \fB-e\fP above.
+Read patterns from the file, one per line. As is the case with patterns on the
+command line, no delimiters should be used. What constitutes a newline when
+reading the file is the operating system's default interpretation of \en. The
+\fB--newline\fP option has no effect on this option. Trailing white space is
+removed from each line, and blank lines are ignored. An empty file contains no
+patterns and therefore matches nothing. Patterns read from a file in this way
+may contain binary zeros, which are treated as ordinary data characters.
 .sp
 If this option is given more than once, all the specified files are read. A
 data line is output if any of the patterns match it. A file name can be given
 as "-" to refer to the standard input. When \fB-f\fP is used, patterns
 specified on the command line using \fB-e\fP may also be present; they are
-tested before the file's patterns. However, no other pattern is taken from the
+matched before the file's patterns. However, no pattern is taken from the
 command line; all arguments are treated as the names of paths to be searched.
 .TP
 \fB--file-list\fP=\fIfilename\fP
@@ -349,26 +363,32 @@ specified files are read.
 \fB--file-offsets\fP
 Instead of showing lines or parts of lines that match, show each match as an
 offset from the start of the file and a length, separated by a comma. In this
-mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP
-options are ignored. If there is more than one match in a line, each of them is
-shown separately. This option is mutually exclusive with \fB--output\fP,
-\fB--line-offsets\fP, and \fB--only-matching\fP.
+mode, \fB--colour\fP has no effect, and no context is shown. That is, the
+\fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is more than one
+match in a line, each of them is shown separately. This option is mutually
+exclusive with \fB--output\fP, \fB--line-offsets\fP, and \fB--only-matching\fP.
+.TP
+\fB--group-separator\fP=\fItext\fP
+Output this text string instead of two hyphens between groups of lines when
+\fB-A\fP, \fB-B\fP, or \fB-C\fP is in use. See also \fB--no-group-separator\fP.
 .TP
 \fB-H\fP, \fB--with-filename\fP
 Force the inclusion of the file name at the start of output lines when
-searching a single file. By default, the file name is not shown in this case.
-For matching lines, the file name is followed by a colon; for context lines, a
-hyphen separator is used. If a line number is also being output, it follows the
-file name. When the \fB-M\fP option causes a pattern to match more than one
-line, only the first is preceded by the file name. This option overrides any
-previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
+searching a single file. The file name is not normally shown in this case.
+By default, for matching lines, the file name is followed by a colon; for
+context lines, a hyphen separator is used. The \fB-Z\fP option can be used to
+change the terminator to a zero byte. If a line number is also being output,
+it follows the file name. When the \fB-M\fP option causes a pattern to match
+more than one line, only the first is preceded by the file name. This option
+overrides any previous \fB-h\fP, \fB-l\fP, or \fB-L\fP options.
 .TP
 \fB-h\fP, \fB--no-filename\fP
-Suppress the output file names when searching multiple files. By default,
-file names are shown when multiple files are searched. For matching lines, the
-file name is followed by a colon; for context lines, a hyphen separator is used.
-If a line number is also being output, it follows the file name. This option
-overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
+Suppress the output of file names when searching multiple files. File names are
+normally shown when multiple files are searched. By default, for matching
+lines, the file name is followed by a colon; for context lines, a hyphen
+separator is used. The \fB-Z\fP option can be used to change the terminator to
+a zero byte. If a line number is also being output, it follows the file name.
+This option overrides any previous \fB-H\fP, \fB-L\fP, or \fB-l\fP options.
 .TP
 \fB--heap-limit\fP=\fInumber\fP
 See \fB--match-limit\fP below.
@@ -383,7 +403,9 @@ Ignore binary files. This is equivalent to
 \fB--binary-files\fP=\fIwithout-match\fP.
 .TP
 \fB-i\fP, \fB--ignore-case\fP
-Ignore upper/lower case distinctions during comparisons.
+Ignore upper/lower case distinctions when pattern matching. This applies when
+matching path names for inclusion or exclusion as well as when matching lines
+in files.
 .TP
 \fB--include\fP=\fIpattern\fP
 If any \fB--include\fP patterns are specified, the only files that are
@@ -417,17 +439,19 @@ given any number of times. If a directory matches both \fB--include-dir\fP and
 \fB-L\fP, \fB--files-without-match\fP
 Instead of outputting lines from the files, just output the names of the files
 that do not contain any lines that would have been output. Each file name is
-output once, on a separate line. This option overrides any previous \fB-H\fP,
-\fB-h\fP, or \fB-l\fP options.
+output once, on a separate line by default, but if the \fB-Z\fP option is set,
+they are separated by zero bytes instead of newlines. This option overrides any
+previous \fB-H\fP, \fB-h\fP, or \fB-l\fP options.
 .TP
 \fB-l\fP, \fB--files-with-matches\fP
 Instead of outputting lines from the files, just output the names of the files
 containing lines that would have been output. Each file name is output once, on
-a separate line. Searching normally stops as soon as a matching line is found
-in a file. However, if the \fB-c\fP (count) option is also used, matching
-continues in order to obtain the correct count, and those files that have at
-least one match are listed along with their counts. Using this option with
-\fB-c\fP is a way of suppressing the listing of files with no matches that
+a separate line, but if the \fB-Z\fP option is set, they are separated by zero
+bytes instead of newlines. Searching normally stops as soon as a matching line
+is found in a file. However, if the \fB-c\fP (count) option is also used,
+matching continues in order to obtain the correct count, and those files that
+have at least one match are listed along with their counts. Using this option
+with \fB-c\fP is a way of suppressing the listing of files with no matches that
 occurs with \fB-c\fP on its own. This option overrides any previous \fB-H\fP,
 \fB-h\fP, or \fB-L\fP options.
 .TP
@@ -452,11 +476,11 @@ ceases to work. When input is from a compressed .gz or .bz2 file,
 Instead of showing lines or parts of lines that match, show each match as a
 line number, the offset from the start of the line, and a length. The line
 number is terminated by a colon (as usual; see the \fB-n\fP option), and the
-offset and length are separated by a comma. In this mode, no context is shown.
-That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is
-more than one match in a line, each of them is shown separately. This option is
-mutually exclusive with \fB--output\fP, \fB--file-offsets\fP, and
-\fB--only-matching\fP.
+offset and length are separated by a comma. In this mode, \fB--colour\fP has no
+effect, and no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP
+options are ignored. If there is more than one match in a line, each of them is
+shown separately. This option is mutually exclusive with \fB--output\fP,
+\fB--file-offsets\fP, and \fB--only-matching\fP.
 .TP
 \fB--locale\fP=\fIlocale-name\fP
 This option specifies a locale to be used for pattern matching. It overrides
@@ -466,16 +490,22 @@ used. There is no short form for this option.
 .TP
 \fB-M\fP, \fB--multiline\fP
 Allow patterns to match more than one line. When this option is set, the PCRE2
-library is called in "multiline" mode. This allows a matched string to extend
-past the end of a line and continue on one or more subsequent lines. Patterns
-used with \fB-M\fP may usefully contain literal newline characters and internal
-occurrences of ^ and $ characters. The output for a successful match may
-consist of more than one line. The first line is the line in which the match
-started, and the last line is the line in which the match ended. If the matched
-string ends with a newline sequence, the output ends at the end of that line.
-If \fB-v\fP is set, none of the lines in a multi-line match are output. Once a
-match has been handled, scanning restarts at the beginning of the line after
-the one in which the match ended.
+library is called in "multiline" mode, and a match is allowed to continue past
+the end of the initial line and onto one or more subsequent lines.
+.sp
+Patterns used with \fB-M\fP may usefully contain literal newline characters and
+internal occurrences of ^ and $ characters, because in multiline mode these can
+match at internal newlines. Because \fBpcre2grep\fP is scanning multiple lines,
+the \eZ and \ez assertions match only at the end of the last line in the file.
+The \eA assertion matches at the start of the first line of a match. This can
+be any line in the file; it is not anchored to the first line.
+.sp
+The output for a successful match may consist of more than one line. The first
+line is the line in which the match started, and the last line is the line in
+which the match ended. If the matched string ends with a newline sequence, the
+output ends at the end of that line. If \fB-v\fP is set, none of the lines in a
+multi-line match are output. Once a match has been handled, scanning restarts
+at the beginning of the line after the one in which the match ended.
 .sp
 The newline sequence that separates multiple lines must be matched as part of
 the pattern. For example, to find the phrase "regular expression" in a file
@@ -490,8 +520,10 @@ well as possibly handling a two-character newline sequence.
 .sp
 There is a limit to the number of lines that can be matched, imposed by the way
 that \fBpcre2grep\fP buffers the input file as it scans it. With a sufficiently
-large processing buffer, this should not be a problem, but the \fB-M\fP option
-does not work when input is read line by line (see \fB--line-buffered\fP.)
+large processing buffer, this should not be a problem.
+.sp
+The \fB-M\fP option does not work when input is read line by line (see
+\fB--line-buffered\fP).
 .TP
 \fB-m\fP \fInumber\fP, \fB--max-count\fP=\fInumber\fP
 Stop processing after finding \fInumber\fP matching lines, or non-matching
@@ -516,10 +548,7 @@ counter that is incremented each time around its main processing loop. If the
 value set by \fB--match-limit\fP is reached, an error occurs.
 .sp
 The \fB--heap-limit\fP option specifies, as a number of kibibytes (units of
-1024 bytes), the amount of heap memory that may be used for matching. Heap
-memory is needed only if matching the pattern requires a significant number of
-nested backtracking points to be remembered. This parameter can be set to zero
-to forbid the use of heap memory altogether.
+1024 bytes), the maximum amount of heap memory that may be used for matching.
 .sp
 The \fB--depth-limit\fP option limits the depth of nested backtracking points,
 which indirectly limits the amount of memory that is used. The amount of memory
@@ -572,22 +601,27 @@ being output, it precedes the line number. When the \fB-M\fP option causes a
 pattern to match more than one line, only the first is preceded by its line
 number. This option is forced if \fB--line-offsets\fP is used.
 .TP
+\fB--no-group-separator\fP
+Do not output a separator between groups of lines when \fB-A\fP, \fB-B\fP, or
+\fB-C\fP is in use. The default is to output a line containing two hyphens. See
+also \fB--group-separator\fP.
+.TP
 \fB--no-jit\fP
 If the PCRE2 library is built with support for just-in-time compiling (which
 speeds up matching), \fBpcre2grep\fP automatically makes use of this, unless it
 was explicitly disabled at build time. This option can be used to disable the
-use of JIT at run time. It is provided for testing and working round problems.
+use of JIT at run time. It is provided for testing and working around problems.
 It should never be needed in normal use.
 .TP
 \fB-O\fP \fItext\fP, \fB--output\fP=\fItext\fP
 When there is a match, instead of outputting the line that matched, output just
 the text specified in this option, followed by an operating-system standard
-newline. In this mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP,
-and \fB-C\fP options are ignored. The \fB--newline\fP option has no effect on
-this option, which is mutually exclusive with \fB--only-matching\fP,
-\fB--file-offsets\fP, and \fB--line-offsets\fP. However, like
-\fB--only-matching\fP, if there is more than one match in a line, each of them
-causes a line of output.
+newline. In this mode, \fB--colour\fP has no effect, and no context is shown.
+That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. The
+\fB--newline\fP option has no effect on this option, which is mutually
+exclusive with \fB--only-matching\fP, \fB--file-offsets\fP, and
+\fB--line-offsets\fP. However, like \fB--only-matching\fP, if there is more
+than one match in a line, each of them causes a line of output.
 .sp
 Escape sequences starting with a dollar character may be used to insert the
 contents of the matched part of the line and/or captured substrings into the
@@ -656,6 +690,17 @@ default is 50.
 Specify a separating string for multiple occurrences of \fB-o\fP. The default
 is an empty string. Separating strings are never coloured.
 .TP
+\fB-P\fP, \fB--no-ucp\fP
+Starting from release 10.43, when UTF/Unicode mode is specified with \fB-u\fP
+or \fB-U\fP, the PCRE2_UCP option is used by default. This means that the
+POSIX classes in patterns match more than just ASCII characters. For example,
+[:digit:] matches any Unicode decimal digit. The \fB--no-ucp\fP option
+suppresses PCRE2_UCP, thus restricting the POSIX classes to ASCII characters,
+as was the case in earlier releases. Note that there are now more fine-grained
+option settings within patterns that affect individual classes. For example,
+when in UCP mode, the sequence (?aP) restricts [:word:] to ASCII letters, while
+allowing \ew to match Unicode letters and digits.
+.TP
 \fB-q\fP, \fB--quiet\fP
 Work quietly, that is, display nothing except error messages. The exit
 status indicates whether or not any matches were found.
@@ -688,11 +733,11 @@ ignored when used with \fB-L\fP (list files without matches), because the grand
 total would always be zero.
 .TP
 \fB-u\fP, \fB--utf\fP
-Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
-with UTF-8 support. All patterns (including those for any \fB--exclude\fP and
-\fB--include\fP options) and all lines that are scanned must be valid strings
-of UTF-8 characters. If an invalid UTF-8 string is encountered, an error
-occurs.
+Operate in UTF/Unicode mode. This option is available only if PCRE2 has been
+compiled with UTF-8 support. All patterns (including those for any
+\fB--exclude\fP and \fB--include\fP options) and all lines that are scanned
+must be valid strings of UTF-8 characters. If an invalid UTF-8 string is
+encountered, an error occurs.
 .TP
 \fB-U\fP, \fB--utf-allow-invalid\fP
 As \fB--utf\fP, but in addition subject lines may contain invalid UTF-8 code
@@ -732,6 +777,12 @@ be more than one line. This is equivalent to having "^(?:" at the start of each
 pattern and ")$" at the end. This option applies only to the patterns that are
 matched against the contents of files; it does not apply to patterns specified
 by any of the \fB--include\fP or \fB--exclude\fP options.
+.TP
+\fB-Z\fP, \fB--null\fP
+Terminate file names in the regular output with a zero byte (the NUL
+character) instead of what would normally appear. This is useful when file
+names contain unusual characters such as colons, hyphens, or even newlines. The
+option does not apply to file names in error messages.
 .
 .
 .SH "ENVIRONMENT VARIABLES"
@@ -768,25 +819,27 @@ standard output must end with "\er\en". For all other operating systems, and
 for all messages to the standard error stream, "\en" is used.
 .
 .
-.SH "OPTIONS COMPATIBILITY"
+.SH "OPTIONS COMPATIBILITY WITH GNU GREP"
 .rs
 .sp
-Many of the short and long forms of \fBpcre2grep\fP's options are the same
-as in the GNU \fBgrep\fP program. Any long option of the form
-\fB--xxx-regexp\fP (GNU terminology) is also available as \fB--xxx-regex\fP
-(PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP,
-\fB--file-offsets\fP, \fB--heap-limit\fP, \fB--include-dir\fP,
-\fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP, \fB-M\fP,
-\fB--multiline\fP, \fB-N\fP, \fB--newline\fP, \fB--om-separator\fP,
-\fB--output\fP, \fB-u\fP, \fB--utf\fP, \fB-U\fP, and \fB--utf-allow-invalid\fP
-options are specific to \fBpcre2grep\fP, as is the use of the
-\fB--only-matching\fP option with a capturing parentheses number.
+Many of the short and long forms of \fBpcre2grep\fP's options are the same as
+in the GNU \fBgrep\fP program. Any long option of the form \fB--xxx-regexp\fP
+(GNU terminology) is also available as \fB--xxx-regex\fP (PCRE2 terminology).
+However, the \fB--case-restrict\fP, \fB--depth-limit\fP, \fB-E\fP,
+\fB--file-list\fP, \fB--file-offsets\fP, \fB--heap-limit\fP,
+\fB--include-dir\fP, \fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP,
+\fB-M\fP, \fB--multiline\fP, \fB-N\fP, \fB--newline\fP, \fB--no-ucp\fP,
+\fB--om-separator\fP, \fB--output\fP, \fB-P\fP, \fB-u\fP, \fB--utf\fP,
+\fB-U\fP, and \fB--utf-allow-invalid\fP options are specific to
+\fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option with a
+capturing parentheses number.
 .P
 Although most of the common options work the same way, a few are different in
 \fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
-for GNU \fBgrep\fP, but a regular expression for \fBpcre2grep\fP. If both the
-\fB-c\fP and \fB-l\fP options are given, GNU grep lists only file names,
-without counts, but \fBpcre2grep\fP gives the counts as well.
+for GNU \fBgrep\fP, but in \fBpcre2grep\fP it is a regular expression to which
+the \fB-i\fP option applies. If both the \fB-c\fP and \fB-l\fP options are
+given, GNU grep lists only file names, without counts, but \fBpcre2grep\fP
+gives the counts as well.
 .
 .
 .SH "OPTIONS WITH DATA"
@@ -960,6 +1013,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 31 August 2021
-Copyright (c) 1997-2021 University of Cambridge.
+Last updated: 22 December 2023
+Copyright (c) 1997-2023 University of Cambridge.
 .fi

File diff suppressed because it is too large
+ 362 - 305
regex.mod/pcre/doc/pcre2grep.txt


+ 76 - 45
regex.mod/pcre/doc/pcre2jit.3

@@ -1,4 +1,4 @@
-.TH PCRE2JIT 3 "23 May 2019" "PCRE2 10.34"
+.TH PCRE2JIT 3 "23 January 2023" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT"
@@ -16,7 +16,7 @@ one-off matches. JIT support is available for all of the 8-bit, 16-bit and
 .P
 JIT support applies only to the traditional Perl-compatible matching function.
 It does not apply when the DFA matching function is being used. The code for
-this support was written by Zoltan Herczeg.
+JIT support was written by Zoltan Herczeg.
 .
 .
 .SH "AVAILABILITY OF JIT SUPPORT"
@@ -31,18 +31,37 @@ platforms:
   ARM 64-bit
   IBM s390x 64 bit
   Intel x86 32-bit and 64-bit
+  LoongArch 64 bit
   MIPS 32-bit and 64-bit
   Power PC 32-bit and 64-bit
-  SPARC 32-bit
+  RISC-V 32-bit and 64-bit
 .sp
 If --enable-jit is set on an unsupported platform, compilation fails.
 .P
-A program can tell if JIT support is available by calling \fBpcre2_config()\fP
-with the PCRE2_CONFIG_JIT option. The result is 1 when JIT is available, and 0
-otherwise. However, a simple program does not need to check this in order to
-use JIT. The API is implemented in a way that falls back to the interpretive
-code if JIT is not available. For programs that need the best possible
-performance, there is also a "fast path" API that is JIT-specific.
+A client program can tell if JIT support is available by calling
+\fBpcre2_config()\fP with the PCRE2_CONFIG_JIT option. The result is one if
+PCRE2 was built with JIT support, and zero otherwise. However, having the JIT
+code available does not guarantee that it will be used for any particular
+match. One reason for this is that there are a number of options and pattern
+items that are
+.\" HTML <a href="#unsupported">
+.\" </a>
+not supported by JIT
+.\"
+(see below). Another reason is that in some environments JIT is unable to get
+memory in which to build its compiled code. The only guarantee from
+\fBpcre2_config()\fP is that if it returns zero, JIT will definitely \fInot\fP
+be used.
+.P
+A simple program does not need to check availability in order to use JIT when
+possible. The API is implemented in a way that falls back to the interpretive
+code if JIT is not available or cannot be used for a given match. For programs
+that need the best possible performance, there is a
+.\" HTML <a href="#fastpath">
+.\" </a>
+"fast path"
+.\"
+API that is JIT-specific.
 .
 .
 .SH "SIMPLE USE OF JIT"
@@ -99,9 +118,13 @@ below.
 .P
 There are some \fBpcre2_match()\fP options that are not supported by JIT, and
 there are also some pattern items that JIT cannot handle. Details are given
-below. In both cases, matching automatically falls back to the interpretive
-code. If you want to know whether JIT was actually used for a particular match,
-you should arrange for a JIT callback function to be set up as described in the
+.\" HTML <a href="#unsupported">
+.\" </a>
+below.
+.\"
+In both cases, matching automatically falls back to the interpretive code. If
+you want to know whether JIT was actually used for a particular match, you
+should arrange for a JIT callback function to be set up as described in the
 section entitled
 .\" HTML <a href="#stackcontrol">
 .\" </a>
@@ -113,12 +136,14 @@ match-time options are not right for JIT execution, the callback function is
 not obeyed.
 .P
 If the JIT compiler finds an unsupported item, no JIT data is generated. You
-can find out if JIT matching is available after compiling a pattern by calling
-\fBpcre2_pattern_info()\fP with the PCRE2_INFO_JITSIZE option. A non-zero
-result means that JIT compilation was successful. A result of 0 means that JIT
-support is not available, or the pattern was not processed by
+can find out if JIT compilation was successful for a compiled pattern by
+calling \fBpcre2_pattern_info()\fP with the PCRE2_INFO_JITSIZE option. A
+non-zero result means that JIT compilation was successful. A result of 0 means
+that JIT support is not available, or the pattern was not processed by
 \fBpcre2_jit_compile()\fP, or the JIT compiler was not able to handle the
-pattern.
+pattern. Successful JIT compilation does not, however, guarantee the use of JIT
+at match time because there are some match-time options that are not supported
+by JIT.
 .
 .
 .SH "MATCHING SUBJECTS CONTAINING INVALID UTF"
@@ -130,14 +155,15 @@ checked at the start of matching and an error is generated if invalid UTF is
 detected. The PCRE2_NO_UTF_CHECK option can be passed to \fBpcre2_match()\fP to
 skip the check (for improved performance) if you are sure that a subject string
 is valid. If this option is used with an invalid string, the result is
-undefined.
+undefined. The calling program may crash or loop or otherwise misbehave.
 .P
 However, a way of running matches on strings that may contain invalid UTF
 sequences is available. Calling \fBpcre2_compile()\fP with the
 PCRE2_MATCH_INVALID_UTF option has two effects: it tells the interpreter in
 \fBpcre2_match()\fP to support invalid UTF, and, if \fBpcre2_jit_compile()\fP
-is called, the compiled JIT code also supports invalid UTF. Details of how this
-support works, in both the JIT and the interpretive cases, is given in the
+is subsequently called, the compiled JIT code also supports invalid UTF.
+Details of how this support works, in both the JIT and the interpretive cases,
+are given in the
 .\" HREF
 \fBpcre2unicode\fP
 .\"
@@ -149,6 +175,7 @@ It is superseded by the \fBpcre2_compile()\fP option PCRE2_MATCH_INVALID_UTF
 and should no longer be used. It may be removed in future.
 .
 .
+.\" HTML <a name="unsupported"></a>
 .SH "UNSUPPORTED OPTIONS AND PATTERN ITEMS"
 .rs
 .sp
@@ -169,10 +196,10 @@ in a conditional group.
 .SH "RETURN VALUES FROM JIT MATCHING"
 .rs
 .sp
-When a pattern is matched using JIT matching, the return values are the same
-as those given by the interpretive \fBpcre2_match()\fP code, with the addition
-of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the memory
-used for the JIT stack was insufficient. See
+When a pattern is matched using JIT, the return values are the same as those
+given by the interpretive \fBpcre2_match()\fP code, with the addition of one
+new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means that the memory used for
+the JIT stack was insufficient. See
 .\" HTML <a href="#stackcontrol">
 .\" </a>
 "Controlling the JIT stack"
@@ -251,11 +278,11 @@ non-sequential matches in one thread is to use callouts: if a callout function
 starts another match, that match must use a different JIT stack to the one used
 for currently suspended match(es).
 .P
-In a multithread application, if you do not
-specify a JIT stack, or if you assign or pass back NULL from a callback, that
-is thread-safe, because each thread has its own machine stack. However, if you
-assign or pass back a non-NULL JIT stack, this must be a different stack for
-each thread so that the application is thread-safe.
+In a multithread application, if you do not specify a JIT stack, or if you
+assign or pass back NULL from a callback, that is thread-safe, because each
+thread has its own machine stack. However, if you assign or pass back a
+non-NULL JIT stack, this must be a different stack for each thread so that the
+application is thread-safe.
 .P
 Strictly speaking, even more is allowed. You can assign the same non-NULL stack
 to a match context that is used by any number of patterns, as long as they are
@@ -335,7 +362,7 @@ list of patterns.
 pattern causes stack overflow with a stack of 1MiB? Is that 1MiB kept until the
 stack is freed?
 .sp
-Especially on embedded sytems, it might be a good idea to release memory
+Especially on embedded systems, it might be a good idea to release memory
 sometimes without freeing the stack. There is no API for this at the moment.
 Probably a function call which returns with the currently allocated memory for
 any stack and another which allows releasing memory (shrinking the stack) would
@@ -355,8 +382,8 @@ out this complicated API.
 .B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP);
 .fi
 .P
-The JIT executable allocator does not free all memory when it is possible.
-It expects new allocations, and keeps some free memory around to improve
+The JIT executable allocator does not free all memory when it is possible. It
+expects new allocations, and keeps some free memory around to improve
 allocation speed. However, in low memory conditions, it might be better to free
 all possible memory. You can cause this to happen by calling
 pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
@@ -393,6 +420,7 @@ calls.
 .sp
 .
 .
+.\" HTML <a name="fastpath"></a>
 .SH "JIT FAST PATH API"
 .rs
 .sp
@@ -408,18 +436,21 @@ processed by \fBpcre2_jit_compile()\fP).
 The fast path function is called \fBpcre2_jit_match()\fP, and it takes exactly
 the same arguments as \fBpcre2_match()\fP. However, the subject string must be
 specified with a length; PCRE2_ZERO_TERMINATED is not supported. Unsupported
-option bits (for example, PCRE2_ANCHORED, PCRE2_ENDANCHORED and
-PCRE2_COPY_MATCHED_SUBJECT) are ignored, as is the PCRE2_NO_JIT option. The
-return values are also the same as for \fBpcre2_match()\fP, plus
-PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is requested
-that was not compiled.
+option bits (for example, PCRE2_ANCHORED and PCRE2_ENDANCHORED) are ignored, as
+is the PCRE2_NO_JIT option. The return values are also the same as for
+\fBpcre2_match()\fP, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial
+or complete) is requested that was not compiled.
 .P
 When you call \fBpcre2_match()\fP, as well as testing for invalid options, a
 number of other sanity checks are performed on the arguments. For example, if
-the subject pointer is NULL, an immediate error is given. Also, unless
-PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the
-interests of speed, these checks do not happen on the JIT fast path, and if
-invalid data is passed, the result is undefined.
+the subject pointer is NULL but the length is non-zero, an immediate error is
+given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested
+for validity. In the interests of speed, these checks do not happen on the JIT
+fast path. If invalid UTF data is passed when PCRE2_MATCH_INVALID_UTF was not
+set for \fBpcre2_compile()\fP, the result is undefined. The program may crash
+or loop or give wrong results. In the absence of PCRE2_MATCH_INVALID_UTF you
+should call \fBpcre2_jit_match()\fP in UTF mode only if you are sure the
+subject is valid.
 .P
 Bypassing the sanity checks and the \fBpcre2_match()\fP wrapping can give
 speedups of more than 10%.
@@ -428,7 +459,7 @@ speedups of more than 10%.
 .SH "SEE ALSO"
 .rs
 .sp
-\fBpcre2api\fP(3)
+\fBpcre2api\fP(3), \fBpcre2unicode\fP(3)
 .
 .
 .SH AUTHOR
@@ -436,7 +467,7 @@ speedups of more than 10%.
 .sp
 .nf
 Philip Hazel (FAQ by Zoltan Herczeg)
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@@ -445,6 +476,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 23 May 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 23 January 2023
+Copyright (c) 1997-2023 University of Cambridge.
 .fi

+ 14 - 5
regex.mod/pcre/doc/pcre2limits.3

@@ -1,4 +1,4 @@
-.TH PCRE2LIMITS 3 "03 February 2019" "PCRE2 10.33"
+.TH PCRE2LIMITS 3 "1 August 2023" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "SIZE AND OTHER LIMITATIONS"
@@ -32,7 +32,12 @@ and unset offsets.
 .P
 All values in repeating quantifiers must be less than 65536.
 .P
-The maximum length of a lookbehind assertion is 65535 characters.
+There are two different limits that apply to branches of lookbehind assertions.
+If every branch in such an assertion matches a fixed number of characters,
+the maximum length of any branch is 65535 characters. If any branch matches a
+variable number of characters, then the maximum matching length for every
+branch is limited. The default limit is set at compile time, defaulting to 255,
+but can be changed by the calling program.
 .P
 There is no limit to the number of parenthesized groups, but there can be no
 more than 65535 capture groups, and there is a limit to the depth of nesting of
@@ -51,6 +56,10 @@ is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
 .P
 The maximum length of a string argument to a callout is the largest number a
 32-bit unsigned integer can hold.
+.P
+The maximum amount of heap memory used for matching is controlled by the heap
+limit, which can be set in a pattern or in a match context. The default is a
+very large number, effectively unlimited.
 .
 .
 .SH AUTHOR
@@ -58,7 +67,7 @@ The maximum length of a string argument to a callout is the largest number a
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@@ -67,6 +76,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 02 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: August 2023
+Copyright (c) 1997-2023 University of Cambridge.
 .fi

+ 4 - 4
regex.mod/pcre/doc/pcre2matching.3

@@ -1,4 +1,4 @@
-.TH PCRE2MATCHING 3 "28 August 2021" "PCRE2 10.38"
+.TH PCRE2MATCHING 3 "19 January 2024" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 MATCHING ALGORITHMS"
@@ -7,7 +7,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 This document describes the two different algorithms that are available in
 PCRE2 for matching a compiled regular expression against a given subject
 string. The "standard" algorithm is the one provided by the \fBpcre2_match()\fP
-function. This works in the same as as Perl's matching function, and provide a
+function. This works in the same way as Perl's matching function, and provides a
 Perl-compatible matching operation. The just-in-time (JIT) optimization that is
 described in the
 .\" HREF
@@ -217,6 +217,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 28 August 2021
-Copyright (c) 1997-2021 University of Cambridge.
+Last updated: 19 January 2024
+Copyright (c) 1997-2024 University of Cambridge.
 .fi

+ 1 - 1
regex.mod/pcre/doc/pcre2partial.3

@@ -359,7 +359,7 @@ can then try a new match starting at offset \fIn+1\fP in the first buffer.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .

File diff suppressed because it is too large
+ 329 - 345
regex.mod/pcre/doc/pcre2pattern.3


+ 26 - 10
regex.mod/pcre/doc/pcre2perform.3

@@ -1,4 +1,4 @@
-.TH PCRE2PERFORM 3 "03 February 2019" "PCRE2 10.33"
+.TH PCRE2PERFORM 3 "27 July 2022" "PCRE2 10.41"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 PERFORMANCE"
@@ -69,12 +69,28 @@ From release 10.30, the interpretive (non-JIT) version of \fBpcre2_match()\fP
 uses very little system stack at run time. In earlier releases recursive
 function calls could use a great deal of stack, and this could cause problems,
 but this usage has been eliminated. Backtracking positions are now explicitly
-remembered in memory frames controlled by the code. An initial 20KiB vector of
-frames is allocated on the system stack (enough for about 100 frames for small
-patterns), but if this is insufficient, heap memory is used. The amount of heap
-memory can be limited; if the limit is set to zero, only the initial stack
-vector is used. Rewriting patterns to be time-efficient, as described below,
-may also reduce the memory requirements.
+remembered in memory frames controlled by the code.
+.P
+The size of each frame depends on the size of pointer variables and the number
+of capturing parenthesized groups in the pattern being matched. On a 64-bit
+system the frame size for a pattern with no captures is 128 bytes. For each
+capturing group the size increases by 16 bytes.
+.P
+Until release 10.41, an initial 20KiB frames vector was allocated on the system
+stack, but this still caused some issues for multi-thread applications where
+each thread has a very small stack. From release 10.41 backtracking memory
+frames are always held in heap memory. An initial heap allocation is obtained
+the first time any match data block is passed to \fBpcre2_match()\fP. This is
+remembered with the match data block and re-used if that block is used for
+another match. It is freed when the match data block itself is freed.
+.P
+The size of the initial block is the larger of 20KiB or ten times the pattern's
+frame size, unless the heap limit is less than this, in which case the heap
+limit is used. If the initial block proves to be too small during matching, it
+is replaced by a larger block, subject to the heap limit. The heap limit is
+checked only when a new block is to be allocated. Reducing the heap limit
+between calls to \fBpcre2_match()\fP with the same match data block does not
+affect the saved block.
 .P
 In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive
 function calls, but only for processing atomic groups, lookaround assertions,
@@ -230,7 +246,7 @@ pattern to match. This is done by repeatedly matching with different limits.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@@ -239,6 +255,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 03 February 2019
-Copyright (c) 1997-2019 University of Cambridge.
+Last updated: 27 July 2022
+Copyright (c) 1997-2022 University of Cambridge.
 .fi

+ 31 - 12
regex.mod/pcre/doc/pcre2posix.3

@@ -1,4 +1,4 @@
-.TH PCRE2POSIX 3 "26 April 2021" "PCRE2 10.37"
+.TH PCRE2POSIX 3 "19 January 2024" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "SYNOPSIS"
@@ -31,9 +31,13 @@ and 32-bit libraries. See the
 documentation for a description of PCRE2's native API, which contains much
 additional functionality.
 .P
-The functions described here are wrapper functions that ultimately call the
-PCRE2 native API. Their prototypes are defined in the \fBpcre2posix.h\fP header
-file, and they all have unique names starting with \fBpcre2_\fP. However, the
+\fBIMPORTANT NOTE\fP: The functions described here are NOT thread-safe, and
+should not be used in multi-threaded applications. They are also limited to
+processing subjects that are not bigger than 2GB. Use the native API instead.
+.P
+These functions are wrapper functions that ultimately call the PCRE2 native
+API. Their prototypes are defined in the \fBpcre2posix.h\fP header file, and
+they all have unique names starting with \fBpcre2_\fP. However, the
 \fBpcre2posix.h\fP header also contains macro definitions that convert the
 standard POSIX names such as \fBregcomp()\fP into \fBpcre2_regcomp()\fP etc. This
 means that a program can use the usual POSIX names without running the risk of
@@ -44,7 +48,12 @@ can be accessed by adding \fB-lpcre2-posix\fP to the command for linking an
 application. Because the POSIX functions call the native ones, it is also
 necessary to add \fB-lpcre2-8\fP.
 .P
-Although they were not defined as protypes in \fBpcre2posix.h\fP, releases
+On Windows systems, if you are linking to a DLL version of the library, it is
+recommended that \fBPCRE2POSIX_SHARED\fP is defined before including the
+\fBpcre2posix.h\fP header, as it will allow for a more efficient way to
+invoke the functions by adding the \fB__declspec(dllimport)\fP decorator.
+.P
+Although they were not defined as prototypes in \fBpcre2posix.h\fP, releases
 10.33 to 10.36 of the library contained functions with the POSIX names
 \fBregcomp()\fP etc. These simply passed their arguments to the PCRE2
 functions. These functions were provided for backwards compatibility with
@@ -64,6 +73,10 @@ captured substrings. It also defines some constants whose names start with
 .SH "USING THE POSIX FUNCTIONS"
 .rs
 .sp
+Note that these functions are just POSIX-style wrappers for PCRE2's native API.
+They do not give POSIX regular expression behaviour, and they are not
+thread-safe or even POSIX compatible.
+.P
 Those POSIX option bits that can reasonably be mapped to PCRE2 native options
 have been implemented. In addition, the option REG_EXTENDED is defined with the
 value zero. This has no effect, but since programs that are written to the
@@ -93,8 +106,10 @@ The function \fBpcre2_regcomp()\fP is called to compile a pattern into an
 internal form. By default, the pattern is a C string terminated by a binary
 zero (but see REG_PEND below). The \fIpreg\fP argument is a pointer to a
 \fBregex_t\fP structure that is used as a base for storing information about
-the compiled regular expression. (It is also used for input when REG_PEND is
-set.)
+the compiled regular expression. It is also used for input when REG_PEND is
+set. The \fBregex_t\fP structure used by \fBpcre2_regcomp()\fP is defined in
+\fBpcre2posix.h\fP and is not the same as the structure used by other libraries
+that provide POSIX-style matching.
 .P
 The argument \fIcflags\fP is either zero, or contains one or more of the bits
 defined by the following macros:
@@ -146,7 +161,7 @@ caution in software intended to be portable to other systems.
 .sp
 The PCRE2_UCP option is set when the regular expression is passed for
 compilation to the native function. This causes PCRE2 to use Unicode properties
-when matchine \ed, \ew, etc., instead of just recognizing ASCII values. Note
+when matching \ed, \ew, etc., instead of just recognizing ASCII values. Note
 that REG_UCP is not part of the POSIX standard.
 .sp
   REG_UNGREEDY
@@ -163,7 +178,7 @@ strings used for matching it to be treated as UTF-8 strings. Note that REG_UTF
 is not part of the POSIX standard.
 .P
 In the absence of these flags, no options are passed to the native function.
-This means the the regex is compiled with PCRE2 default semantics. In
+This means that the regex is compiled with PCRE2 default semantics. In
 particular, the way it handles newline characters in the subject string is the
 Perl way, not the POSIX way. Note that setting PCRE2_MULTILINE has only
 \fIsome\fP of the effects specified for REG_NEWLINE. It does not affect the way
@@ -284,6 +299,10 @@ entire portion of \fIstring\fP that was matched; subsequent elements relate to
 the capturing subpatterns of the regular expression. Unused entries in the
 array have both structure members set to -1.
 .P
+\fIregmatch_t\fP as well as the \fIregoff_t\fP typedef it uses are defined in
+\fBpcre2posix.h\fP and are not warranted to have the same size or layout as other
+similarly named types from other libraries that provide POSIX-style matching.
+.P
 A successful match yields a zero return; various error codes are defined in the
 header file, of which REG_NOMATCH is the "expected" failure code.
 .
@@ -315,7 +334,7 @@ expression.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@@ -324,6 +343,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 26 April 2021
-Copyright (c) 1997-2021 University of Cambridge.
+Last updated: 19 January 2024
+Copyright (c) 1997-2024 University of Cambridge.
 .fi

+ 1 - 1
regex.mod/pcre/doc/pcre2sample.3

@@ -85,7 +85,7 @@ need to add
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .

+ 7 - 8
regex.mod/pcre/doc/pcre2serialize.3

@@ -6,11 +6,11 @@ PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
 .nf
 .B int32_t pcre2_serialize_decode(pcre2_code **\fIcodes\fP,
-.B "  int32_t \fInumber_of_codes\fP, const uint32_t *\fIbytes\fP,"
+.B "  int32_t \fInumber_of_codes\fP, const uint8_t *\fIbytes\fP,"
 .B "  pcre2_general_context *\fIgcontext\fP);"
 .sp
-.B int32_t pcre2_serialize_encode(pcre2_code **\fIcodes\fP,
-.B "  int32_t \fInumber_of_codes\fP, uint32_t **\fIserialized_bytes\fP,"
+.B int32_t pcre2_serialize_encode(const pcre2_code **\fIcodes\fP,
+.B "  int32_t \fInumber_of_codes\fP, uint8_t **\fIserialized_bytes\fP,"
 .B "  PCRE2_SIZE *\fIserialized_size\fP, pcre2_general_context *\fIgcontext\fP);"
 .sp
 .B void pcre2_serialize_free(uint8_t *\fIbytes\fP);
@@ -75,13 +75,13 @@ being a pointer to a vector of pointers to compiled patterns, and the length of
 the vector. The third and fourth arguments point to variables which are set to
 point to the created byte stream and its length, respectively. The final
 argument is a pointer to a general context, which can be used to specify custom
-memory mangagement functions. If this argument is NULL, \fBmalloc()\fP is used
+memory management functions. If this argument is NULL, \fBmalloc()\fP is used
 to obtain memory for the byte stream. The yield of the function is the number
 of serialized patterns, or one of the following negative error codes:
 .sp
   PCRE2_ERROR_BADDATA      the number of patterns is zero or less
   PCRE2_ERROR_BADMAGIC     mismatch of id bytes in one of the patterns
-  PCRE2_ERROR_MEMORY       memory allocation failed
+  PCRE2_ERROR_NOMEMORY     memory allocation failed
   PCRE2_ERROR_MIXEDTABLES  the patterns do not all use the same tables
   PCRE2_ERROR_NULL         the 1st, 3rd, or 4th argument is NULL
 .sp
@@ -137,11 +137,10 @@ the compiled patterns in new memory blocks, setting pointers to them in a
 vector. The first two arguments are a pointer to a suitable vector and its
 length, and the third argument points to a byte stream. The final argument is a
 pointer to a general context, which can be used to specify custom memory
-mangagement functions for the decoded patterns. If this argument is NULL,
+management functions for the decoded patterns. If this argument is NULL,
 \fBmalloc()\fP and \fBfree()\fP are used. After deserialization, the byte
 stream is no longer needed and can be discarded.
 .sp
-  int32_t number_of_codes;
   pcre2_code *list_of_codes[2];
   uint8_t *bytes = <serialized data>;
   int32_t number_of_codes =
@@ -185,7 +184,7 @@ save/restore cycle. You can, however, process a restored pattern with
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .

+ 105 - 176
regex.mod/pcre/doc/pcre2syntax.3

@@ -1,4 +1,4 @@
-.TH PCRE2SYNTAX 3 "30 August 2021" "PCRE2 10.38"
+.TH PCRE2SYNTAX 3 "12 October 2023" "PCRE2 10.43"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY"
@@ -17,6 +17,21 @@ documentation. This document contains a quick-reference summary of the syntax.
 .sp
   \ex         where x is non-alphanumeric is a literal x
   \eQ...\eE    treat enclosed characters as literal
+.sp
+Note that white space inside \eQ...\eE is always treated as literal, even if
+PCRE2_EXTENDED is set, causing most other white space to be ignored.
+.
+.
+.SH "BRACED ITEMS"
+.rs
+.sp
+With one exception, wherever brace characters { and } are required to enclose
+data for constructions such as \eg{2} or \ek{name}, space and/or horizontal tab
+characters that follow { or precede } are allowed and are ignored. In the case
+of quantifiers, they may also appear before or after the comma. The exception
+is \eu{...} which is not Perl-compatible and is recognized only when
+PCRE2_EXTRA_ALT_BSUX is set. This is an ECMAScript compatibility feature, and
+follows ECMAScript's behaviour.
 .
 .
 .SH "ESCAPED CHARACTERS"
@@ -26,7 +41,7 @@ This table applies to ASCII and Unicode environments. An unrecognized escape
 sequence causes an error.
 .sp
   \ea         alarm, that is, the BEL character (hex 07)
-  \ecx        "control-x", where x is any ASCII printing character
+  \ecx        "control-x", where x is a non-control ASCII character
   \ee         escape (hex 1B)
   \ef         form feed (hex 0C)
   \en         newline (hex 0A)
@@ -101,7 +116,12 @@ or in the 16-bit and 32-bit libraries. However, if locale-specific matching is
 happening, \es and \ew may also match characters with code points in the range
 128-255. If the PCRE2_UCP option is set, the behaviour of these escape
 sequences is changed to use Unicode properties and they match many more
-characters.
+characters, but there are some option settings that can restrict individual
+sequences to matching only ASCII characters.
+.P
+Property descriptions in \ep and \eP are matched caselessly; hyphens,
+underscores, and white space are ignored, in accordance with Unicode's "loose
+matching" rules.
 .
 .
 .SH "GENERAL CATEGORY PROPERTIES FOR \ep and \eP"
@@ -120,6 +140,7 @@ characters.
   Lo         Other letter
   Lt         Title case letter
   Lu         Upper case letter
+  Lc         Ll, Lu, or Lt
   L&         Ll, Lu, or Lt
 .sp
   M          Mark
@@ -159,7 +180,7 @@ characters.
   Xan        Alphanumeric: union of properties L and N
   Xps        POSIX space: property Z or tab, NL, VT, FF, CR
   Xsp        Perl space: property Z or tab, NL, VT, FF, CR
-  Xuc        Univerally-named character: one that can be
+  Xuc        Universally-named character: one that can be
                represented by a Universal Character Name
   Xwd        Perl word: property Xan or underscore
 .sp
@@ -167,170 +188,59 @@ Perl and POSIX space are now the same. Perl added VT to its space character set
 at release 5.18.
 .
 .
-.SH "SCRIPT NAMES FOR \ep AND \eP"
-.rs
-.sp
-Adlam,
-Ahom,
-Anatolian_Hieroglyphs,
-Arabic,
-Armenian,
-Avestan,
-Balinese,
-Bamum,
-Bassa_Vah,
-Batak,
-Bengali,
-Bhaiksuki,
-Bopomofo,
-Brahmi,
-Braille,
-Buginese,
-Buhid,
-Canadian_Aboriginal,
-Carian,
-Caucasian_Albanian,
-Chakma,
-Cham,
-Cherokee,
-Chorasmian,
-Common,
-Coptic,
-Cuneiform,
-Cypriot,
-Cypro_Minoan,
-Cyrillic,
-Deseret,
-Devanagari,
-Dives_Akuru,
-Dogra,
-Duployan,
-Egyptian_Hieroglyphs,
-Elbasan,
-Elymaic,
-Ethiopic,
-Georgian,
-Glagolitic,
-Gothic,
-Grantha,
-Greek,
-Gujarati,
-Gunjala_Gondi,
-Gurmukhi,
-Han,
-Hangul,
-Hanifi_Rohingya,
-Hanunoo,
-Hatran,
-Hebrew,
-Hiragana,
-Imperial_Aramaic,
-Inherited,
-Inscriptional_Pahlavi,
-Inscriptional_Parthian,
-Javanese,
-Kaithi,
-Kannada,
-Katakana,
-Kayah_Li,
-Kharoshthi,
-Khitan_Small_Script,
-Khmer,
-Khojki,
-Khudawadi,
-Lao,
-Latin,
-Lepcha,
-Limbu,
-Linear_A,
-Linear_B,
-Lisu,
-Lycian,
-Lydian,
-Mahajani,
-Makasar,
-Malayalam,
-Mandaic,
-Manichaean,
-Marchen,
-Masaram_Gondi,
-Medefaidrin,
-Meetei_Mayek,
-Mende_Kikakui,
-Meroitic_Cursive,
-Meroitic_Hieroglyphs,
-Miao,
-Modi,
-Mongolian,
-Mro,
-Multani,
-Myanmar,
-Nabataean,
-Nandinagari,
-New_Tai_Lue,
-Newa,
-Nko,
-Nushu,
-Nyakeng_Puachue_Hmong,
-Ogham,
-Ol_Chiki,
-Old_Hungarian,
-Old_Italic,
-Old_North_Arabian,
-Old_Permic,
-Old_Persian,
-Old_Sogdian,
-Old_South_Arabian,
-Old_Turkic,
-Old_Uyghur,
-Oriya,
-Osage,
-Osmanya,
-Pahawh_Hmong,
-Palmyrene,
-Pau_Cin_Hau,
-Phags_Pa,
-Phoenician,
-Psalter_Pahlavi,
-Rejang,
-Runic,
-Samaritan,
-Saurashtra,
-Sharada,
-Shavian,
-Siddham,
-SignWriting,
-Sinhala,
-Sogdian,
-Sora_Sompeng,
-Soyombo,
-Sundanese,
-Syloti_Nagri,
-Syriac,
-Tagalog,
-Tagbanwa,
-Tai_Le,
-Tai_Tham,
-Tai_Viet,
-Takri,
-Tamil,
-Tangsa,
-Tangut,
-Telugu,
-Thaana,
-Thai,
-Tibetan,
-Tifinagh,
-Tirhuta,
-Toto,
-Ugaritic,
-Vai,
-Vithkuqi,
-Wancho,
-Warang_Citi,
-Yezidi,
-Yi,
-Zanabazar_Square.
+.SH "BINARY PROPERTIES FOR \ep AND \eP"
+.rs
+.sp
+Unicode defines a number of binary properties, that is, properties whose only
+values are true or false. You can obtain a list of those that are recognized by
+\ep and \eP, along with their abbreviations, by running this command:
+.sp
+  pcre2test -LP
+.
+.
+.
+.SH "SCRIPT MATCHING WITH \ep AND \eP"
+.rs
+.sp
+Many script names and their 4-letter abbreviations are recognized in
+\ep{sc:...} or \ep{scx:...} items, or on their own with \ep (and also \eP of
+course). You can obtain a list of these scripts by running this command:
+.sp
+  pcre2test -LS
+.
+.
+.
+.SH "THE BIDI_CLASS PROPERTY FOR \ep AND \eP"
+.rs
+.sp
+  \ep{Bidi_Class:<class>}   matches a character with the given class
+  \ep{BC:<class>}           matches a character with the given class
+.sp
+The recognized classes are:
+.sp
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          white space
 .
 .
 .SH "CHARACTER CLASSES"
@@ -381,6 +291,9 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
   {n,}        n or more, greedy
   {n,}+       n or more, possessive
   {n,}?       n or more, lazy
+  {,m}        zero up to m, greedy
+  {,m}+       zero up to m, possessive
+  {,m}?       zero up to m, lazy
 .
 .
 .SH "ANCHORS AND SIMPLE ASSERTIONS"
@@ -452,17 +365,28 @@ both cases, a name must not start with a digit.
 Changes of these options within a group are automatically cancelled at the end
 of the group.
 .sp
+  (?a)            all ASCII options
+  (?aD)           restrict \ed to ASCII in UCP mode
+  (?aS)           restrict \es to ASCII in UCP mode
+  (?aW)           restrict \ew to ASCII in UCP mode
+  (?aP)           restrict all POSIX classes to ASCII in UCP mode
+  (?aT)           restrict POSIX digit classes to ASCII in UCP mode
   (?i)            caseless
   (?J)            allow duplicate named groups
   (?m)            multiline
   (?n)            no auto capture
+  (?r)            restrict caseless to either ASCII or non-ASCII
   (?s)            single line (dotall)
   (?U)            default ungreedy (lazy)
-  (?x)            extended: ignore white space except in classes
+  (?x)            ignore white space except in classes or \eQ...\eE
   (?xx)           as (?x) but also ignore space and tab in classes
-  (?-...)         unset option(s)
-  (?^)            unset imnsx options
+  (?-...)         unset the given option(s)
+  (?^)            unset imnrsx options
 .sp
+(?aP) implies (?aT) as well, though this has no additional effect. However, it
+means that (?-aP) is really (?-PT) which disables all ASCII restrictions for
+POSIX classes.
+.P
 Unsetting x or xx unsets both. Several options may be set at once, and a
 mixture of setting and unsetting such as (?i-x) is allowed, but there may be
 only one hyphen. Setting (but not unsetting) is allowed after (?^ for example
@@ -535,7 +459,12 @@ setting with a similar syntax.
   (*nlb:...)                  ) negative lookbehind
   (*negative_lookbehind:...)  )
 .sp
-Each top-level branch of a lookbehind must be of a fixed length.
+Each top-level branch of a lookbehind must have a limit for the number of
+characters it matches. If any branch can match a variable number of characters,
+the maximum for each branch is limited to a value set by the caller of
+\fBpcre2_compile()\fP or defaulted. The default is set when PCRE2 is built
+(ultimate default 255). If every branch matches a fixed number of characters,
+the limit for each branch is 65535 characters.
 .
 .
 .SH "NON-ATOMIC LOOKAROUND ASSERTIONS"
@@ -605,8 +534,8 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
   (?(condition)yes-pattern|no-pattern)
 .sp
   (?(n)               absolute reference condition
-  (?(+n)              relative reference condition
-  (?(-n)              relative reference condition
+  (?(+n)              relative reference condition (PCRE2 extension)
+  (?(-n)              relative reference condition (PCRE2 extension)
   (?(<name>)          named reference condition (Perl)
   (?('name')          named reference condition (Perl)
   (?(name)            named reference condition (PCRE2, deprecated)
@@ -684,6 +613,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 30 August 2021
-Copyright (c) 1997-2021 University of Cambridge.
+Last updated: 12 October 2023
+Copyright (c) 1997-2023 University of Cambridge.
 .fi

+ 132 - 65
regex.mod/pcre/doc/pcre2test.1

@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "30 August 2021" "PCRE 10.38"
+.TH PCRE2TEST 1 "27 January 2024" "PCRE2 10.43"
 .SH NAME
 pcre2test - a program for testing Perl-compatible regular expressions.
 .SH SYNOPSIS
@@ -47,7 +47,7 @@ format before being passed to the library functions. Results are converted back
 to 8-bit code units for output.
 .P
 In the rest of this document, the names of library functions and structures
-are given in generic form, for example, \fBpcre_compile()\fP. The actual
+are given in generic form, for example, \fBpcre2_compile()\fP. The actual
 names used in the libraries have a suffix _8, _16, or _32, as appropriate.
 .
 .
@@ -61,14 +61,14 @@ library. In some Windows environments character 26 (hex 1A) causes an immediate
 end of file, and no further data is read, so this character should be avoided
 unless you really want that action.
 .P
-The input is processed using using C's string functions, so must not
-contain binary zeros, even though in Unix-like environments, \fBfgets()\fP
-treats any bytes other than newline as data characters. An error is generated
-if a binary zero is encountered. By default subject lines are processed for
-backslash escapes, which makes it possible to include any data value in strings
-that are passed to the library for matching. For patterns, there is a facility
-for specifying some or all of the 8-bit input characters as hexadecimal pairs,
-which makes it possible to include binary zeros.
+The input is processed using C's string functions, so must not contain binary
+zeros, even though in Unix-like environments, \fBfgets()\fP treats any bytes
+other than newline as data characters. An error is generated if a binary zero
+is encountered. By default subject lines are processed for backslash escapes,
+which makes it possible to include any data value in strings that are passed to
+the library for matching. For patterns, there is a facility for specifying some
+or all of the 8-bit input characters as hexadecimal pairs, which makes it
+possible to include binary zeros.
 .
 .
 .SS "Input for the 16-bit and 32-bit libraries"
@@ -111,14 +111,14 @@ the default). If the 8-bit library has not been built, this option causes an
 error.
 .TP 10
 \fB-16\fP
-If the 16-bit library has been built, this option causes it to be used. If only
-the 16-bit library has been built, this is the default. If the 16-bit library
+If the 16-bit library has been built, this option causes it to be used. If the
+8-bit library has not been built, this is the default. If the 16-bit library
 has not been built, this option causes an error.
 .TP 10
 \fB-32\fP
-If the 32-bit library has been built, this option causes it to be used. If only
-the 32-bit library has been built, this is the default. If the 32-bit library
-has not been built, this option causes an error.
+If the 32-bit library has been built, this option causes it to be used. If no
+other library has been built, this is the default. If the 32-bit library has
+not been built, this option causes an error.
 .TP 10
 \fB-ac\fP
 Behave as if each pattern has the \fBauto_callout\fP modifier, that is, insert
@@ -211,7 +211,17 @@ available, and the use of JIT for matching is verified.
 \fB-LM\fP
 List modifiers: write a list of available pattern and subject modifiers to the
 standard output, then exit with zero exit code. All other options are ignored.
-If both -C and -LM are present, whichever is first is recognized.
+If both -C and any -Lx options are present, whichever is first is recognized.
+.TP 10
+\fB-LP\fP
+List properties: write a list of recognized Unicode properties to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
+.TP 10
+\fB-LS\fP
+List scripts: write a list of recognized Unicode script names to the standard
+output, then exit with zero exit code. All other options are ignored. If both
+-C and any -Lx options are present, whichever is first is recognized.
 .TP 10
 \fB-pattern\fP \fImodifier-list\fP
 Behave as if each pattern line contains the given modifiers.
@@ -452,8 +462,8 @@ followed by a backslash, for example,
 .sp
   /abc/\e
 .sp
-then a backslash is added to the end of the pattern. This is done to provide a
-way of testing the error condition that arises if a pattern finishes with a
+a backslash is added to the end of the pattern. This is done to provide a way
+of testing the error condition that arises if a pattern finishes with a
 backslash, because
 .sp
   /abc\e/
@@ -555,12 +565,11 @@ by a previous \fB#pattern\fP command.
 .sp
 The following modifiers set options for \fBpcre2_compile()\fP. Most of them set
 bits in the options argument of that function, but those whose names start with
-PCRE2_EXTRA are additional options that are set in the compile context. For the
-main options, there are some single-letter abbreviations that are the same as
-Perl options. There is special handling for /x: if a second x is present,
-PCRE2_EXTENDED is converted into PCRE2_EXTENDED_MORE as in Perl. A third
-appearance adds PCRE2_EXTENDED as well, though this makes no difference to the
-way \fBpcre2_compile()\fP behaves. See
+PCRE2_EXTRA are additional options that are set in the compile context.
+Some of these options have single-letter abbreviations. There is special
+handling for /x: if a second x is present, PCRE2_EXTENDED is converted into
+PCRE2_EXTENDED_MORE as in Perl. A third appearance adds PCRE2_EXTENDED as well,
+though this makes no difference to the way \fBpcre2_compile()\fP behaves. See
 .\" HREF
 \fBpcre2api\fP
 .\"
@@ -573,9 +582,16 @@ for a description of the effects of these options.
       alt_circumflex            set PCRE2_ALT_CIRCUMFLEX
       alt_verbnames             set PCRE2_ALT_VERBNAMES
       anchored                  set PCRE2_ANCHORED
+  /a  ascii_all                 set all ASCII options
+      ascii_bsd                 set PCRE2_EXTRA_ASCII_BSD
+      ascii_bss                 set PCRE2_EXTRA_ASCII_BSS
+      ascii_bsw                 set PCRE2_EXTRA_ASCII_BSW
+      ascii_digit               set PCRE2_EXTRA_ASCII_DIGIT
+      ascii_posix               set PCRE2_EXTRA_ASCII_POSIX
       auto_callout              set PCRE2_AUTO_CALLOUT
       bad_escape_is_literal     set PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
   /i  caseless                  set PCRE2_CASELESS
+  /r  caseless_restrict         set PCRE2_EXTRA_CASELESS_RESTRICT
       dollar_endonly            set PCRE2_DOLLAR_ENDONLY
   /s  dotall                    set PCRE2_DOTALL
       dupnames                  set PCRE2_DUPNAMES
@@ -636,10 +652,12 @@ heavily used in the test files.
       jitfast                   use JIT fast path
       jitverify                 verify JIT use
       locale=<name>             use this locale
-      max_pattern_length=<n>    set the maximum pattern length
+      max_pattern_length=<n>    set maximum pattern length
+      max_varlookbehind=<n>     set maximum variable lookbehind length
       memory                    show memory used
       newline=<type>            set newline type
       null_context              compile with a NULL context
+      null_pattern              pass pattern as NULL
       parens_nest_limit=<n>     set maximum parentheses depth
       posix                     use the POSIX API
       posix_nosub               use the POSIX API with REG_NOSUB
@@ -714,9 +732,11 @@ ending code units are recorded. The subject length line is omitted when
 \fBno_start_optimize\fP is set because the minimum length is not calculated
 when it can never be used.
 .P
-The \fBframesize\fP modifier shows the size, in bytes, of the storage frames
+The \fBframesize\fP modifier shows the size, in bytes, of each storage frame
 used by \fBpcre2_match()\fP for handling backtracking. The size depends on the
-number of capturing parentheses in the pattern.
+number of capturing parentheses in the pattern. A vector of these frames is
+used at matching time; its overall size is shown when the \fBheapframes_size\fP
+subject modifier is set.
 .P
 The \fBcallout_info\fP modifier requests information about all the callouts in
 the pattern. A list of them is output at the end of any other information that
@@ -733,6 +753,15 @@ testing that \fBpcre2_compile()\fP behaves correctly in this case (it uses
 default values).
 .
 .
+.SS "Passing a NULL pattern"
+.rs
+.sp
+The \fBnull_pattern\fP modifier is for testing the behaviour of
+\fBpcre2_compile()\fP when the pattern argument is NULL. The length value
+passed is the default PCRE2_ZERO_TERMINATED unless \fBuse_length\fP is set.
+Any length other than zero causes an error.
+.
+.
 .SS "Specifying pattern characters in hexadecimal"
 .rs
 .sp
@@ -772,6 +801,17 @@ If \fBhex\fP or \fBuse_length\fP is used with the POSIX wrapper API (see
 below), the REG_PEND extension is used to pass the pattern's length.
 .
 .
+.SS "Specifying a maximum for variable lookbehinds"
+.rs
+.sp
+Variable lookbehind assertions are supported only if, for each one, there is a
+maximum length (in characters) that it can match. There is a limit on this,
+whose default can be set at build time, with an ultimate default of 255. The
+\fBmax_varlookbehind\fP modifier uses the \fBpcre2_set_max_varlookbehind()\fP
+function to change the limit. Lookbehinds whose branches each match a fixed
+length are limited to 65535 characters per branch.
+.
+.
 .SS "Specifying wide characters in 16-bit and 32-bit modes"
 .rs
 .sp
@@ -1029,6 +1069,7 @@ process.
       allusedtext                 show all consulted text
       altglobal                   alternative global matching
   /g  global                      global matching
+      heapframes_size             show match data heapframes size
       jitstack=<n>                set size of JIT stack
       mark                        show mark values
       replace=<string>            specify a replacement string
@@ -1133,18 +1174,19 @@ The following modifiers set options for \fBpcre2_match()\fP or
 .\"
 for a description of their effects.
 .sp
-      anchored                  set PCRE2_ANCHORED
-      endanchored               set PCRE2_ENDANCHORED
-      dfa_restart               set PCRE2_DFA_RESTART
-      dfa_shortest              set PCRE2_DFA_SHORTEST
-      no_jit                    set PCRE2_NO_JIT
-      no_utf_check              set PCRE2_NO_UTF_CHECK
-      notbol                    set PCRE2_NOTBOL
-      notempty                  set PCRE2_NOTEMPTY
-      notempty_atstart          set PCRE2_NOTEMPTY_ATSTART
-      noteol                    set PCRE2_NOTEOL
-      partial_hard (or ph)      set PCRE2_PARTIAL_HARD
-      partial_soft (or ps)      set PCRE2_PARTIAL_SOFT
+      anchored                   set PCRE2_ANCHORED
+      endanchored                set PCRE2_ENDANCHORED
+      dfa_restart                set PCRE2_DFA_RESTART
+      dfa_shortest               set PCRE2_DFA_SHORTEST
+      disable_recurseloop_check  set PCRE2_DISABLE_RECURSELOOP_CHECK
+      no_jit                     set PCRE2_NO_JIT
+      no_utf_check               set PCRE2_NO_UTF_CHECK
+      notbol                     set PCRE2_NOTBOL
+      notempty                   set PCRE2_NOTEMPTY
+      notempty_atstart           set PCRE2_NOTEMPTY_ATSTART
+      noteol                     set PCRE2_NOTEOL
+      partial_hard (or ph)       set PCRE2_PARTIAL_HARD
+      partial_soft (or ps)       set PCRE2_PARTIAL_SOFT
 .sp
 The partial matching modifiers are provided with abbreviations because they
 appear frequently in tests.
@@ -1196,16 +1238,20 @@ pattern, but can be overridden by modifiers on the subject.
       copy=<number or name>      copy captured substring
       depth_limit=<n>            set a depth limit
       dfa                        use \fBpcre2_dfa_match()\fP
-      find_limits                find match and depth limits
+      find_limits                find heap, match and depth limits
+      find_limits_noheap         find match and depth limits
       get=<number or name>       extract captured substring
       getall                     extract all captured substrings
   /g  global                     global matching
+      heapframes_size            show match data heapframes size
       heap_limit=<n>             set a limit on heap memory (Kbytes)
       jitstack=<n>               set size of JIT stack
       mark                       show mark values
       match_limit=<n>            set a match limit
       memory                     show heap memory usage
       null_context               match with a NULL context
+      null_replacement           substitute with NULL replacement
+      null_subject               match with NULL subject
       offset=<n>                 set starting offset
       offset_limit=<n>           set offset limit
       ovector=<n>                set size of output vector
@@ -1321,7 +1367,7 @@ controlled by various modifiers listed above whose names begin with
 .\" </a>
 below.
 .\"
-Testing callouts from \fBpcre2_substitute()\fP is decribed separately in
+Testing callouts from \fBpcre2_substitute()\fP is described separately in
 "Testing the substitution function"
 .\" HTML <a href="#substitution">
 .\" </a>
@@ -1463,7 +1509,7 @@ matching provokes an error return ("bad option value") from
 If the \fBsubstitute_callout\fP modifier is set, a substitution callout
 function is set up. The \fBnull_context\fP modifier must not be set, because
 the address of the callout function is passed in a match context. When the
-callout function is called (after each substitution), details of the the input
+callout function is called (after each substitution), details of the input
 and output strings are output. For example:
 .sp
   /abc/g,replace=<$0>,substitute_callout
@@ -1516,7 +1562,7 @@ value that was set on the pattern.
 .sp
 The \fBheap_limit\fP, \fBmatch_limit\fP, and \fBdepth_limit\fP modifiers set
 the appropriate limits in the match context. These values are ignored when the
-\fBfind_limits\fP modifier is specified.
+\fBfind_limits\fP or \fBfind_limits_noheap\fP modifier is specified.
 .
 .
 .SS "Finding minimum limits"
@@ -1526,8 +1572,12 @@ If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP
 calls the relevant matching function several times, setting different values in
 the match context via \fBpcre2_set_heap_limit()\fP,
 \fBpcre2_set_match_limit()\fP, or \fBpcre2_set_depth_limit()\fP until it finds
-the minimum values for each parameter that allows the match to complete without
-error. If JIT is being used, only the match limit is relevant.
+the smallest value for each parameter that allows the match to complete without
+a "limit exceeded" error. The match itself may succeed or fail. An alternative
+modifier, \fBfind_limits_noheap\fP, omits the heap limit. This is used in the
+standard tests, because the minimum heap limit varies between systems. If JIT
+is being used, only the match limit is relevant, and the other two are
+automatically omitted.
 .P
 When using this modifier, the pattern should not contain any limit settings
 such as (*LIMIT_MATCH=...) within it. If such a setting is present and is
@@ -1551,9 +1601,7 @@ and non-recursive, to the internal matching function, thus controlling the
 overall amount of computing resource that is used.
 .P
 For both kinds of matching, the \fIheap_limit\fP number, which is in kibibytes
-(units of 1024 bytes), limits the amount of heap memory used for matching. A
-value of zero disables the use of any heap memory; many simple pattern matches
-can be done without using the heap, so zero is not an unreasonable setting.
+(units of 1024 bytes), limits the amount of heap memory used for matching.
 .
 .
 .SS "Showing MARK names"
@@ -1572,16 +1620,31 @@ is added to the non-match message.
 .sp
 The \fBmemory\fP modifier causes \fBpcre2test\fP to log the sizes of all heap
 memory allocation and freeing calls that occur during a call to
-\fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. These occur only when a match
-requires a bigger vector than the default for remembering backtracking points
-(\fBpcre2_match()\fP) or for internal workspace (\fBpcre2_dfa_match()\fP). In
-many cases there will be no heap memory used and therefore no additional
-output. No heap memory is allocated during matching with JIT, so in that case
-the \fBmemory\fP modifier never has any effect. For this modifier to work, the
+\fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP. In the latter case, heap memory
+is used only when a match requires more internal workspace than the default
+allocation on the stack, so in many cases there will be no output. No heap
+memory is allocated during matching with JIT. For this modifier to work, the
 \fBnull_context\fP modifier must not be set on both the pattern and the
 subject, though it can be set on one or the other.
 .
 .
+.SS "Showing the heap frame overall vector size"
+.rs
+.sp
+The \fBheapframes_size\fP modifier is relevant for matches using
+\fBpcre2_match()\fP without JIT. After a match has run (whether successful or
+not) the size, in bytes, of the allocated heap frames vector that is left
+attached to the match data block is shown. If the matching action involved
+several calls to \fBpcre2_match()\fP (for example, global matching or for
+timing) only the final value is shown.
+.P
+This modifier is ignored, with a warning, for POSIX or DFA matching. JIT
+matching does not use the heap frames vector, so the size is always zero,
+unless there was a previous non-JIT match. Note that specifying a size of zero
+for the output vector (see below) causes \fBpcre2test\fP to free its match data
+block (and associated heap frames vector) and allocate a new one.
+.
+.
 .SS "Setting a starting offset"
 .rs
 .sp
@@ -1611,9 +1674,9 @@ A value of zero is useful when testing the POSIX API because it causes
 \fBregexec()\fP to be called with a NULL capture vector. When not testing the
 POSIX API, a value of zero is used to cause
 \fBpcre2_match_data_create_from_pattern()\fP to be called, in order to create a
-match block of exactly the right size for the pattern. (It is not possible to
-create a match block with a zero-length ovector; there is always at least one
-pair of offsets.)
+new match block of exactly the right size for the pattern. (It is not possible
+to create a match block with a zero-length ovector; there is always at least
+one pair of offsets.) The old match data block is freed.
 .
 .
 .SS "Passing the subject as zero-terminated"
@@ -1629,7 +1692,7 @@ When testing \fBpcre2_substitute()\fP, this modifier also has the effect of
 passing the replacement string as zero-terminated.
 .
 .
-.SS "Passing a NULL context"
+.SS "Passing a NULL context, subject, or replacement"
 .rs
 .sp
 Normally, \fBpcre2test\fP passes a context block to \fBpcre2_match()\fP,
@@ -1637,7 +1700,12 @@ Normally, \fBpcre2test\fP passes a context block to \fBpcre2_match()\fP,
 If the \fBnull_context\fP modifier is set, however, NULL is passed. This is for
 testing that the matching and substitution functions behave correctly in this
 case (they use default values). This modifier cannot be used with the
-\fBfind_limits\fP or \fBsubstitute_callout\fP modifiers.
+\fBfind_limits\fP, \fBfind_limits_noheap\fP, or \fBsubstitute_callout\fP
+modifiers.
+.P
+Similarly, for testing purposes, if the \fBnull_subject\fP or
+\fBnull_replacement\fP modifier is set, the subject or replacement string
+pointers are passed as NULL, respectively, to the relevant functions.
 .
 .
 .SH "THE ALTERNATIVE MATCHING FUNCTION"
@@ -1707,9 +1775,8 @@ unset substring is shown as "<unset>", as for the second data line.
 If the strings contain any non-printing characters, they are output as \exhh
 escapes if the value is less than 256 and UTF mode is not set. Otherwise they
 are output as \ex{hh...} escapes. See below for the definition of non-printing
-characters. If the \fBaftertext\fP modifier is set, the output for substring
-0 is followed by the the rest of the subject string, identified by "0+" like
-this:
+characters. If the \fBaftertext\fP modifier is set, the output for substring 0
+is followed by the rest of the subject string, identified by "0+" like this:
 .sp
     re> /cat/aftertext
   data> cataract
@@ -2075,7 +2142,7 @@ reloads two patterns.
 If \fBjitverify\fP is used with #pop, it does not automatically imply
 \fBjit\fP, which is different behaviour from when it is used on a pattern.
 .P
-The #popcopy command is analagous to the \fBpushcopy\fP modifier in that it
+The #popcopy command is analogous to the \fBpushcopy\fP modifier in that it
 makes current a copy of the topmost stack pattern, leaving the original still
 on the stack.
 .
@@ -2103,6 +2170,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 30 August 2021
-Copyright (c) 1997-2021 University of Cambridge.
+Last updated: 27 January 2024
+Copyright (c) 1997-2024 University of Cambridge.
 .fi

File diff suppressed because it is too large
+ 268 - 224
regex.mod/pcre/doc/pcre2test.txt


+ 42 - 19
regex.mod/pcre/doc/pcre2unicode.3

@@ -1,4 +1,4 @@
-.TH PCRE2UNICODE 3 "23 February 2020" "PCRE2 10.35"
+.TH PCRE2UNICODE 3 "04 February 2023" "PCRE2 10.43"
 .SH NAME
 PCRE - Perl-compatible regular expressions (revised API)
 .SH "UNICODE AND UTF SUPPORT"
@@ -40,10 +40,13 @@ handled, as documented below.
 .sp
 When PCRE2 is built with Unicode support, the escape sequences \ep{..},
 \eP{..}, and \eX can be used. This is not dependent on the PCRE2_UTF setting.
-The Unicode properties that can be tested are limited to the general category
-properties such as Lu for an upper case letter or Nd for a decimal number, the
-Unicode script names such as Arabic or Han, and the derived properties Any and
-L&. Full lists are given in the
+The Unicode properties that can be tested are a subset of those that Perl
+supports. Currently they are limited to the general category properties such as
+Lu for an upper case letter or Nd for a decimal number, the derived properties
+Any and LC (synonym L&), the Unicode script names such as Arabic or Han,
+Bidi_Class, Bidi_Control, and a few binary properties.
+.P
+The full lists are given in the
 .\" HREF
 \fBpcre2pattern\fP
 .\"
@@ -51,10 +54,10 @@ and
 .\" HREF
 \fBpcre2syntax\fP
 .\"
-documentation. Only the short names for properties are supported. For example,
-\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported.
-Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
-compatibility with Perl 5.6. PCRE2 does not support this.
+documentation. In general, only the short names for properties are supported.
+For example, \ep{L} matches a letter. Its longer synonym, \ep{Letter}, is not
+supported. Furthermore, in Perl, many properties may optionally be prefixed by
+"Is", for compatibility with Perl 5.6. PCRE2 does not support this.
 .
 .
 .SH "WIDE CHARACTERS AND UTF MODES"
@@ -106,8 +109,8 @@ and \eB, because they are defined in terms of \ew and \eW. If you want
 to test for a wider sense of, say, "digit", you can use explicit Unicode
 property tests such as \ep{Nd}. Alternatively, if you set the PCRE2_UCP option,
 the way that the character escapes work is changed so that Unicode properties
-are used to determine which characters match. There are more details in the
-section on
+are used to determine which characters match, though there are some options
+that suppress this for individual escapes. For details see the section on
 .\" HTML <a href="pcre2pattern.html#genericchartypes">
 .\" </a>
 generic character types
@@ -118,12 +121,13 @@ in the
 .\"
 documentation.
 .P
-Similarly, characters that match the POSIX named character classes are all
-low-valued characters, unless the PCRE2_UCP option is set.
+Like the escapes, characters that match the POSIX named character classes are
+all low-valued characters unless the PCRE2_UCP option is set, but there is an
+option to override this.
 .P
-However, the special horizontal and vertical white space matching escapes (\eh,
-\eH, \ev, and \eV) do match all the appropriate Unicode characters, whether or
-not PCRE2_UCP is set.
+In contrast to the character escapes and character classes, the special
+horizontal and vertical white space escapes (\eh, \eH, \ev, and \eV) do match
+all the appropriate Unicode characters, whether or not PCRE2_UCP is set.
 .
 .
 .SH "UNICODE CASE-EQUIVALENCE"
@@ -136,6 +140,13 @@ lookup is used for speed. A few Unicode characters such as Greek sigma have
 more than two code points that are case-equivalent, and these are treated
 specially. Setting PCRE2_UCP without PCRE2_UTF allows Unicode-style case
 processing for non-UTF character encodings such as UCS-2.
+.P
+There are two ASCII characters (S and K) that, in addition to their ASCII lower
+case equivalents, have a non-ASCII one as well (long S and Kelvin sign).
+Recognition of these non-ASCII characters as case-equivalent to their ASCII
+counterparts can be disabled by setting the PCRE2_EXTRA_CASELESS_RESTRICT
+option. When this is set, all characters in a case equivalence must either be
+ASCII or non-ASCII; there can be no mixing.
 .
 .
 .\" HTML <a name="scriptruns"></a>
@@ -408,6 +419,13 @@ not by \fBpcre2_dfa_match()\fP. When PCRE2_MATCH_INVALID_UTF is set, it forces
 PCRE2_UTF to be set as well. Note, however, that the pattern itself must be a
 valid UTF string.
 .P
+If you do not set PCRE2_MATCH_INVALID_UTF when calling \fBpcre2_compile()\fP, and
+you are not certain that your subject strings are valid UTF sequences, you
+should not make use of the JIT "fast path" function \fBpcre2_jit_match()\fP
+because it bypasses sanity checks, including the one for UTF validity. An
+invalid string may cause undefined behaviour, including looping, crashing, or
+giving the wrong answer.
+.P
 Setting PCRE2_MATCH_INVALID_UTF does not affect what \fBpcre2_compile()\fP
 generates, but if \fBpcre2_jit_compile()\fP is subsequently called, it does
 generate different code. If JIT is not used, the option affects the behaviour
@@ -441,6 +459,11 @@ would match an instance of WORD that is surrounded by invalid UTF code units.
 Using PCRE2_MATCH_INVALID_UTF, an application can run matches on arbitrary
 data, knowing that any matched strings that are returned are valid UTF. This
 can be useful when searching for UTF text in executable or other binary files.
+.P
+Note, however, that the 16-bit and 32-bit PCRE2 libraries process strings as
+sequences of uint16_t or uint32_t code units. They cannot find valid UTF
+sequences within an arbitrary string of bytes unless such sequences are
+suitably aligned.
 .
 .
 .SH AUTHOR
@@ -448,7 +471,7 @@ can be useful when searching for UTF text in executable or other binary files.
 .sp
 .nf
 Philip Hazel
-University Computing Service
+Retired from University Computing Service
 Cambridge, England.
 .fi
 .
@@ -457,6 +480,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 23 February 2020
-Copyright (c) 1997-2020 University of Cambridge.
+Last updated: 12 October 2023
+Copyright (c) 1997-2023 University of Cambridge.
 .fi

+ 1 - 1
regex.mod/pcre/install-sh

@@ -9,5 +9,5 @@ Name: libpcre2-posix
 Description: Posix compatible interface to libpcre2-8
 Version: @PACKAGE_VERSION@
 Libs: -L${libdir} -lpcre2-posix@LIB_POSTFIX@
-Cflags: -I${includedir} @PCRE2_STATIC_CFLAG@
+Cflags: -I${includedir} @PCRE2POSIX_CFLAG@
 Requires.private: libpcre2-8

+ 96 - 28
regex.mod/pcre/ltmain.sh

@@ -1,12 +1,12 @@
-#! /bin/sh
+#! /usr/bin/env sh
 ## DO NOT EDIT - This file generated from ./build-aux/ltmain.in
-##               by inline-source v2018-07-24.06
+##               by inline-source v2019-02-19.15
 
-# libtool (GNU libtool) 2.4.6.42-b88ce-dirty
+# libtool (GNU libtool) 2.4.7.4-1ec8f-dirty
 # Provide generalized library-building support services.
 # Written by Gordon Matzigkeit <[email protected]>, 1996
 
-# Copyright (C) 1996-2018 Free Software Foundation, Inc.
+# Copyright (C) 1996-2019, 2021-2022 Free Software Foundation, Inc.
 # This is free software; see the source for copying conditions.  There is NO
 # warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 
@@ -31,8 +31,8 @@
 
 PROGRAM=libtool
 PACKAGE=libtool
-VERSION=2.4.6.42-b88ce-dirty
-package_revision=2.4.6.42
+VERSION=2.4.7.4-1ec8f-dirty
+package_revision=2.4.7.4
 
 
 ## ------ ##
@@ -64,7 +64,7 @@ package_revision=2.4.6.42
 # libraries, which are installed to $pkgauxdir.
 
 # Set a version string for this script.
-scriptversion=2018-07-24.06; # UTC
+scriptversion=2019-02-19.15; # UTC
 
 # General shell script boiler plate, and helper functions.
 # Written by Gary V. Vaughan, 2004
@@ -72,10 +72,10 @@ scriptversion=2018-07-24.06; # UTC
 # This is free software.  There is NO warranty; not even for
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
-# Copyright (C) 2004-2018 Bootstrap Authors
+# Copyright (C) 2004-2019, 2021 Bootstrap Authors
 #
 # This file is dual licensed under the terms of the MIT license
-# <https://opensource.org/license/MIT>, and GPL version 3 or later
+# <https://opensource.org/license/MIT>, and GPL version 2 or later
 # <http://www.gnu.org/licenses/gpl-2.0.html>.  You must apply one of
 # these licenses when using or redistributing this software or any of
 # the files within it.  See the URLs above, or the file `LICENSE`
@@ -130,6 +130,12 @@ do
 	  _G_safe_locale=\"$_G_var=C; \$_G_safe_locale\"
 	fi"
 done
+# These NLS vars are set unconditionally (bootstrap issue #24).  Unset those
+# in case the environment reset is needed later and the $save_* variant is not
+# defined (see the code above).
+LC_ALL=C
+LANGUAGE=C
+export LANGUAGE LC_ALL
 
 # Make sure IFS has a sensible default
 sp=' '
@@ -368,6 +374,35 @@ sed_double_backslash="\
   s/\\([^$_G_bs]\\)$_G_bs2$_G_dollar/\\1$_G_bs2$_G_bs$_G_dollar/g
   s/\n//g"
 
+# require_check_ifs_backslash
+# ---------------------------
+# Check if we can use backslash as IFS='\' separator, and set
+# $check_ifs_backshlash_broken to ':' or 'false'.
+require_check_ifs_backslash=func_require_check_ifs_backslash
+func_require_check_ifs_backslash ()
+{
+  _G_save_IFS=$IFS
+  IFS='\'
+  _G_check_ifs_backshlash='a\\b'
+  for _G_i in $_G_check_ifs_backshlash
+  do
+  case $_G_i in
+  a)
+    check_ifs_backshlash_broken=false
+    ;;
+  '')
+    break
+    ;;
+  *)
+    check_ifs_backshlash_broken=:
+    break
+    ;;
+  esac
+  done
+  IFS=$_G_save_IFS
+  require_check_ifs_backslash=:
+}
+
 
 ## ----------------- ##
 ## Global variables. ##
@@ -1108,6 +1143,8 @@ func_quote_portable ()
 {
     $debug_cmd
 
+    $require_check_ifs_backslash
+
     func_quote_portable_result=$2
 
     # one-time-loop (easy break)
@@ -1122,8 +1159,10 @@ func_quote_portable ()
       # Quote for eval.
       case $func_quote_portable_result in
         *[\\\`\"\$]*)
-          case $func_quote_portable_result in
-            *[\[\*\?]*)
+          # Fall back to sed when $check_ifs_backshlash_broken is ':', or when
+          # the string contains shell wildcard characters.
+          case $check_ifs_backshlash_broken$func_quote_portable_result in
+            :*|*[\[\*\?]*)
               func_quote_portable_result=`$ECHO "$func_quote_portable_result" \
                   | $SED "$sed_quote_subst"`
               break
@@ -1497,10 +1536,10 @@ func_lt_ver ()
 # This is free software.  There is NO warranty; not even for
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 #
-# Copyright (C) 2010-2018 Bootstrap Authors
+# Copyright (C) 2010-2019, 2021 Bootstrap Authors
 #
 # This file is dual licensed under the terms of the MIT license
-# <https://opensource.org/license/MIT>, and GPL version 3 or later
+# <https://opensource.org/license/MIT>, and GPL version 2 or later
 # <http://www.gnu.org/licenses/gpl-2.0.html>.  You must apply one of
 # these licenses when using or redistributing this software or any of
 # the files within it.  See the URLs above, or the file `LICENSE`
@@ -1510,7 +1549,7 @@ func_lt_ver ()
 # <https://github.com/gnulib-modules/bootstrap/issues>
 
 # Set a version string for this script.
-scriptversion=2018-07-24.06; # UTC
+scriptversion=2019-02-19.15; # UTC
 
 
 ## ------ ##
@@ -2056,7 +2095,7 @@ else
 
       func_split_equals_lhs=`expr "x$1" : 'x\([^=]*\)'`
       func_split_equals_rhs=
-      test "x$func_split_equals_lhs" = "x$1" \
+      test "x$func_split_equals_lhs=" = "x$1" \
         || func_split_equals_rhs=`expr "x$1" : 'x[^=]*=\(.*\)$'`
   }
 fi #func_split_equals
@@ -2082,7 +2121,7 @@ else
   {
       $debug_cmd
 
-      func_split_short_opt_name=`expr "x$1" : 'x-\(.\)'`
+      func_split_short_opt_name=`expr "x$1" : 'x\(-.\)'`
       func_split_short_opt_arg=`expr "x$1" : 'x-.\(.*\)$'`
   }
 fi #func_split_short_opt
@@ -2176,7 +2215,7 @@ func_version ()
 # End:
 
 # Set a version string.
-scriptversion='(GNU libtool) 2.4.6.42-b88ce-dirty'
+scriptversion='(GNU libtool) 2.4.7.4-1ec8f-dirty'
 
 
 # func_echo ARG...
@@ -2267,7 +2306,7 @@ include the following information:
        compiler:       $LTCC
        compiler flags: $LTCFLAGS
        linker:         $LD (gnu? $with_gnu_ld)
-       version:        $progname (GNU libtool) 2.4.6.42-b88ce-dirty
+       version:        $progname (GNU libtool) 2.4.7.4-1ec8f-dirty
        automake:       `($AUTOMAKE --version) 2>/dev/null |$SED 1q`
        autoconf:       `($AUTOCONF --version) 2>/dev/null |$SED 1q`
 
@@ -3862,7 +3901,8 @@ This mode accepts the following additional options:
   -prefer-non-pic   try to build non-PIC objects only
   -shared           do not build a '.o' file suitable for static linking
   -static           only build a '.o' file suitable for static linking
-  -Wc,FLAG          pass FLAG directly to the compiler
+  -Wc,FLAG
+  -Xcompiler FLAG   pass FLAG directly to the compiler
 
 COMPILE-COMMAND is a command to be used in creating a 'standard' object file
 from the given SOURCEFILE.
@@ -3968,6 +4008,8 @@ The following components of LINK-COMMAND are treated specially:
   -weak LIBNAME     declare that the target provides the LIBNAME interface
   -Wc,FLAG
   -Xcompiler FLAG   pass linker-specific FLAG directly to the compiler
+  -Wa,FLAG
+  -Xassembler FLAG  pass linker-specific FLAG directly to the assembler
   -Wl,FLAG
   -Xlinker FLAG     pass linker-specific FLAG directly to the linker
   -XCClinker FLAG   pass link-specific FLAG to the compiler driver (CC)
@@ -7064,6 +7106,13 @@ func_mode_link ()
 	  prev=
 	  continue
 	  ;;
+	xassembler)
+	  func_append compiler_flags " -Xassembler $qarg"
+	  prev=
+	  func_append compile_command " -Xassembler $qarg"
+	  func_append finalize_command " -Xassembler $qarg"
+	  continue
+	  ;;
 	xcclinker)
 	  func_append linker_flags " $qarg"
 	  func_append compiler_flags " $qarg"
@@ -7234,7 +7283,7 @@ func_mode_link ()
 	    # These systems don't actually have a C library (as such)
 	    test X-lc = "X$arg" && continue
 	    ;;
-	  *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig*)
+	  *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig* | *-*-midnightbsd*)
 	    # Do not include libc due to us having libc/libc_r.
 	    test X-lc = "X$arg" && continue
 	    ;;
@@ -7254,7 +7303,7 @@ func_mode_link ()
 	  esac
 	elif test X-lc_r = "X$arg"; then
 	 case $host in
-	 *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig*)
+	 *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-bitrig* | *-*-midnightbsd*)
 	   # Do not include libc_r directly, use -pthread flag.
 	   continue
 	   ;;
@@ -7284,8 +7333,20 @@ func_mode_link ()
 	prev=xcompiler
 	continue
 	;;
-
-      -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe \
+     # Solaris ld rejects -pthread as of 11.4. Refer to Oracle bug 22985199.
+     -pthread)
+	case $host in
+	  *solaris2*) ;;
+	  *)
+	    case "$new_inherited_linker_flags " in
+	        *" $arg "*) ;;
+	        * ) func_append new_inherited_linker_flags " $arg" ;;
+	    esac
+	  ;;
+	esac
+	continue
+	;;
+      -mt|-mthreads|-kthread|-Kthread|-pthreads|--thread-safe \
       |-threads|-fopenmp|-openmp|-mp|-xopenmp|-omp|-qsmp=*)
 	func_append compiler_flags " $arg"
 	func_append compile_command " $arg"
@@ -7452,6 +7513,11 @@ func_mode_link ()
 	arg=$func_stripname_result
 	;;
 
+      -Xassembler)
+        prev=xassembler
+        continue
+        ;;
+
       -Xcompiler)
 	prev=xcompiler
 	continue
@@ -7491,10 +7557,12 @@ func_mode_link ()
       # -stdlib=*            select c++ std lib with clang
       # -fsanitize=*         Clang/GCC memory and address sanitizer
       # -fuse-ld=*           Linker select flags for GCC
+      # -Wa,*                Pass flags directly to the assembler
+      # -Werror, -Werror=*   Report (specified) warnings as errors
       -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*| \
       -t[45]*|-txscale*|-p|-pg|--coverage|-fprofile-*|-F*|@*|-tp=*|--sysroot=*| \
       -O*|-g*|-flto*|-fwhopr*|-fuse-linker-plugin|-fstack-protector*|-stdlib=*| \
-      -specs=*|-fsanitize=*|-fuse-ld=*)
+      -specs=*|-fsanitize=*|-fuse-ld=*|-Wa,*|-Werror|-Werror=*)
         func_quote_arg pretty "$arg"
 	arg=$func_quote_arg_result
         func_append compile_command " $arg"
@@ -8851,7 +8919,7 @@ func_mode_link ()
       test CXX = "$tagname" && {
         case $host_os in
         linux*)
-          case `$CC -V 2>&1 | sed 5q` in
+          case `$CC -V 2>&1 | $SED 5q` in
           *Sun\ C*) # Sun C++ 5.9
             func_suncc_cstd_abi
 
@@ -9024,7 +9092,7 @@ func_mode_link ()
 	  #
 	  case $version_type in
 	  # correct linux to gnu/linux during the next big refactor
-	  darwin|freebsd-elf|linux|osf|windows|none)
+	  darwin|freebsd-elf|linux|midnightbsd-elf|osf|windows|none)
 	    func_arith $number_major + $number_minor
 	    current=$func_arith_result
 	    age=$number_minor
@@ -9115,7 +9183,7 @@ func_mode_link ()
 	  versuffix=.$current.$revision
 	  ;;
 
-	freebsd-elf)
+	freebsd-elf | midnightbsd-elf)
 	  func_arith $current - $age
 	  major=.$func_arith_result
 	  versuffix=$major.$age.$revision
@@ -9341,7 +9409,7 @@ func_mode_link ()
 	  *-*-netbsd*)
 	    # Don't link with libc until the a.out ld.so is fixed.
 	    ;;
-	  *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
+	  *-*-openbsd* | *-*-freebsd* | *-*-dragonfly* | *-*-midnightbsd*)
 	    # Do not include libc due to us having libc/libc_r.
 	    ;;
 	  *-*-sco3.2v5* | *-*-sco5v6*)

+ 67 - 61
regex.mod/pcre/m4/libtool.m4

@@ -1,6 +1,7 @@
 # libtool.m4 - Configure libtool for the host system. -*-Autoconf-*-
 #
-#   Copyright (C) 1996-2001, 2003-2018 Free Software Foundation, Inc.
+#   Copyright (C) 1996-2001, 2003-2019, 2021-2022 Free Software
+#   Foundation, Inc.
 #   Written by Gordon Matzigkeit, 1996
 #
 # This file is free software; the Free Software Foundation gives
@@ -31,7 +32,7 @@ m4_define([_LT_COPYING], [dnl
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ])
 
-# serial 58 LT_INIT
+# serial 59 LT_INIT
 
 
 # LT_PREREQ(VERSION)
@@ -181,6 +182,7 @@ m4_require([_LT_FILEUTILS_DEFAULTS])dnl
 m4_require([_LT_CHECK_SHELL_FEATURES])dnl
 m4_require([_LT_PATH_CONVERSION_FUNCTIONS])dnl
 m4_require([_LT_CMD_RELOAD])dnl
+m4_require([_LT_DECL_FILECMD])dnl
 m4_require([_LT_CHECK_MAGIC_METHOD])dnl
 m4_require([_LT_CHECK_SHAREDLIB_FROM_LINKLIB])dnl
 m4_require([_LT_CMD_OLD_ARCHIVE])dnl
@@ -777,7 +779,7 @@ _LT_EOF
   # if finds mixed CR/LF and LF-only lines.  Since sed operates in
   # text mode, it properly converts lines to CR/LF.  This bash problem
   # is reportedly fixed, but why not run on old versions too?
-  sed '$q' "$ltmain" >> "$cfgfile" \
+  $SED '$q' "$ltmain" >> "$cfgfile" \
      || (rm -f "$cfgfile"; exit 1)
 
    mv -f "$cfgfile" "$ofile" ||
@@ -1066,17 +1068,12 @@ _LT_EOF
       _lt_dar_allow_undefined='$wl-undefined ${wl}suppress' ;;
     darwin1.*)
       _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;;
-    darwin*) # darwin 5.x on
-      # if running on 10.5 or later, the deployment target defaults
-      # to the OS version, if on x86, and 10.4, the deployment
-      # target defaults to 10.4. Don't you love it?
-      case ${MACOSX_DEPLOYMENT_TARGET-10.0},$host in
-	10.0,*86*-darwin8*|10.0,*-darwin[[91]]*)
-	  _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;;
-	10.[[012]][[,.]]*)
-	  _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;;
-	10.*)
-	  _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;;
+    darwin*)
+      case $MACOSX_DEPLOYMENT_TARGET,$host in
+        10.[[012]],*|,*powerpc*-darwin[[5-8]]*)
+          _lt_dar_allow_undefined='$wl-flat_namespace $wl-undefined ${wl}suppress' ;;
+        *)
+          _lt_dar_allow_undefined='$wl-undefined ${wl}dynamic_lookup' ;;
       esac
     ;;
   esac
@@ -1125,12 +1122,12 @@ m4_defun([_LT_DARWIN_LINKER_FEATURES],
     output_verbose_link_cmd=func_echo_all
     _LT_TAGVAR(archive_cmds, $1)="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dsymutil"
     _LT_TAGVAR(module_cmds, $1)="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dsymutil"
-    _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil"
-    _LT_TAGVAR(module_expsym_cmds, $1)="sed -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil"
+    _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring $_lt_dar_single_mod$_lt_dar_export_syms$_lt_dsymutil"
+    _LT_TAGVAR(module_expsym_cmds, $1)="$SED -e 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs \$deplibs \$compiler_flags$_lt_dar_export_syms$_lt_dsymutil"
     m4_if([$1], [CXX],
 [   if test yes != "$lt_cv_apple_cc_single_mod"; then
       _LT_TAGVAR(archive_cmds, $1)="\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dsymutil"
-      _LT_TAGVAR(archive_expsym_cmds, $1)="sed 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dar_export_syms$_lt_dsymutil"
+      _LT_TAGVAR(archive_expsym_cmds, $1)="$SED 's|^|_|' < \$export_symbols > \$output_objdir/\$libname-symbols.expsym~\$CC -r -keep_private_externs -nostdlib -o \$lib-master.o \$libobjs~\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$lib-master.o \$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring$_lt_dar_export_syms$_lt_dsymutil"
     fi
 ],[])
   else
@@ -1244,7 +1241,8 @@ _LT_DECL([], [ECHO], [1], [An echo program that protects backslashes])
 # _LT_WITH_SYSROOT
 # ----------------
 AC_DEFUN([_LT_WITH_SYSROOT],
-[AC_MSG_CHECKING([for sysroot])
+[m4_require([_LT_DECL_SED])dnl
+AC_MSG_CHECKING([for sysroot])
 AC_ARG_WITH([sysroot],
 [AS_HELP_STRING([--with-sysroot@<:@=DIR@:>@],
   [Search for dependent libraries within DIR (or the compiler's sysroot
@@ -1261,7 +1259,7 @@ case $with_sysroot in #(
    fi
    ;; #(
  /*)
-   lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"`
+   lt_sysroot=`echo "$with_sysroot" | $SED -e "$sed_quote_subst"`
    ;; #(
  no|'')
    ;; #(
@@ -1291,7 +1289,7 @@ ia64-*-hpux*)
   # options accordingly.
   echo 'int i;' > conftest.$ac_ext
   if AC_TRY_EVAL(ac_compile); then
-    case `/usr/bin/file conftest.$ac_objext` in
+    case `$FILECMD conftest.$ac_objext` in
       *ELF-32*)
 	HPUX_IA64_MODE=32
 	;;
@@ -1308,7 +1306,7 @@ ia64-*-hpux*)
   echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext
   if AC_TRY_EVAL(ac_compile); then
     if test yes = "$lt_cv_prog_gnu_ld"; then
-      case `/usr/bin/file conftest.$ac_objext` in
+      case `$FILECMD conftest.$ac_objext` in
 	*32-bit*)
 	  LD="${LD-ld} -melf32bsmip"
 	  ;;
@@ -1320,7 +1318,7 @@ ia64-*-hpux*)
 	;;
       esac
     else
-      case `/usr/bin/file conftest.$ac_objext` in
+      case `$FILECMD conftest.$ac_objext` in
 	*32-bit*)
 	  LD="${LD-ld} -32"
 	  ;;
@@ -1342,7 +1340,7 @@ mips64*-*linux*)
   echo '[#]line '$LINENO' "configure"' > conftest.$ac_ext
   if AC_TRY_EVAL(ac_compile); then
     emul=elf
-    case `/usr/bin/file conftest.$ac_objext` in
+    case `$FILECMD conftest.$ac_objext` in
       *32-bit*)
 	emul="${emul}32"
 	;;
@@ -1350,7 +1348,7 @@ mips64*-*linux*)
 	emul="${emul}64"
 	;;
     esac
-    case `/usr/bin/file conftest.$ac_objext` in
+    case `$FILECMD conftest.$ac_objext` in
       *MSB*)
 	emul="${emul}btsmip"
 	;;
@@ -1358,7 +1356,7 @@ mips64*-*linux*)
 	emul="${emul}ltsmip"
 	;;
     esac
-    case `/usr/bin/file conftest.$ac_objext` in
+    case `$FILECMD conftest.$ac_objext` in
       *N32*)
 	emul="${emul}n32"
 	;;
@@ -1378,14 +1376,14 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
   # not appear in the list.
   echo 'int i;' > conftest.$ac_ext
   if AC_TRY_EVAL(ac_compile); then
-    case `/usr/bin/file conftest.o` in
+    case `$FILECMD conftest.o` in
       *32-bit*)
 	case $host in
 	  x86_64-*kfreebsd*-gnu)
 	    LD="${LD-ld} -m elf_i386_fbsd"
 	    ;;
 	  x86_64-*linux*)
-	    case `/usr/bin/file conftest.o` in
+	    case `$FILECMD conftest.o` in
 	      *x86-64*)
 		LD="${LD-ld} -m elf32_x86_64"
 		;;
@@ -1453,7 +1451,7 @@ s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
   # options accordingly.
   echo 'int i;' > conftest.$ac_ext
   if AC_TRY_EVAL(ac_compile); then
-    case `/usr/bin/file conftest.o` in
+    case `$FILECMD conftest.o` in
     *64-bit*)
       case $lt_cv_prog_gnu_ld in
       yes*)
@@ -1726,7 +1724,7 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
     lt_cv_sys_max_cmd_len=8192;
     ;;
 
-  bitrig* | darwin* | dragonfly* | freebsd* | netbsd* | openbsd*)
+  bitrig* | darwin* | dragonfly* | freebsd* | midnightbsd* | netbsd* | openbsd*)
     # This has been around since 386BSD, at least.  Likely further.
     if test -x /sbin/sysctl; then
       lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax`
@@ -1769,7 +1767,7 @@ AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
   sysv5* | sco5v6* | sysv4.2uw2*)
     kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null`
     if test -n "$kargmax"; then
-      lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[	 ]]//'`
+      lt_cv_sys_max_cmd_len=`echo $kargmax | $SED 's/.*[[	 ]]//'`
     else
       lt_cv_sys_max_cmd_len=32768
     fi
@@ -2570,7 +2568,7 @@ cygwin* | mingw* | pw32* | cegcc*)
     case $host_os in
     cygwin*)
       # Cygwin DLLs use 'cyg' prefix rather than 'lib'
-      soname_spec='`echo $libname | sed -e 's/^lib/cyg/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext'
+      soname_spec='`echo $libname | $SED -e 's/^lib/cyg/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext'
 m4_if([$1], [],[
       sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"])
       ;;
@@ -2580,7 +2578,7 @@ m4_if([$1], [],[
       ;;
     pw32*)
       # pw32 DLLs use 'pw' prefix rather than 'lib'
-      library_names_spec='`echo $libname | sed -e 's/^lib/pw/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext'
+      library_names_spec='`echo $libname | $SED -e 's/^lib/pw/'``echo $release | $SED -e 's/[[.]]/-/g'`$versuffix$shared_ext'
       ;;
     esac
     dynamic_linker='Win32 ld.exe'
@@ -2606,7 +2604,7 @@ m4_if([$1], [],[
       done
       IFS=$lt_save_ifs
       # Convert to MSYS style.
-      sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'`
+      sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 's|\\\\|/|g' -e 's| \\([[a-zA-Z]]\\):| /\\1|g' -e 's|^ ||'`
       ;;
     cygwin*)
       # Convert to unix form, then to dos form, then back to unix form
@@ -2676,7 +2674,7 @@ dgux*)
   shlibpath_var=LD_LIBRARY_PATH
   ;;
 
-freebsd* | dragonfly*)
+freebsd* | dragonfly* | midnightbsd*)
   # DragonFly does not have aout.  When/if they implement a new
   # versioning mechanism, adjust this.
   if test -x /usr/bin/objformat; then
@@ -3475,7 +3473,7 @@ beos*)
 
 bsdi[[45]]*)
   lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)'
-  lt_cv_file_magic_cmd='/usr/bin/file -L'
+  lt_cv_file_magic_cmd='$FILECMD -L'
   lt_cv_file_magic_test_file=/shlib/libc.so
   ;;
 
@@ -3509,14 +3507,14 @@ darwin* | rhapsody*)
   lt_cv_deplibs_check_method=pass_all
   ;;
 
-freebsd* | dragonfly*)
+freebsd* | dragonfly* | midnightbsd*)
   if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
     case $host_cpu in
     i*86 )
       # Not sure whether the presence of OpenBSD here was a mistake.
       # Let's accept both of them until this is cleared up.
       lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library'
-      lt_cv_file_magic_cmd=/usr/bin/file
+      lt_cv_file_magic_cmd=$FILECMD
       lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
       ;;
     esac
@@ -3530,7 +3528,7 @@ haiku*)
   ;;
 
 hpux10.20* | hpux11*)
-  lt_cv_file_magic_cmd=/usr/bin/file
+  lt_cv_file_magic_cmd=$FILECMD
   case $host_cpu in
   ia64*)
     lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64'
@@ -3577,7 +3575,7 @@ netbsd*)
 
 newos6*)
   lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)'
-  lt_cv_file_magic_cmd=/usr/bin/file
+  lt_cv_file_magic_cmd=$FILECMD
   lt_cv_file_magic_test_file=/usr/lib/libnls.so
   ;;
 
@@ -3704,13 +3702,13 @@ else
 	mingw*) lt_bad_file=conftest.nm/nofile ;;
 	*) lt_bad_file=/dev/null ;;
 	esac
-	case `"$tmp_nm" -B $lt_bad_file 2>&1 | sed '1q'` in
+	case `"$tmp_nm" -B $lt_bad_file 2>&1 | $SED '1q'` in
 	*$lt_bad_file* | *'Invalid file or object type'*)
 	  lt_cv_path_NM="$tmp_nm -B"
 	  break 2
 	  ;;
 	*)
-	  case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
+	  case `"$tmp_nm" -p /dev/null 2>&1 | $SED '1q'` in
 	  */dev/null*)
 	    lt_cv_path_NM="$tmp_nm -p"
 	    break 2
@@ -3736,7 +3734,7 @@ else
     # Let the user override the test.
   else
     AC_CHECK_TOOLS(DUMPBIN, [dumpbin "link -dump"], :)
-    case `$DUMPBIN -symbols -headers /dev/null 2>&1 | sed '1q'` in
+    case `$DUMPBIN -symbols -headers /dev/null 2>&1 | $SED '1q'` in
     *COFF*)
       DUMPBIN="$DUMPBIN -symbols -headers"
       ;;
@@ -3976,7 +3974,7 @@ esac
 
 if test "$lt_cv_nm_interface" = "MS dumpbin"; then
   # Gets list of data symbols to import.
-  lt_cv_sys_global_symbol_to_import="sed -n -e 's/^I .* \(.*\)$/\1/p'"
+  lt_cv_sys_global_symbol_to_import="$SED -n -e 's/^I .* \(.*\)$/\1/p'"
   # Adjust the below global symbol transforms to fixup imported variables.
   lt_cdecl_hook=" -e 's/^I .* \(.*\)$/extern __declspec(dllimport) char \1;/p'"
   lt_c_name_hook=" -e 's/^I .* \(.*\)$/  {\"\1\", (void *) 0},/p'"
@@ -3994,20 +3992,20 @@ fi
 # Transform an extracted symbol line into a proper C declaration.
 # Some systems (esp. on ia64) link data and code symbols differently,
 # so use this general approach.
-lt_cv_sys_global_symbol_to_cdecl="sed -n"\
+lt_cv_sys_global_symbol_to_cdecl="$SED -n"\
 $lt_cdecl_hook\
 " -e 's/^T .* \(.*\)$/extern int \1();/p'"\
 " -e 's/^$symcode$symcode* .* \(.*\)$/extern char \1;/p'"
 
 # Transform an extracted symbol line into symbol name and symbol address
-lt_cv_sys_global_symbol_to_c_name_address="sed -n"\
+lt_cv_sys_global_symbol_to_c_name_address="$SED -n"\
 $lt_c_name_hook\
 " -e 's/^: \(.*\) .*$/  {\"\1\", (void *) 0},/p'"\
 " -e 's/^$symcode$symcode* .* \(.*\)$/  {\"\1\", (void *) \&\1},/p'"
 
 # Transform an extracted symbol line into symbol name with lib prefix and
 # symbol address.
-lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n"\
+lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="$SED -n"\
 $lt_c_name_lib_hook\
 " -e 's/^: \(.*\) .*$/  {\"\1\", (void *) 0},/p'"\
 " -e 's/^$symcode$symcode* .* \(lib.*\)$/  {\"\1\", (void *) \&\1},/p'"\
@@ -4049,9 +4047,9 @@ for ac_symprfx in "" "_"; do
 "     s[1]~prfx {split(s[1],t,\"@\"); print f,t[1],substr(t[1],length(prfx))}"\
 "     ' prfx=^$ac_symprfx]"
   else
-    lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[	 ]]\($symcode$symcode*\)[[	 ]][[	 ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"
+    lt_cv_sys_global_symbol_pipe="$SED -n -e 's/^.*[[	 ]]\($symcode$symcode*\)[[	 ]][[	 ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"
   fi
-  lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ __gnu_lto/d'"
+  lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | $SED '/ __gnu_lto/d'"
 
   # Check to see that the pipe works correctly.
   pipe_works=no
@@ -4338,7 +4336,7 @@ m4_if([$1], [CXX], [
 	    ;;
 	esac
 	;;
-      freebsd* | dragonfly*)
+      freebsd* | dragonfly* | midnightbsd*)
 	# FreeBSD uses GNU C++
 	;;
       hpux9* | hpux10* | hpux11*)
@@ -4421,7 +4419,7 @@ m4_if([$1], [CXX], [
 	    _LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink'
 	    ;;
 	  *)
-	    case `$CC -V 2>&1 | sed 5q` in
+	    case `$CC -V 2>&1 | $SED 5q` in
 	    *Sun\ C*)
 	      # Sun C++ 5.9
 	      _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
@@ -4757,7 +4755,7 @@ m4_if([$1], [CXX], [
 	_LT_TAGVAR(lt_prog_compiler_static, $1)='-qstaticlink'
 	;;
       *)
-	case `$CC -V 2>&1 | sed 5q` in
+	case `$CC -V 2>&1 | $SED 5q` in
 	*Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [[1-7]].* | *Sun*Fortran*\ 8.[[0-3]]*)
 	  # Sun Fortran 8.3 passes all unrecognized flags to the linker
 	  _LT_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
@@ -5065,7 +5063,7 @@ dnl Note also adjust exclude_expsyms for C++ above.
       _LT_TAGVAR(whole_archive_flag_spec, $1)=
     fi
     supports_anon_versioning=no
-    case `$LD -v | $SED -e 's/([^)]\+)\s\+//' 2>&1` in
+    case `$LD -v | $SED -e 's/([[^)]]\+)\s\+//' 2>&1` in
       *GNU\ gold*) supports_anon_versioning=yes ;;
       *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11
       *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ...
@@ -5192,7 +5190,7 @@ _LT_EOF
       # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
       # time.  Moving up from 0x10000000 also allows more sbrk(2) space.
       _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
-      _LT_TAGVAR(archive_expsym_cmds, $1)='sed "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+      _LT_TAGVAR(archive_expsym_cmds, $1)='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
       ;;
 
     gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu)
@@ -5235,7 +5233,7 @@ _LT_EOF
 	  _LT_TAGVAR(compiler_needs_object, $1)=yes
 	  ;;
 	esac
-	case `$CC -V 2>&1 | sed 5q` in
+	case `$CC -V 2>&1 | $SED 5q` in
 	*Sun\ C*)			# Sun C 5.9
 	  _LT_TAGVAR(whole_archive_flag_spec, $1)='$wl--whole-archive`new_convenience=; for conv in $convenience\"\"; do test -z \"$conv\" || new_convenience=\"$new_convenience,$conv\"; done; func_echo_all \"$new_convenience\"` $wl--no-whole-archive'
 	  _LT_TAGVAR(compiler_needs_object, $1)=yes
@@ -5247,7 +5245,7 @@ _LT_EOF
 
         if test yes = "$supports_anon_versioning"; then
           _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
-            cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+            cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
             echo "local: *; };" >> $output_objdir/$libname.ver~
             $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib'
         fi
@@ -5263,7 +5261,7 @@ _LT_EOF
 	  _LT_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname -o $lib'
 	  if test yes = "$supports_anon_versioning"; then
 	    _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
-              cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+              cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
               echo "local: *; };" >> $output_objdir/$libname.ver~
               $LD -shared $libobjs $deplibs $linker_flags -soname $soname -version-script $output_objdir/$libname.ver -o $lib'
 	  fi
@@ -5672,7 +5670,7 @@ _LT_EOF
       ;;
 
     # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
-    freebsd* | dragonfly*)
+    freebsd* | dragonfly* | midnightbsd*)
       _LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
       _LT_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
       _LT_TAGVAR(hardcode_direct, $1)=yes
@@ -6784,7 +6782,7 @@ if test yes != "$_lt_caught_CXX_error"; then
         _LT_TAGVAR(archive_cmds_need_lc, $1)=no
         ;;
 
-      freebsd* | dragonfly*)
+      freebsd* | dragonfly* | midnightbsd*)
         # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF
         # conventions
         _LT_TAGVAR(ld_shlibs, $1)=yes
@@ -6921,7 +6919,7 @@ if test yes != "$_lt_caught_CXX_error"; then
 	# 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
 	# time.  Moving up from 0x10000000 also allows more sbrk(2) space.
 	_LT_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
-	_LT_TAGVAR(archive_expsym_cmds, $1)='sed "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+	_LT_TAGVAR(archive_expsym_cmds, $1)='$SED "s|^|_|" $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags $wl-h,$soname $wl--retain-symbols-file,$output_objdir/$soname.expsym $wl--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
 	;;
       irix5* | irix6*)
         case $cc_basename in
@@ -7061,13 +7059,13 @@ if test yes != "$_lt_caught_CXX_error"; then
 	    _LT_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname -o $lib'
 	    if test yes = "$supports_anon_versioning"; then
 	      _LT_TAGVAR(archive_expsym_cmds, $1)='echo "{ global:" > $output_objdir/$libname.ver~
-                cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+                cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
                 echo "local: *; };" >> $output_objdir/$libname.ver~
                 $CC -qmkshrobj $libobjs $deplibs $compiler_flags $wl-soname $wl$soname $wl-version-script $wl$output_objdir/$libname.ver -o $lib'
 	    fi
 	    ;;
 	  *)
-	    case `$CC -V 2>&1 | sed 5q` in
+	    case `$CC -V 2>&1 | $SED 5q` in
 	    *Sun\ C*)
 	      # Sun C++ 5.9
 	      _LT_TAGVAR(no_undefined_flag, $1)=' -zdefs'
@@ -8213,6 +8211,14 @@ _LT_DECL([], [DLLTOOL], [1], [DLL creation program])
 AC_SUBST([DLLTOOL])
 ])
 
+# _LT_DECL_FILECMD
+# ----------------
+# Locate a file(1) program (stored in FILECMD, ':' if none is found) used to detect file type and magic.
+m4_defun([_LT_DECL_FILECMD],
+[AC_CHECK_TOOL([FILECMD], [file], [:])
+_LT_DECL([], [FILECMD], [1], [A file(cmd) program that detects file types])
+])# _LT_DECL_FILECMD
+
 # _LT_DECL_SED
 # ------------
 # Check for a fully-functional sed program, that truncates

Some files were not shown because too many files changed in this diff