فهرست منبع

Merge pull request #26585 from akien-mga/pcre2-10.32

pcre2: Sync with upstream 10.32
Rémi Verschelde 6 سال پیش
والد
کامیت
fcabdf9e83
37فایلهای تغییر یافته به همراه2862 افزوده شده و 2137 حذف شده
  1. 3 3
      thirdparty/README.md
  2. 7 7
      thirdparty/pcre2/LICENCE
  3. 18 14
      thirdparty/pcre2/src/config.h
  4. 23 7
      thirdparty/pcre2/src/pcre2.h
  5. 4 3
      thirdparty/pcre2/src/pcre2_auto_possess.c
  6. 25 25
      thirdparty/pcre2/src/pcre2_chartables.c
  7. 233 103
      thirdparty/pcre2/src/pcre2_compile.c
  8. 9 3
      thirdparty/pcre2/src/pcre2_convert.c
  9. 230 41
      thirdparty/pcre2/src/pcre2_dfa_match.c
  10. 10 5
      thirdparty/pcre2/src/pcre2_error.c
  11. 4 4
      thirdparty/pcre2/src/pcre2_extuni.c
  12. 2 1
      thirdparty/pcre2/src/pcre2_find_bracket.c
  13. 47 84
      thirdparty/pcre2/src/pcre2_internal.h
  14. 17 0
      thirdparty/pcre2/src/pcre2_intmodedep.h
  15. 26 11
      thirdparty/pcre2/src/pcre2_jit_compile.c
  16. 1 8
      thirdparty/pcre2/src/pcre2_maketables.c
  17. 35 19
      thirdparty/pcre2/src/pcre2_match.c
  18. 2 1
      thirdparty/pcre2/src/pcre2_pattern_info.c
  19. 20 2
      thirdparty/pcre2/src/pcre2_serialize.c
  20. 37 1
      thirdparty/pcre2/src/pcre2_string_utils.c
  21. 4 2
      thirdparty/pcre2/src/pcre2_study.c
  22. 37 10
      thirdparty/pcre2/src/pcre2_substitute.c
  23. 196 182
      thirdparty/pcre2/src/pcre2_tables.c
  24. 1378 1345
      thirdparty/pcre2/src/pcre2_ucd.c
  25. 27 21
      thirdparty/pcre2/src/pcre2_ucp.h
  26. 23 6
      thirdparty/pcre2/src/sljit/sljitConfigInternal.h
  27. 8 1
      thirdparty/pcre2/src/sljit/sljitExecAllocator.c
  28. 9 1
      thirdparty/pcre2/src/sljit/sljitLir.c
  29. 10 10
      thirdparty/pcre2/src/sljit/sljitLir.h
  30. 160 90
      thirdparty/pcre2/src/sljit/sljitNativeARM_64.c
  31. 108 4
      thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c
  32. 2 2
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
  33. 2 2
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c
  34. 39 9
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
  35. 51 28
      thirdparty/pcre2/src/sljit/sljitNativeX86_32.c
  36. 55 65
      thirdparty/pcre2/src/sljit/sljitNativeX86_64.c
  37. 0 17
      thirdparty/pcre2/src/sljit/sljitNativeX86_common.c

+ 3 - 3
thirdparty/README.md

@@ -436,16 +436,16 @@ Files extracted from upstream source:
 ## pcre2
 
 - Upstream: http://www.pcre.org/
-- Version: 10.31
+- Version: 10.32
 - License: BSD-3-Clause
 
 Files extracted from upstream source:
 
 - Files listed in the file NON-AUTOTOOLS-BUILD steps 1-4
-- All .h files in src/
+- All .h files in src/ apart from pcre2posix.h
+- src/pcre2_jit_compile.c
 - src/pcre2_jit_match.c
 - src/pcre2_jit_misc.c
-- src/pcre2_jit_maketables.c
 - src/sljit/*
 - AUTHORS and LICENCE
 

+ 7 - 7
thirdparty/pcre2/LICENCE

@@ -4,11 +4,11 @@ PCRE2 LICENCE
 PCRE2 is a library of functions to support regular expressions whose syntax
 and semantics are as close as possible to those of the Perl 5 language.
 
-Release 10 of PCRE2 is distributed under the terms of the "BSD" licence, as
-specified below, with one exemption for certain binary redistributions. The
-documentation for PCRE2, supplied in the "doc" directory, is distributed under
-the same terms as the software itself. The data in the testdata directory is
-not copyrighted and is in the public domain.
+Releases 10.00 and above of PCRE2 are distributed under the terms of the "BSD"
+licence, as specified below, with one exemption for certain binary
+redistributions. The documentation for PCRE2, supplied in the "doc" directory,
+is distributed under the same terms as the software itself. The data in the
+testdata directory is not copyrighted and is in the public domain.
 
 The basic library functions are written in C and are freestanding. Also
 included in the distribution is a just-in-time compiler that can be used to
@@ -35,7 +35,7 @@ PCRE2 JUST-IN-TIME COMPILATION SUPPORT
 
 Written by:       Zoltan Herczeg
 Email local part: hzmester
-Emain domain:     freemail.hu
+Email domain:     freemail.hu
 
 Copyright(c) 2010-2018 Zoltan Herczeg
 All rights reserved.
@@ -46,7 +46,7 @@ STACK-LESS JUST-IN-TIME COMPILER
 
 Written by:       Zoltan Herczeg
 Email local part: hzmester
-Emain domain:     freemail.hu
+Email domain:     freemail.hu
 
 Copyright(c) 2009-2018 Zoltan Herczeg
 All rights reserved.

+ 18 - 14
thirdparty/pcre2/src/config.h

@@ -18,10 +18,10 @@ to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
 but if you do, default values will be taken from config.h for non-boolean
 macros that are not defined on the command line.
 
-Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
-(conventionally to 1) for TRUE, and not defined at all for FALSE. All such
-macros are listed as a commented #undef in config.h.generic. Macros such as
-MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
+Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
+defined (conventionally to 1) for TRUE, and not defined at all for FALSE. All
+such macros are listed as a commented #undef in config.h.generic. Macros such
+as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
 surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
 
 PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
@@ -132,17 +132,18 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to 1 if you have the <zlib.h> header file. */
 /* #undef HAVE_ZLIB_H */
 
-/* This limits the amount of memory that pcre2_match() may use while matching
-   a pattern. The value is in kilobytes. */
+/* This limits the amount of memory that may be used while matching a pattern.
+   It applies to both pcre2_match() and pcre2_dfa_match(). It does not apply
+   to JIT matching. The value is in kibibytes (units of 1024 bytes). */
 #ifndef HEAP_LIMIT
 #define HEAP_LIMIT 20000000
 #endif
 
 /* The value of LINK_SIZE determines the number of bytes used to store links
    as offsets within the compiled regex. The default is 2, which allows for
-   compiled patterns up to 64K long. This covers the vast majority of cases.
-   However, PCRE2 can also be compiled to use 3 or 4 bytes instead. This
-   allows for longer patterns in extreme cases. */
+   compiled patterns up to 65535 code units long. This covers the vast
+   majority of cases. However, PCRE2 can also be compiled to use 3 or 4 bytes
+   instead. This allows for longer patterns in extreme cases. */
 #ifndef LINK_SIZE
 #define LINK_SIZE 2
 #endif
@@ -155,7 +156,8 @@ sure both macros are undefined; an emulation function will then be used. */
 
 /* The value of MATCH_LIMIT determines the default number of times the
    pcre2_match() function can record a backtrack position during a single
-   matching attempt. There is a runtime interface for setting a different
+   matching attempt. The value is also used to limit a loop counter in
+   pcre2_dfa_match(). There is a runtime interface for setting a different
    limit. The limit exists in order to catch runaway regular expressions that
    take for ever to determine that they do not match. The default is set very
    large so that it does not accidentally catch legitimate cases. */
@@ -170,7 +172,9 @@ sure both macros are undefined; an emulation function will then be used. */
    MATCH_LIMIT_DEPTH provides this facility. To have any useful effect, it
    must be less than the value of MATCH_LIMIT. The default is to use the same
    value as MATCH_LIMIT. There is a runtime method for setting a different
-   limit. */
+   limit. In the case of pcre2_dfa_match(), this limit controls the depth of
+   the internal nested function calls that are used for pattern recursions,
+   lookarounds, and atomic groups. */
 #ifndef MATCH_LIMIT_DEPTH
 #define MATCH_LIMIT_DEPTH MATCH_LIMIT
 #endif
@@ -210,7 +214,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_NAME "PCRE2"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 10.31"
+#define PACKAGE_STRING "PCRE2 10.32"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "pcre2"
@@ -219,7 +223,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_URL ""
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "10.31"
+#define PACKAGE_VERSION "10.32"
 
 /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
    parentheses (of any kind) in a pattern. This limits the amount of system
@@ -339,7 +343,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #endif
 
 /* Version number of package */
-#define VERSION "10.31"
+#define VERSION "10.32"
 
 /* Define to 1 if on MINIX. */
 /* #undef _MINIX */

+ 23 - 7
thirdparty/pcre2/src/pcre2.h

@@ -5,7 +5,7 @@
 /* This is the public header file for the PCRE library, second API, to be
 #included by applications that call PCRE2 functions.
 
-           Copyright (c) 2016-2017 University of Cambridge
+           Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -41,10 +41,16 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* The current PCRE version information. */
 
-#define PCRE2_MAJOR          10
-#define PCRE2_MINOR          31
-#define PCRE2_PRERELEASE     
-#define PCRE2_DATE           2018-02-12
+#define PCRE2_MAJOR           10
+#define PCRE2_MINOR           32
+#define PCRE2_PRERELEASE      
+#define PCRE2_DATE            2018-09-10
+
+/* For the benefit of systems without stdint.h, an alternative is to use
+inttypes.h. The existence of these headers is checked by configure or CMake. */
+
+#define PCRE2_HAVE_STDINT_H   1
+#define PCRE2_HAVE_INTTYPES_H 1
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE2, the appropriate
@@ -81,12 +87,18 @@ set, we ensure here that it has no effect. */
 #define PCRE2_CALL_CONVENTION
 #endif
 
-/* Have to include limits.h, stdlib.h and stdint.h to ensure that size_t and
-uint8_t, UCHAR_MAX, etc are defined. */
+/* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure
+that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither
+header, the relevant values must be provided by some other means. */
 
 #include <limits.h>
 #include <stdlib.h>
+
+#if PCRE2_HAVE_STDINT_H
 #include <stdint.h>
+#elif PCRE2_HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
 
 /* Allow for C++ users compiling this directly. */
 
@@ -269,6 +281,7 @@ pcre2_pattern_convert(). */
 #define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE       156
 #define PCRE2_ERROR_BACKSLASH_G_SYNTAX             157
 #define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
+/* Error 159 is obsolete and should now never occur */
 #define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED      159
 #define PCRE2_ERROR_VERB_UNKNOWN                   160
 #define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG      161
@@ -303,6 +316,8 @@ pcre2_pattern_convert(). */
 #define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP      190
 #define PCRE2_ERROR_NO_SURROGATES_IN_UTF16         191
 #define PCRE2_ERROR_BAD_LITERAL_OPTIONS            192
+#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE      193
+#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS      194
 
 
 /* "Expected" matching error codes: no match and partial match. */
@@ -387,6 +402,7 @@ released, the numbers must not be changed. */
 #define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
 #define PCRE2_ERROR_HEAPLIMIT         (-63)
 #define PCRE2_ERROR_CONVERT_SYNTAX    (-64)
+#define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
 
 
 /* Request types for pcre2_pattern_info() */

+ 4 - 3
thirdparty/pcre2/src/pcre2_auto_possess.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -505,7 +505,7 @@ Arguments:
   utf         TRUE in UTF mode
   cb          compile data block
   base_list   the data list of the base opcode
-  base_end    the end of the data list
+  base_end    the end of the base opcode
   rec_limit   points to recursion depth counter
 
 Returns:      TRUE if the auto-possessification is possible
@@ -730,7 +730,7 @@ for(;;)
       if ((*xclass_flags & XCL_MAP) == 0)
         {
         /* No bits are set for characters < 256. */
-        if (list[1] == 0) return TRUE;
+        if (list[1] == 0) return (*xclass_flags & XCL_NOT) == 0;
         /* Might be an empty repeat. */
         continue;
         }
@@ -1235,6 +1235,7 @@ for (;;)
 #endif
 
     case OP_MARK:
+    case OP_COMMIT_ARG:
     case OP_PRUNE_ARG:
     case OP_SKIP_ARG:
     case OP_THEN_ARG:

+ 25 - 25
thirdparty/pcre2/src/pcre2_chartables.c

@@ -2,23 +2,24 @@
 *      Perl-Compatible Regular Expressions       *
 *************************************************/
 
-/* This file contains character tables that are used when no external tables
-are passed to PCRE2 by the application that calls it. The tables are used only
-for characters whose code values are less than 256.
-
-This is a default version of the tables that assumes ASCII encoding. A program
-called dftables (which is distributed with PCRE2) can be used to build
-alternative versions of this file. This is necessary if you are running in an
-EBCDIC environment, or if you want to default to a different encoding, for
-example ISO-8859-1. When dftables is run, it creates these tables in the
-current locale. If PCRE2 is configured with --enable-rebuild-chartables, this
-happens automatically.
-
-The following #includes are present because without them gcc 4.x may remove the
-array definition from the final binary if PCRE2 is built into a static library
-and dead code stripping is activated. This leads to link errors. Pulling in the
-header ensures that the array gets flagged as "someone outside this compilation
-unit might reference this" and so it will always be supplied to the linker. */
+/* This file was automatically written by the dftables auxiliary
+program. It contains character tables that are used when no external
+tables are passed to PCRE2 by the application that calls it. The tables
+are used only for characters whose code values are less than 256. */
+
+/*The dftables program (which is distributed with PCRE2) can be used to
+build alternative versions of this file. This is necessary if you are
+running in an EBCDIC environment, or if you want to default to a different
+encoding, for example ISO-8859-1. When dftables is run, it creates these
+tables in the current locale. This happens automatically if PCRE2 is
+configured with --enable-rebuild-chartables. */
+
+/* The following #include is present because without it gcc 4.x may remove
+the array definition from the final binary if PCRE2 is built into a static
+library and dead code stripping is activated. This leads to link errors.
+Pulling in the header ensures that the array gets flagged as "someone
+outside this compilation unit might reference this" and so it will always
+be supplied to the linker. */
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
@@ -101,7 +102,7 @@ const uint8_t PRIV(default_tables)[] = {
 /* This table contains bit maps for various character classes. Each map is 32
 bytes long and the bits run from the least significant end of each byte. The
 classes that have their own maps are: space, xdigit, digit, upper, lower, word,
-graph, print, punct, and cntrl. Other classes are built from combinations. */
+graph print, punct, and cntrl. Other classes are built from combinations. */
 
   0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
@@ -159,25 +160,24 @@ graph, print, punct, and cntrl. Other classes are built from combinations. */
   0x04   decimal digit
   0x08   hexadecimal digit
   0x10   alphanumeric or '_'
-  0x80   regular expression metacharacter or binary zero
 */
 
-  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
   0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /*   8- 15 */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
-  0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
-  0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
+  0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - '  */
+  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ( - /  */
   0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
-  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
+  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00, /*  8 - ?  */
   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
-  0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
+  0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /*  X - _  */
   0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
-  0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
+  0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x00, /*  x -127 */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */

+ 233 - 103
thirdparty/pcre2/src/pcre2_compile.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -63,8 +63,8 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* Other debugging code can be enabled by these defines. */
 
-// #define DEBUG_SHOW_CAPTURES
-// #define DEBUG_SHOW_PARSED
+/* #define DEBUG_SHOW_CAPTURES */
+/* #define DEBUG_SHOW_PARSED */
 
 /* There are a few things that vary with different code unit sizes. Handle them
 by defining macros in order to minimize #if usage. */
@@ -250,34 +250,35 @@ is present where expected in a conditional group. */
 #define META_LOOKBEHINDNOT    0x80250000u  /* (?<! */
 
 /* These must be kept in this order, with consecutive values, and the _ARG
-versions of PRUNE, SKIP, and THEN immediately after their non-argument
+versions of COMMIT, PRUNE, SKIP, and THEN immediately after their non-argument
 versions. */
 
 #define META_MARK             0x80260000u  /* (*MARK) */
 #define META_ACCEPT           0x80270000u  /* (*ACCEPT) */
-#define META_COMMIT           0x80280000u  /* (*COMMIT) */
-#define META_FAIL             0x80290000u  /* (*FAIL) */
-#define META_PRUNE            0x802a0000u  /* These pairs must    */
-#define META_PRUNE_ARG        0x802b0000u  /*   be                */
-#define META_SKIP             0x802c0000u  /*     kept            */
-#define META_SKIP_ARG         0x802d0000u  /*         in          */
-#define META_THEN             0x802e0000u  /*           this      */
-#define META_THEN_ARG         0x802f0000u  /*               order */
+#define META_FAIL             0x80280000u  /* (*FAIL) */
+#define META_COMMIT           0x80290000u  /* These               */
+#define META_COMMIT_ARG       0x802a0000u  /*   pairs             */
+#define META_PRUNE            0x802b0000u  /*     must            */
+#define META_PRUNE_ARG        0x802c0000u  /*       be            */
+#define META_SKIP             0x802d0000u  /*         kept        */
+#define META_SKIP_ARG         0x802e0000u  /*           in        */
+#define META_THEN             0x802f0000u  /*             this    */
+#define META_THEN_ARG         0x80300000u  /*               order */
 
 /* These must be kept in groups of adjacent 3 values, and all together. */
 
-#define META_ASTERISK         0x80300000u  /* *  */
-#define META_ASTERISK_PLUS    0x80310000u  /* *+ */
-#define META_ASTERISK_QUERY   0x80320000u  /* *? */
-#define META_PLUS             0x80330000u  /* +  */
-#define META_PLUS_PLUS        0x80340000u  /* ++ */
-#define META_PLUS_QUERY       0x80350000u  /* +? */
-#define META_QUERY            0x80360000u  /* ?  */
-#define META_QUERY_PLUS       0x80370000u  /* ?+ */
-#define META_QUERY_QUERY      0x80380000u  /* ?? */
-#define META_MINMAX           0x80390000u  /* {n,m}  repeat */
-#define META_MINMAX_PLUS      0x803a0000u  /* {n,m}+ repeat */
-#define META_MINMAX_QUERY     0x803b0000u  /* {n,m}? repeat */
+#define META_ASTERISK         0x80310000u  /* *  */
+#define META_ASTERISK_PLUS    0x80320000u  /* *+ */
+#define META_ASTERISK_QUERY   0x80330000u  /* *? */
+#define META_PLUS             0x80340000u  /* +  */
+#define META_PLUS_PLUS        0x80350000u  /* ++ */
+#define META_PLUS_QUERY       0x80360000u  /* +? */
+#define META_QUERY            0x80370000u  /* ?  */
+#define META_QUERY_PLUS       0x80380000u  /* ?+ */
+#define META_QUERY_QUERY      0x80390000u  /* ?? */
+#define META_MINMAX           0x803a0000u  /* {n,m}  repeat */
+#define META_MINMAX_PLUS      0x803b0000u  /* {n,m}+ repeat */
+#define META_MINMAX_QUERY     0x803c0000u  /* {n,m}? repeat */
 
 #define META_FIRST_QUANTIFIER META_ASTERISK
 #define META_LAST_QUANTIFIER  META_MINMAX_QUERY
@@ -327,8 +328,9 @@ static unsigned char meta_extra_lengths[] = {
   SIZEOFFSET,    /* META_LOOKBEHINDNOT */
   1,             /* META_MARK - plus the string length */
   0,             /* META_ACCEPT */
-  0,             /* META_COMMIT */
   0,             /* META_FAIL */
+  0,             /* META_COMMIT */
+  1,             /* META_COMMIT_ARG - plus the string length */
   0,             /* META_PRUNE */
   1,             /* META_PRUNE_ARG - plus the string length */
   0,             /* META_SKIP */
@@ -510,17 +512,17 @@ static const short int escapes[] = {
      -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
      CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
      CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
-     CHAR_GRAVE_ACCENT,       ESC_a,
+     CHAR_GRAVE_ACCENT,       CHAR_BEL,
      -ESC_b,                  0,
-     -ESC_d,                  ESC_e,
-     ESC_f,                   0,
+     -ESC_d,                  CHAR_ESC,
+     CHAR_FF,                 0,
      -ESC_h,                  0,
      0,                       -ESC_k,
      0,                       0,
-     ESC_n,                   0,
+     CHAR_LF,                 0,
      -ESC_p,                  0,
-     ESC_r,                   -ESC_s,
-     ESC_tee,                 0,
+     CHAR_CR,                 -ESC_s,
+     CHAR_HT,                 0,
      -ESC_v,                  -ESC_w,
      0,                       0,
      -ESC_z
@@ -544,22 +546,22 @@ because it is defined as 'a', which of course picks up the ASCII value. */
 #endif
 
 static const short int escapes[] = {
-/*  80 */        ESC_a, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
-/*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,
-/*  90 */     0,     0, -ESC_k,       0,      0, ESC_n,      0, -ESC_p,
-/*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,
-/*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,-ESC_v, -ESC_w,      0,
-/*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0,
-/*  B0 */     0,     0,      0,       0,      0,     0,      0,      0,
-/*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-',
-/*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G,
-/*  C8 */-ESC_H,     0,      0,       0,      0,     0,      0,      0,
-/*  D0 */   '}',     0, -ESC_K,       0,      0,-ESC_N,      0, -ESC_P,
-/*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0,
-/*  E0 */  '\\',     0, -ESC_S,       0,      0,-ESC_V, -ESC_W, -ESC_X,
-/*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0,
-/*  F0 */     0,     0,      0,       0,      0,     0,      0,      0,
-/*  F8 */     0,     0
+/*  80 */         CHAR_BEL, -ESC_b,       0, -ESC_d, CHAR_ESC, CHAR_FF,      0,
+/*  88 */ -ESC_h,        0,      0,     '{',      0,        0,       0,      0,
+/*  90 */      0,        0, -ESC_k,       0,      0,  CHAR_LF,       0, -ESC_p,
+/*  98 */      0,  CHAR_CR,      0,     '}',      0,        0,       0,      0,
+/*  A0 */      0,      '~', -ESC_s, CHAR_HT,      0,   -ESC_v,  -ESC_w,      0,
+/*  A8 */      0,   -ESC_z,      0,       0,      0,      '[',       0,      0,
+/*  B0 */      0,        0,      0,       0,      0,        0,       0,      0,
+/*  B8 */      0,        0,      0,       0,      0,      ']',     '=',    '-',
+/*  C0 */    '{',   -ESC_A, -ESC_B,  -ESC_C, -ESC_D,   -ESC_E,       0, -ESC_G,
+/*  C8 */ -ESC_H,        0,      0,       0,      0,        0,       0,      0,
+/*  D0 */    '}',        0, -ESC_K,       0,      0,   -ESC_N,       0, -ESC_P,
+/*  D8 */ -ESC_Q,   -ESC_R,      0,       0,      0,        0,       0,      0,
+/*  E0 */   '\\',        0, -ESC_S,       0,      0,   -ESC_V,  -ESC_W, -ESC_X,
+/*  E8 */      0,   -ESC_Z,      0,       0,      0,        0,       0,      0,
+/*  F0 */      0,        0,      0,       0,      0,        0,       0,      0,
+/*  F8 */      0,        0
 };
 
 /* We also need a table of characters that may follow \c in an EBCDIC
@@ -586,9 +588,9 @@ static const char verbnames[] =
   "\0"                       /* Empty name is a shorthand for MARK */
   STRING_MARK0
   STRING_ACCEPT0
-  STRING_COMMIT0
   STRING_F0
   STRING_FAIL0
+  STRING_COMMIT0
   STRING_PRUNE0
   STRING_SKIP0
   STRING_THEN;
@@ -596,11 +598,11 @@ static const char verbnames[] =
 static const verbitem verbs[] = {
   { 0, META_MARK,   +1 },  /* > 0 => must have an argument */
   { 4, META_MARK,   +1 },
-  { 6, META_ACCEPT, -1 },  /* < 0 => must not have an argument */
-  { 6, META_COMMIT, -1 },
+  { 6, META_ACCEPT, -1 },  /* < 0 => Optional argument, convert to pre-MARK */
   { 1, META_FAIL,   -1 },
   { 4, META_FAIL,   -1 },
-  { 5, META_PRUNE,   0 },  /* Argument is optional; bump META code if found */
+  { 6, META_COMMIT,  0 },
+  { 5, META_PRUNE,   0 },  /* Optional argument; bump META code if found */
   { 4, META_SKIP,    0 },
   { 4, META_THEN,    0 }
 };
@@ -610,8 +612,8 @@ static const int verbcount = sizeof(verbs)/sizeof(verbitem);
 /* Verb opcodes, indexed by their META code offset from META_MARK. */
 
 static const uint32_t verbops[] = {
-  OP_MARK, OP_ACCEPT, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP,
-  OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
+  OP_MARK, OP_ACCEPT, OP_FAIL, OP_COMMIT, OP_COMMIT_ARG, OP_PRUNE,
+  OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG, OP_THEN, OP_THEN_ARG };
 
 /* Offsets from OP_STAR for case-independent and negative repeat opcodes. */
 
@@ -729,7 +731,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
        ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
        ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
        ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
-       ERR91, ERR92};
+       ERR91, ERR92, ERR93, ERR94 };
 
 /* This is a table of start-of-pattern options such as (*UTF) and settings such
 as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@@ -976,8 +978,8 @@ for (;;)
     case META_POSIX_NEG: fprintf(stderr, "META_POSIX_NEG %d", *pptr++); break;
 
     case META_ACCEPT: fprintf(stderr, "META (*ACCEPT)"); break;
-    case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
     case META_FAIL: fprintf(stderr, "META (*FAIL)"); break;
+    case META_COMMIT: fprintf(stderr, "META (*COMMIT)"); break;
     case META_PRUNE: fprintf(stderr, "META (*PRUNE)"); break;
     case META_SKIP: fprintf(stderr, "META (*SKIP)"); break;
     case META_THEN: fprintf(stderr, "META (*THEN)"); break;
@@ -1067,6 +1069,10 @@ for (;;)
     fprintf(stderr, "META (*MARK:");
     goto SHOWARG;
 
+    case META_COMMIT_ARG:
+    fprintf(stderr, "META (*COMMIT:");
+    goto SHOWARG;
+
     case META_PRUNE_ARG:
     fprintf(stderr, "META (*PRUNE:");
     goto SHOWARG;
@@ -1435,6 +1441,48 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
     escape = -i;                    /* Else return a special escape */
     if (cb != NULL && (escape == ESC_P || escape == ESC_p || escape == ESC_X))
       cb->external_flags |= PCRE2_HASBKPORX;   /* Note \P, \p, or \X */
+
+    /* Perl supports \N{name} for character names and \N{U+dddd} for numerical
+    Unicode code points, as well as plain \N for "not newline". PCRE does not
+    support \N{name}. However, it does support quantification such as \N{2,3},
+    so if \N{ is not followed by U+dddd we check for a quantifier. */
+
+    if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)
+      {
+      PCRE2_SPTR p = ptr + 1;
+
+      /* \N{U+ can be handled by the \x{ code. However, this construction is
+      not valid in EBCDIC environments because it specifies a Unicode
+      character, not a codepoint in the local code. For example \N{U+0041}
+      must be "A" in all environments. Also, in Perl, \N{U+ forces Unicode
+      casing semantics for the entire pattern, so allow it only in UTF (i.e.
+      Unicode) mode. */
+
+      if (ptrend - p > 1 && *p == CHAR_U && p[1] == CHAR_PLUS)
+        {
+#ifdef EBCDIC
+        *errorcodeptr = ERR93;
+#else
+        if (utf)
+          {
+          ptr = p + 1;
+          escape = 0;   /* Not a fancy escape after all */
+          goto COME_FROM_NU;
+          }
+        else *errorcodeptr = ERR93;
+#endif
+        }
+
+      /* Give an error if what follows is not a quantifier, but don't override
+      an error set by the quantifier reader (e.g. number overflow). */
+
+      else
+        {
+        if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) &&
+             *errorcodeptr == 0)
+          *errorcodeptr = ERR37;
+        }
+      }
     }
   }
 
@@ -1462,6 +1510,7 @@ else
     /* A number of Perl escapes are not handled by PCRE. We give an explicit
     error. */
 
+    case CHAR_F:
     case CHAR_l:
     case CHAR_L:
     *errorcodeptr = ERR37;
@@ -1719,6 +1768,9 @@ else
       {
       if (ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET)
         {
+#ifndef EBCDIC
+        COME_FROM_NU:
+#endif
         if (++ptr >= ptrend || *ptr == CHAR_RIGHT_CURLY_BRACKET)
           {
           *errorcodeptr = ERR78;
@@ -1852,19 +1904,6 @@ else
     }
   }
 
-/* Perl supports \N{name} for character names, as well as plain \N for "not
-newline". PCRE does not support \N{name}. However, it does support
-quantification such as \N{2,3}. */
-
-if (escape == ESC_N && ptr < ptrend && *ptr == CHAR_LEFT_CURLY_BRACKET &&
-    ptrend - ptr > 2)
-  {
-  PCRE2_SPTR p = ptr + 1;
-  if (!read_repeat_counts(&p, ptrend, NULL, NULL, errorcodeptr) &&
-       *errorcodeptr == 0)
-    *errorcodeptr = ERR37;
-  }
-
 /* Set the pointer to the next character before returning. */
 
 *ptrptr = ptr;
@@ -2251,11 +2290,14 @@ typedef struct nest_save {
 #define NSF_RESET          0x0001u
 #define NSF_CONDASSERT     0x0002u
 
-/* Of the options that are changeable within the pattern, these are tracked
-during parsing. The rest are used from META_OPTIONS items when compiling. */
+/* Options that are changeable within the pattern must be tracked during
+parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing,
+but all must be tracked so that META_OPTIONS items set the correct values for
+the main compiling phase. */
 
-#define PARSE_TRACKED_OPTIONS \
-  (PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE)
+#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
+  PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
+  PCRE2_UNGREEDY)
 
 /* States used for analyzing ranges in character classes. The two OK values
 must be last. */
@@ -2290,6 +2332,7 @@ uint32_t *previous_callout = NULL;
 uint32_t *parsed_pattern = cb->parsed_pattern;
 uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
 uint32_t meta_quantifier = 0;
+uint32_t add_after_mark = 0;
 uint16_t nest_depth = 0;
 int after_manual_callout = 0;
 int expect_cond_assert = 0;
@@ -2434,11 +2477,17 @@ while (ptr < ptrend)
         /* EITHER: not both options set */
         ((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
                     (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
-        /* OR: character > 255 */
-        c > 255 ||
-        /* OR: not a # comment or white space */
-        (c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0)
-       ))
+#ifdef SUPPORT_UNICODE
+        /* OR: character > 255 AND not Unicode Pattern White Space */
+        (c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) ||
+#endif
+        /* OR: not a # comment or isspace() white space */
+        (c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0
+#ifdef SUPPORT_UNICODE
+        /* and not CHAR_NEL when Unicode is supported */
+          && c != CHAR_NEL
+#endif
+       )))
     {
     PCRE2_SIZE verbnamelength;
 
@@ -2461,6 +2510,16 @@ while (ptr < ptrend)
         goto FAILED;
         }
       *verblengthptr = (uint32_t)verbnamelength;
+
+      /* If this name was on a verb such as (*ACCEPT) which does not continue,
+      a (*MARK) was generated for the name. We now add the original verb as the
+      next item. */
+
+      if (add_after_mark != 0)
+        {
+        *parsed_pattern++ = add_after_mark;
+        add_after_mark = 0;
+        }
       break;
 
       case CHAR_BACKSLASH:
@@ -2510,11 +2569,18 @@ while (ptr < ptrend)
 
   /* Skip over whitespace and # comments in extended mode. Note that c is a
   character, not a code unit, so we must not use MAX_255 to test its size
-  because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */
+  because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The
+  whitespace characters are those designated as "Pattern White Space" by
+  Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is
+  U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
+  subset of space characters that match \h and \v. */
 
   if ((options & PCRE2_EXTENDED) != 0)
     {
     if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
+#ifdef SUPPORT_UNICODE
+    if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue;
+#endif
     if (c == CHAR_NUMBER_SIGN)
       {
       while (ptr < ptrend)
@@ -3206,7 +3272,6 @@ while (ptr < ptrend)
         tempptr = ptr;
         escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode,
           options, TRUE, cb);
-
         if (errorcode != 0)
           {
           CLASS_ESCAPE_FAILED:
@@ -3454,13 +3519,25 @@ while (ptr < ptrend)
 
         if (*ptr++ == CHAR_COLON)   /* Skip past : or ) */
           {
-          if (verbs[i].has_arg < 0)  /* Argument is forbidden */
+          /* Some optional arguments can be treated as a preceding (*MARK) */
+
+          if (verbs[i].has_arg < 0)
             {
-            errorcode = ERR59;
-            goto FAILED;
+            add_after_mark = verbs[i].meta;
+            *parsed_pattern++ = META_MARK;
             }
-          *parsed_pattern++ = verbs[i].meta +
-            ((verbs[i].meta != META_MARK)? 0x00010000u:0);
+
+          /* The remaining verbs with arguments (except *MARK) need a different
+          opcode. */
+
+          else
+            {
+            *parsed_pattern++ = verbs[i].meta +
+              ((verbs[i].meta != META_MARK)? 0x00010000u:0);
+            }
+
+          /* Set up for reading the name in the main loop. */
+
           verblengthptr = parsed_pattern++;
           verbnamestart = ptr;
           inverbname = TRUE;
@@ -3521,17 +3598,39 @@ while (ptr < ptrend)
 
       else
         {
+        BOOL hyphenok = TRUE;
+        uint32_t oldoptions = options;
+
         top_nest->reset_group = 0;
         top_nest->max_group = 0;
         set = unset = 0;
         optset = &set;
 
+        /* ^ at the start unsets imnsx and disables the subsequent use of - */
+
+        if (ptr < ptrend && *ptr == CHAR_CIRCUMFLEX_ACCENT)
+          {
+          options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE|
+                       PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE);
+          hyphenok = FALSE;
+          ptr++;
+          }
+
         while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS &&
                                *ptr != CHAR_COLON)
           {
           switch (*ptr++)
             {
-            case CHAR_MINUS: optset = &unset; break;
+            case CHAR_MINUS:
+            if (!hyphenok)
+              {
+              errorcode = ERR94;
+              ptr--;  /* Correct the offset */
+              goto FAILED;
+              }
+            optset = &unset;
+            hyphenok = FALSE;
+            break;
 
             case CHAR_J:  /* Record that it changed in the external options */
             *optset |= PCRE2_DUPNAMES;
@@ -3591,7 +3690,7 @@ while (ptr < ptrend)
 
         /* If nothing changed, no need to record. */
 
-        if (set != 0 || unset != 0)
+        if (options != oldoptions)
           {
           *parsed_pattern++ = META_OPTIONS;
           *parsed_pattern++ = options;
@@ -3896,9 +3995,8 @@ while (ptr < ptrend)
         if (*ptr == CHAR_DOT)
           {
           if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION;
-          if (!read_number(&ptr, ptrend, -1, 99 , ERR79, &minor, &errorcode))
-            goto FAILED;
-          if (minor < 10) minor *= 10;
+          minor = (*ptr++ - CHAR_0) * 10;
+          if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0;
           if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
             goto BAD_VERSION_CONDITION;
           }
@@ -4261,11 +4359,11 @@ goto FAILED;
 
 
 /*************************************************
-*      Find first significant op code            *
+*       Find first significant opcode            *
 *************************************************/
 
 /* This is called by several functions that scan a compiled expression looking
-for a fixed first character, or an anchoring op code etc. It skips over things
+for a fixed first character, or an anchoring opcode etc. It skips over things
 that do not influence this. For some calls, it makes sense to skip negative
 forward and all backward assertions, and also the \b assertion; for others it
 does not.
@@ -5472,7 +5570,7 @@ for (;; pptr++)
       set xclass = TRUE. Then, in the pre-compile phase, accumulate the length
       of the extra data and reset the pointer. This is so that very large
       classes that contain a zillion wide characters or Unicode property tests
-      do not overwrite the work space (which is on the stack). */
+      do not overwrite the workspace (which is on the stack). */
 
       if (class_uchardata > class_uchardata_base)
         {
@@ -5563,7 +5661,7 @@ for (;; pptr++)
       if (class_has_8bitchar > 0)
         {
         *code++ |= XCL_MAP;
-        memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
+        (void)memmove(code + (32 / sizeof(PCRE2_UCHAR)), code,
           CU2BYTES(class_uchardata - code));
         if (negate_class && !xclass_has_prop)
           for (i = 0; i < 32; i++) classbits[i] = ~classbits[i];
@@ -5655,6 +5753,7 @@ for (;; pptr++)
     cb->had_pruneorskip = TRUE;
     /* Fall through */
     case META_MARK:
+    case META_COMMIT_ARG:
     VERB_ARG:
     *code++ = verbops[(meta - META_MARK) >> 16];
     /* The length is in characters. */
@@ -6509,7 +6608,7 @@ for (;; pptr++)
 
       /* Wrap the recursion call in OP_BRA brackets. */
 
-      memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
+      (void)memmove(previous + 1 + LINK_SIZE, previous, CU2BYTES(1 + LINK_SIZE));
       op_previous = *previous = OP_BRA;
       PUT(previous, 1, 2 + 2*LINK_SIZE);
       previous[2 + 2*LINK_SIZE] = OP_KET;
@@ -6589,7 +6688,7 @@ for (;; pptr++)
 
           if (repeat_max <= 1 || repeat_max == REPEAT_UNLIMITED)
             {
-            memmove(previous + 1, previous, CU2BYTES(len));
+            (void)memmove(previous + 1, previous, CU2BYTES(len));
             code++;
             if (repeat_max == 0)
               {
@@ -6610,7 +6709,7 @@ for (;; pptr++)
           else
             {
             int linkoffset;
-            memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
+            (void)memmove(previous + 2 + LINK_SIZE, previous, CU2BYTES(len));
             code += 2 + LINK_SIZE;
             *previous++ = OP_BRAZERO + repeat_type;
             *previous++ = OP_BRA;
@@ -6811,7 +6910,7 @@ for (;; pptr++)
               if (*bracode == OP_COND || *bracode == OP_SCOND)
                 {
                 int nlen = (int)(code - bracode);
-                memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
+                (void)memmove(bracode + 1 + LINK_SIZE, bracode, CU2BYTES(nlen));
                 code += 1 + LINK_SIZE;
                 nlen += 1 + LINK_SIZE;
                 *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
@@ -7082,7 +7181,7 @@ for (;; pptr++)
 
         else
           {
-          memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
+          (void)memmove(tempcode + 1 + LINK_SIZE, tempcode, CU2BYTES(len));
           code += 1 + LINK_SIZE;
           len += 1 + LINK_SIZE;
           tempcode[0] = OP_ONCE;
@@ -7460,7 +7559,7 @@ length of the BRA and KET and any extra code units that are required at the
 beginning. We accumulate in a local variable to save frequent testing of
 lengthptr for NULL. We cannot do this by looking at the value of 'code' at the
 start and end of each alternative, because compiled items are discarded during
-the pre-compile phase so that the work space is not exceeded. */
+the pre-compile phase so that the workspace is not exceeded. */
 
 length = 2 + 2*LINK_SIZE + skipunits;
 
@@ -7622,7 +7721,7 @@ for (;;)
       {
       if (cb->open_caps->flag)
         {
-        memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
+        (void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
           CU2BYTES(code - start_bracket));
         *start_bracket = OP_ONCE;
         code += 1 + LINK_SIZE;
@@ -7765,10 +7864,11 @@ do {
      if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
      }
 
-   /* Condition */
+   /* Condition. If there is no second branch, it can't be anchored. */
 
-   else if (op == OP_COND)
+   else if (op == OP_COND || op == OP_SCOND)
      {
+     if (scode[GET(scode,1)] != OP_ALT) return FALSE;
      if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
        return FALSE;
      }
@@ -8003,6 +8103,7 @@ for (;;)
       break;
 
       case OP_MARK:
+      case OP_COMMIT_ARG:
       case OP_PRUNE_ARG:
       case OP_SKIP_ARG:
       case OP_THEN_ARG:
@@ -8221,7 +8322,7 @@ for (i = 0; i < tablecount; i++)
 
   if (crc < 0)
     {
-    memmove(slot + cb->name_entry_size, slot,
+    (void)memmove(slot + cb->name_entry_size, slot,
       CU2BYTES((tablecount - i) * cb->name_entry_size));
     break;
     }
@@ -8311,6 +8412,7 @@ for (;; pptr++)
     break;
 
     case META_MARK:     /* Add the length of the name. */
+    case META_COMMIT_ARG:
     case META_PRUNE_ARG:
     case META_SKIP_ARG:
     case META_THEN_ARG:
@@ -8501,6 +8603,7 @@ for (;; pptr++)
     goto EXIT;
 
     case META_MARK:
+    case META_COMMIT_ARG:
     case META_PRUNE_ARG:
     case META_SKIP_ARG:
     case META_THEN_ARG:
@@ -8572,6 +8675,32 @@ for (;; pptr++)
     case META_LOOKAHEADNOT:
     pptr = parsed_skip(pptr + 1, PSKIP_KET);
     if (pptr == NULL) goto PARSED_SKIP_FAILED;
+
+    /* Also ignore any qualifiers that follow a lookahead assertion. */
+
+    switch (pptr[1])
+      {
+      case META_ASTERISK:
+      case META_ASTERISK_PLUS:
+      case META_ASTERISK_QUERY:
+      case META_PLUS:
+      case META_PLUS_PLUS:
+      case META_PLUS_QUERY:
+      case META_QUERY:
+      case META_QUERY_PLUS:
+      case META_QUERY_QUERY:
+      pptr++;
+      break;
+
+      case META_MINMAX:
+      case META_MINMAX_PLUS:
+      case META_MINMAX_QUERY:
+      pptr += 3;
+      break;
+
+      default:
+      break;
+      }
     break;
 
     /* Lookbehinds can be ignored, but must themselves be checked. */
@@ -8942,6 +9071,7 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
     break;
 
     case META_MARK:
+    case META_COMMIT_ARG:
     case META_PRUNE_ARG:
     case META_SKIP_ARG:
     case META_THEN_ARG:

+ 9 - 3
thirdparty/pcre2/src/pcre2_convert.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -1066,11 +1066,12 @@ BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
 uint32_t pattype = options & TYPE_OPTIONS;
 
 if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
+
 if ((options & ~ALL_OPTIONS) != 0 ||        /* Undefined bit set */
     (pattype & (~pattype+1)) != pattype ||  /* More than one type set */
     pattype == 0)                           /* No type set */
   {
-  *bufflenptr = 0;  /* Error offset */
+  *bufflenptr = 0;                          /* Error offset */
   return PCRE2_ERROR_BADOPTION;
   }
 
@@ -1081,7 +1082,11 @@ if (ccontext == NULL) ccontext =
 /* Check UTF if required. */
 
 #ifndef SUPPORT_UNICODE
-if (utf) return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
+if (utf)
+  {
+  *bufflenptr = 0;  /* Error offset */
+  return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
+  }
 #else
 if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
   {
@@ -1126,6 +1131,7 @@ for (i = 0; i < 2; i++)
     break;
 
     default:
+    *bufflenptr = 0;  /* Error offset */
     return PCRE2_ERROR_INTERNAL;
     }
 

+ 230 - 41
thirdparty/pcre2/src/pcre2_dfa_match.c

@@ -181,7 +181,8 @@ static const uint8_t coptable[] = {
   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
-  0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
+  0, 0,                          /* COMMIT, COMMIT_ARG                     */
+  0, 0, 0,                       /* FAIL, ACCEPT, ASSERT_ACCEPT            */
   0, 0, 0                        /* CLOSE, SKIPZERO, DEFINE                */
 };
 
@@ -254,7 +255,8 @@ static const uint8_t poptable[] = {
   0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
   0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
   0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
-  0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
+  0, 0,                          /* COMMIT, COMMIT_ARG                     */
+  0, 0, 0,                       /* FAIL, ACCEPT, ASSERT_ACCEPT            */
   0, 0, 0                        /* CLOSE, SKIPZERO, DEFINE                */
 };
 
@@ -292,6 +294,35 @@ typedef struct stateblock {
 #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
 
 
+/* Before version 10.32 the recursive calls of internal_dfa_match() were passed
+local working space and output vectors that were created on the stack. This has
+caused issues for some patterns, especially in small-stack environments such as
+Windows. A new scheme is now in use which sets up a vector on the stack, but if
+this is too small, heap memory is used, up to the heap_limit. The main
+parameters are all numbers of ints because the workspace is a vector of ints.
+
+The size of the starting stack vector, DFA_START_RWS_SIZE, is in bytes, and is
+defined in pcre2_internal.h so as to be available to pcre2test when it is
+finding the minimum heap requirement for a match. */
+
+#define OVEC_UNIT  (sizeof(PCRE2_SIZE)/sizeof(int))
+
+#define RWS_BASE_SIZE   (DFA_START_RWS_SIZE/sizeof(int))  /* Stack vector */
+#define RWS_RSIZE       1000                    /* Work size for recursion */
+#define RWS_OVEC_RSIZE  (1000*OVEC_UNIT)        /* Ovector for recursion */
+#define RWS_OVEC_OSIZE  (2*OVEC_UNIT)           /* Ovector in other cases */
+
+/* This structure is at the start of each workspace block. */
+
+typedef struct RWS_anchor {
+  struct RWS_anchor *next;
+  unsigned int size;  /* Number of ints */
+  unsigned int free;  /* Number of ints */
+} RWS_anchor;
+
+#define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int))
+
+
 
 /*************************************************
 *               Process a callout                *
@@ -353,6 +384,61 @@ return (mb->callout)(cb, mb->callout_data);
 
 
 
+/*************************************************
+*         Expand local workspace memory          *
+*************************************************/
+
+/* This function is called when internal_dfa_match() is about to be called
+recursively and there is insufficient working space left in the current
+workspace block. If there's an existing next block, use it; otherwise get a new
+block unless the heap limit is reached.
+
+Arguments:
+  rwsptr     pointer to block pointer (updated)
+  ovecsize   space needed for an ovector
+  mb         the match block
+
+Returns:     0 rwsptr has been updated
+            !0 an error code
+*/
+
+static int
+more_workspace(RWS_anchor **rwsptr, unsigned int ovecsize, dfa_match_block *mb)
+{
+RWS_anchor *rws = *rwsptr;
+RWS_anchor *new;
+
+if (rws->next != NULL)
+  {
+  new = rws->next;
+  }
+
+/* All sizes are in units of sizeof(int), except for mb->heaplimit, which is in
+kibibytes. */
+
+else
+  {
+  unsigned int newsize = rws->size * 2;
+  unsigned int heapleft = (unsigned int)
+    (((1024/sizeof(int))*mb->heap_limit - mb->heap_used));
+  if (newsize > heapleft) newsize = heapleft;
+  if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE)
+    return PCRE2_ERROR_HEAPLIMIT;
+  new = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data);
+  if (new == NULL) return PCRE2_ERROR_NOMEMORY;
+  mb->heap_used += newsize;
+  new->next = NULL;
+  new->size = newsize;
+  rws->next = new;
+  }
+
+new->free = new->size - RWS_ANCHOR_SIZE;
+*rwsptr = new;
+return 0;
+}
+
+
+
 /*************************************************
 *     Match a Regular Expression - DFA engine    *
 *************************************************/
@@ -431,7 +517,8 @@ internal_dfa_match(
   uint32_t offsetcount,
   int *workspace,
   int wscount,
-  uint32_t  rlevel)
+  uint32_t rlevel,
+  int *RWS)
 {
 stateblock *active_states, *new_states, *temp_states;
 stateblock *next_active_state, *next_new_state;
@@ -788,7 +875,7 @@ for (;;)
             else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
               match_count = 0;
           count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
-          if (count > 0) memmove(offsets + 2, offsets,
+          if (count > 0) (void)memmove(offsets + 2, offsets,
             (size_t)count * sizeof(PCRE2_SIZE));
           if (offsetcount >= 2)
             {
@@ -2587,10 +2674,22 @@ for (;;)
       case OP_ASSERTBACK:
       case OP_ASSERTBACK_NOT:
         {
-        PCRE2_SPTR endasscode = code + GET(code, 1);
-        PCRE2_SIZE local_offsets[2];
         int rc;
-        int local_workspace[1000];
+        int *local_workspace;
+        PCRE2_SIZE *local_offsets;
+        PCRE2_SPTR endasscode = code + GET(code, 1);
+        RWS_anchor *rws = (RWS_anchor *)RWS;
+
+        if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
+          {
+          rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
+          if (rc != 0) return rc;
+          RWS = (int *)rws;
+          }
+
+        local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
+        local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
+        rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
 
         while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
 
@@ -2600,10 +2699,13 @@ for (;;)
           ptr,                                  /* where we currently are */
           (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
           local_offsets,                        /* offset vector */
-          sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
+          RWS_OVEC_OSIZE/OVEC_UNIT,             /* size of same */
           local_workspace,                      /* workspace vector */
-          sizeof(local_workspace)/sizeof(int),  /* size of same */
-          rlevel);                              /* function recursion level */
+          RWS_RSIZE,                            /* size of same */
+          rlevel,                               /* function recursion level */
+          RWS);                                 /* recursion workspace */
+
+        rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
 
         if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
         if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
@@ -2615,8 +2717,6 @@ for (;;)
       case OP_COND:
       case OP_SCOND:
         {
-        PCRE2_SIZE local_offsets[1000];
-        int local_workspace[1000];
         int codelink = (int)GET(code, 1);
         PCRE2_UCHAR condcode;
 
@@ -2673,8 +2773,22 @@ for (;;)
         else
           {
           int rc;
+          int *local_workspace;
+          PCRE2_SIZE *local_offsets;
           PCRE2_SPTR asscode = code + LINK_SIZE + 1;
           PCRE2_SPTR endasscode = asscode + GET(asscode, 1);
+          RWS_anchor *rws = (RWS_anchor *)RWS;
+
+          if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
+            {
+            rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
+            if (rc != 0) return rc;
+            RWS = (int *)rws;
+            }
+
+          local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
+          local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
+          rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
 
           while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
 
@@ -2684,10 +2798,13 @@ for (;;)
             ptr,                                  /* where we currently are */
             (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
             local_offsets,                        /* offset vector */
-            sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
+            RWS_OVEC_OSIZE/OVEC_UNIT,             /* size of same */
             local_workspace,                      /* workspace vector */
-            sizeof(local_workspace)/sizeof(int),  /* size of same */
-            rlevel);                              /* function recursion level */
+            RWS_RSIZE,                            /* size of same */
+            rlevel,                               /* function recursion level */
+            RWS);                                 /* recursion workspace */
+
+          rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
 
           if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
           if ((rc >= 0) ==
@@ -2702,13 +2819,25 @@ for (;;)
       /*-----------------------------------------------------------------*/
       case OP_RECURSE:
         {
+        int rc;
+        int *local_workspace;
+        PCRE2_SIZE *local_offsets;
+        RWS_anchor *rws = (RWS_anchor *)RWS;
         dfa_recursion_info *ri;
-        PCRE2_SIZE local_offsets[1000];
-        int local_workspace[1000];
         PCRE2_SPTR callpat = start_code + GET(code, 1);
         uint32_t recno = (callpat == mb->start_code)? 0 :
           GET2(callpat, 1 + LINK_SIZE);
-        int rc;
+
+        if (rws->free < RWS_RSIZE + RWS_OVEC_RSIZE)
+          {
+          rc = more_workspace(&rws, RWS_OVEC_RSIZE, mb);
+          if (rc != 0) return rc;
+          RWS = (int *)rws;
+          }
+
+        local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
+        local_workspace = ((int *)local_offsets) + RWS_OVEC_RSIZE;
+        rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE;
 
         /* Check for repeating a recursion without advancing the subject
         pointer. This should catch convoluted mutual recursions. (Some simple
@@ -2732,11 +2861,13 @@ for (;;)
           ptr,                                  /* where we currently are */
           (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
           local_offsets,                        /* offset vector */
-          sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
+          RWS_OVEC_RSIZE/OVEC_UNIT,             /* size of same */
           local_workspace,                      /* workspace vector */
-          sizeof(local_workspace)/sizeof(int),  /* size of same */
-          rlevel);                              /* function recursion level */
+          RWS_RSIZE,                            /* size of same */
+          rlevel,                               /* function recursion level */
+          RWS);                                 /* recursion workspace */
 
+        rws->free += RWS_RSIZE + RWS_OVEC_RSIZE;
         mb->recursive = new_recursive.prevrec;  /* Done this recursion */
 
         /* Ran out of internal offsets */
@@ -2782,10 +2913,25 @@ for (;;)
       case OP_SCBRAPOS:
       case OP_BRAPOSZERO:
         {
+        int rc;
+        int *local_workspace;
+        PCRE2_SIZE *local_offsets;
         PCRE2_SIZE charcount, matched_count;
         PCRE2_SPTR local_ptr = ptr;
+        RWS_anchor *rws = (RWS_anchor *)RWS;
         BOOL allow_zero;
 
+        if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
+          {
+          rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
+          if (rc != 0) return rc;
+          RWS = (int *)rws;
+          }
+
+        local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
+        local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
+        rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
+
         if (codevalue == OP_BRAPOSZERO)
           {
           allow_zero = TRUE;
@@ -2798,19 +2944,17 @@ for (;;)
 
         for (matched_count = 0;; matched_count++)
           {
-          PCRE2_SIZE local_offsets[2];
-          int local_workspace[1000];
-
-          int rc = internal_dfa_match(
+          rc = internal_dfa_match(
             mb,                                   /* fixed match data */
             code,                                 /* this subexpression's code */
             local_ptr,                            /* where we currently are */
             (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
             local_offsets,                        /* offset vector */
-            sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
+            RWS_OVEC_OSIZE/OVEC_UNIT,             /* size of same */
             local_workspace,                      /* workspace vector */
-            sizeof(local_workspace)/sizeof(int),  /* size of same */
-            rlevel);                              /* function recursion level */
+            RWS_RSIZE,                            /* size of same */
+            rlevel,                               /* function recursion level */
+            RWS);                                 /* recursion workspace */
 
           /* Failed to match */
 
@@ -2827,6 +2971,8 @@ for (;;)
           local_ptr += charcount;    /* Advance temporary position ptr */
           }
 
+        rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
+
         /* At this point we have matched the subpattern matched_count
         times, and local_ptr is pointing to the character after the end of the
         last match. */
@@ -2869,19 +3015,35 @@ for (;;)
       /*-----------------------------------------------------------------*/
       case OP_ONCE:
         {
-        PCRE2_SIZE local_offsets[2];
-        int local_workspace[1000];
+        int rc;
+        int *local_workspace;
+        PCRE2_SIZE *local_offsets;
+        RWS_anchor *rws = (RWS_anchor *)RWS;
 
-        int rc = internal_dfa_match(
+        if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
+          {
+          rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
+          if (rc != 0) return rc;
+          RWS = (int *)rws;
+          }
+
+        local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
+        local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
+        rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
+
+        rc = internal_dfa_match(
           mb,                                   /* fixed match data */
           code,                                 /* this subexpression's code */
           ptr,                                  /* where we currently are */
           (PCRE2_SIZE)(ptr - start_subject),    /* start offset */
           local_offsets,                        /* offset vector */
-          sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
+          RWS_OVEC_OSIZE/OVEC_UNIT,             /* size of same */
           local_workspace,                      /* workspace vector */
-          sizeof(local_workspace)/sizeof(int),  /* size of same */
-          rlevel);                              /* function recursion level */
+          RWS_RSIZE,                            /* size of same */
+          rlevel,                               /* function recursion level */
+          RWS);                                 /* recursion workspace */
+
+        rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
 
         if (rc >= 0)
           {
@@ -3063,6 +3225,7 @@ pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
   pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount)
 {
+int rc;
 const pcre2_real_code *re = (const pcre2_real_code *)code;
 
 PCRE2_SPTR start_match;
@@ -3071,9 +3234,9 @@ PCRE2_SPTR bumpalong_limit;
 PCRE2_SPTR req_cu_ptr;
 
 BOOL utf, anchored, startline, firstline;
-
 BOOL has_first_cu = FALSE;
 BOOL has_req_cu = FALSE;
+
 PCRE2_UCHAR first_cu = 0;
 PCRE2_UCHAR first_cu2 = 0;
 PCRE2_UCHAR req_cu = 0;
@@ -3088,6 +3251,17 @@ pcre2_callout_block cb;
 dfa_match_block actual_match_block;
 dfa_match_block *mb = &actual_match_block;
 
+/* Set up a starting block of memory for use during recursive calls to
+internal_dfa_match(). By putting this on the stack, it minimizes resource use
+in the case when it is not needed. If this is too small, more memory is
+obtained from the heap. At the start of each block is an anchor structure.*/
+
+int base_recursion_workspace[RWS_BASE_SIZE];
+RWS_anchor *rws = (RWS_anchor *)base_recursion_workspace;
+rws->next = NULL;
+rws->size = RWS_BASE_SIZE;
+rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
+
 /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
 subject string. */
 
@@ -3184,6 +3358,7 @@ if (mcontext == NULL)
   mb->memctl = re->memctl;
   mb->match_limit = PRIV(default_match_context).match_limit;
   mb->match_limit_depth = PRIV(default_match_context).depth_limit;
+  mb->heap_limit = PRIV(default_match_context).heap_limit;
   }
 else
   {
@@ -3198,6 +3373,7 @@ else
   mb->memctl = mcontext->memctl;
   mb->match_limit = mcontext->match_limit;
   mb->match_limit_depth = mcontext->depth_limit;
+  mb->heap_limit = mcontext->heap_limit;
   }
 
 if (mb->match_limit > re->limit_match)
@@ -3206,6 +3382,9 @@ if (mb->match_limit > re->limit_match)
 if (mb->match_limit_depth > re->limit_depth)
   mb->match_limit_depth = re->limit_depth;
 
+if (mb->heap_limit > re->limit_heap)
+  mb->heap_limit = re->limit_heap;
+
 mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
   re->name_count * re->name_entry_size;
 mb->tables = re->tables;
@@ -3215,6 +3394,7 @@ mb->start_offset = start_offset;
 mb->moptions = options;
 mb->poptions = re->overall_options;
 mb->match_call_count = 0;
+mb->heap_used = 0;
 
 /* Process the \R and newline settings. */
 
@@ -3351,8 +3531,6 @@ a match. */
 
 for (;;)
   {
-  int rc;
-
   /* ----------------- Start of match optimizations ---------------- */
 
   /* There are some optimizations that avoid running the match if a known
@@ -3544,7 +3722,7 @@ for (;;)
       in characters, we treat it as code units to avoid spending too much time
       in this optimization. */
 
-      if (end_subject - start_match < re->minlength) return PCRE2_ERROR_NOMATCH;
+      if (end_subject - start_match < re->minlength) goto NOMATCH_EXIT;
 
       /* If req_cu is set, we know that that code unit must appear in the
       subject for the match to succeed. If the first code unit is set, req_cu
@@ -3621,7 +3799,8 @@ for (;;)
     (uint32_t)match_data->oveccount * 2,  /* actual size of same */
     workspace,                    /* workspace vector */
     (int)wscount,                 /* size of same */
-    0);                           /* function recurse level */
+    0,                            /* function recurse level */
+    base_recursion_workspace);    /* initial workspace for recursion */
 
   /* Anything other than "no match" means we are done, always; otherwise, carry
   on only if not anchored. */
@@ -3637,7 +3816,7 @@ for (;;)
     match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject);
     match_data->startchar = (PCRE2_SIZE)(start_match - subject);
     match_data->rc = rc;
-    return rc;
+    goto EXIT;
     }
 
   /* Advance to the next subject character unless we are at the end of a line
@@ -3668,8 +3847,18 @@ for (;;)
 
   }   /* "Bumpalong" loop */
 
+NOMATCH_EXIT:
+rc = PCRE2_ERROR_NOMATCH;
+
+EXIT:
+while (rws->next != NULL)
+  {
+  RWS_anchor *next = rws->next;
+  rws->next = next->next;
+  mb->memctl.free(next, mb->memctl.memory_data);
+  }
 
-return PCRE2_ERROR_NOMATCH;
+return rc;
 }
 
 /* End of pcre2_dfa_match.c */

+ 10 - 5
thirdparty/pcre2/src/pcre2_error.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -107,7 +107,7 @@ static const unsigned char compile_error_texts[] =
   /* 35 */
   "lookbehind is too complicated\0"
   "\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
-  "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
+  "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0"
   "number after (?C is greater than 255\0"
   "closing parenthesis for (?C expected\0"
   /* 40 */
@@ -133,7 +133,8 @@ static const unsigned char compile_error_texts[] =
   "internal error: unknown newline setting\0"
   "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
   "(?R (recursive pattern call) must be followed by a closing parenthesis\0"
-  "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
+  /* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */
+  "obsolete error (should not occur)\0"  /* Was the above */
   /* 60 */
   "(*VERB) not recognized or malformed\0"
   "group number is too big\0"
@@ -160,7 +161,7 @@ static const unsigned char compile_error_texts[] =
   "using UCP is disabled by the application\0"
   "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
   "character code point value in \\u.... sequence is too large\0"
-  "digits missing in \\x{} or \\o{}\0"
+  "digits missing in \\x{} or \\o{} or \\N{U+}\0"
   "syntax error or number too big in (?(VERSION condition\0"
   /* 80 */
   "internal error: unknown opcode in auto_possessify()\0"
@@ -178,6 +179,8 @@ static const unsigned char compile_error_texts[] =
   "internal error: bad code value in parsed_skip()\0"
   "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
   "invalid option bits with PCRE2_LITERAL\0"
+  "\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
+  "invalid hyphen in option setting\0"
   ;
 
 /* Match-time and UTF error texts are in the same format. */
@@ -255,11 +258,13 @@ static const unsigned char match_error_texts[] =
   "expected closing curly bracket in replacement string\0"
   "bad substitution in replacement string\0"
   /* 60 */
-  "match with end before start is not supported\0"
+  "match with end before start or start moved backwards is not supported\0"
   "too many replacements (more than INT_MAX)\0"
   "bad serialized data\0"
   "heap limit exceeded\0"
   "invalid syntax\0"
+  /* 65 */
+  "internal error - duplicate substitution match\0"
   ;
 
 

+ 4 - 4
thirdparty/pcre2/src/pcre2_extuni.c

@@ -129,11 +129,11 @@ while (eptr < end_subject)
     if ((ricount & 1) != 0) break;  /* Grapheme break required */
     }
 
-  /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
-  any number of Extend before a following E_Modifier. */
+  /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
+  allows any number of them before a following Extended_Pictographic. */
 
-  if (rgb != ucp_gbExtend ||
-      (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+  if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
+       lgb != ucp_gbExtended_Pictographic)
     lgb = rgb;
 
   eptr += len;

+ 2 - 1
thirdparty/pcre2/src/pcre2_find_bracket.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-         New API code Copyright (c) 2016 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -131,6 +131,7 @@ for (;;)
       break;
 
       case OP_MARK:
+      case OP_COMMIT_ARG:
       case OP_PRUNE_ARG:
       case OP_SKIP_ARG:
       case OP_THEN_ARG:

+ 47 - 84
thirdparty/pcre2/src/pcre2_internal.h

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -165,6 +165,16 @@ by "configure". */
 #define INT64_OR_DOUBLE double
 #endif
 
+/* External (in the C sense) functions and tables that are private to the
+libraries are always referenced using the PRIV macro. This makes it possible
+for pcre2test.c to include some of the source files from the libraries using a
+different PRIV definition to avoid name clashes. It also makes it clear in the
+code that a non-static object is being referenced. */
+
+#ifndef PRIV
+#define PRIV(name) _pcre2_##name
+#endif
+
 /* When compiling for use with the Virtual Pascal compiler, these functions
 need to have their names changed. PCRE2 must be compiled with the -DVPCOMPAT
 option on the command line. */
@@ -178,50 +188,15 @@ option on the command line. */
 #define memset(s,c,n)    _memset(s,c,n)
 #else  /* VPCOMPAT */
 
-/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
-define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
-is set. Otherwise, include an emulating function for those systems that have
-neither (there some non-Unix environments where this is the case). */
+/* Otherwise, to cope with SunOS4 and other systems that lack memmove(), define
+a macro that calls an emulating function. */
 
 #ifndef HAVE_MEMMOVE
-#undef  memmove        /* some systems may have a macro */
-#ifdef HAVE_BCOPY
-#define memmove(a, b, c) bcopy(b, a, c)
-#else  /* HAVE_BCOPY */
-static void *
-pcre2_memmove(void *d, const void *s, size_t n)
-{
-size_t i;
-unsigned char *dest = (unsigned char *)d;
-const unsigned char *src = (const unsigned char *)s;
-if (dest > src)
-  {
-  dest += n;
-  src += n;
-  for (i = 0; i < n; ++i) *(--dest) = *(--src);
-  return (void *)dest;
-  }
-else
-  {
-  for (i = 0; i < n; ++i) *dest++ = *src++;
-  return (void *)(dest - n);
-  }
-}
-#define memmove(a, b, c) pcre2_memmove(a, b, c)
-#endif   /* not HAVE_BCOPY */
+#undef  memmove          /* Some systems may have a macro */
+#define memmove(a, b, c) PRIV(memmove)(a, b, c)
 #endif   /* not HAVE_MEMMOVE */
 #endif   /* not VPCOMPAT */
 
-/* External (in the C sense) functions and tables that are private to the
-libraries are always referenced using the PRIV macro. This makes it possible
-for pcre2test.c to include some of the source files from the libraries using a
-different PRIV definition to avoid name clashes. It also makes it clear in the
-code that a non-static object is being referenced. */
-
-#ifndef PRIV
-#define PRIV(name) _pcre2_##name
-#endif
-
 /* This is an unsigned int value that no UTF character can ever have, as
 Unicode doesn't go beyond 0x0010ffff. */
 
@@ -247,12 +222,17 @@ not rely on this. */
 pcre2_match() is allocated on the system stack, of this size (bytes). The size
 must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
 multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
-on the number of capturing parentheses) so 20K handles quite a few frames. A
+on the number of capturing parentheses) so 20KiB handles quite a few frames. A
 larger vector on the heap is obtained for patterns that need more frames. The
 maximum size of this can be limited. */
 
 #define START_FRAMES_SIZE 20480
 
+/* Similarly, for DFA matching, an initial internal workspace vector is
+allocated on the stack. */
+
+#define DFA_START_RWS_SIZE 30720
+
 /* Define the default BSR convention. */
 
 #ifdef BSR_ANYCRLF
@@ -585,14 +565,15 @@ these tables. */
 #define cbit_cntrl   288      /* [:cntrl:] */
 #define cbit_length  320      /* Length of the cbits table */
 
-/* Bit definitions for entries in the ctypes table. */
+/* Bit definitions for entries in the ctypes table. Do not change these values
+without checking pcre2_jit_compile.c, which has an assertion to ensure that
+ctype_word has the value 16. */
 
 #define ctype_space   0x01
 #define ctype_letter  0x02
 #define ctype_digit   0x04
-#define ctype_xdigit  0x08
+#define ctype_xdigit  0x08    /* not actually used any more */
 #define ctype_word    0x10    /* alphanumeric or '_' */
-#define ctype_meta    0x80    /* regexp meta char or zero (end pattern) */
 
 /* Offsets of the various tables from the base tables pointer, and
 total length of the tables. */
@@ -1267,36 +1248,6 @@ contain characters with values greater than 255. */
 #define XCL_PROP      3    /* Unicode property (2-byte property code follows) */
 #define XCL_NOTPROP   4    /* Unicode inverted property (ditto) */
 
-/* Escape items that are just an encoding of a particular data value. These
-appear in the escapes[] table in pcre2_compile.c as positive numbers. */
-
-#ifndef ESC_a
-#define ESC_a CHAR_BEL
-#endif
-
-#ifndef ESC_e
-#define ESC_e CHAR_ESC
-#endif
-
-#ifndef ESC_f
-#define ESC_f CHAR_FF
-#endif
-
-#ifndef ESC_n
-#define ESC_n CHAR_LF
-#endif
-
-#ifndef ESC_r
-#define ESC_r CHAR_CR
-#endif
-
-/* We can't officially use ESC_t because it is a POSIX reserved identifier
-(presumably because of all the others like size_t). */
-
-#ifndef ESC_tee
-#define ESC_tee CHAR_HT
-#endif
-
 /* These are escaped items that aren't just an encoding of a particular data
 value such as \n. They must have non-zero values, as check_escape() returns 0
 for a data character. In the escapes[] table in pcre2_compile.c their values
@@ -1578,23 +1529,26 @@ enum {
   OP_THEN,           /* 155 */
   OP_THEN_ARG,       /* 156 same, but with argument */
   OP_COMMIT,         /* 157 */
+  OP_COMMIT_ARG,     /* 158 same, but with argument */
 
-  /* These are forced failure and success verbs */
+  /* These are forced failure and success verbs. FAIL and ACCEPT do accept an
+  argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
+  without the need for a special opcode. */
 
-  OP_FAIL,           /* 158 */
-  OP_ACCEPT,         /* 159 */
-  OP_ASSERT_ACCEPT,  /* 160 Used inside assertions */
-  OP_CLOSE,          /* 161 Used before OP_ACCEPT to close open captures */
+  OP_FAIL,           /* 159 */
+  OP_ACCEPT,         /* 160 */
+  OP_ASSERT_ACCEPT,  /* 161 Used inside assertions */
+  OP_CLOSE,          /* 162 Used before OP_ACCEPT to close open captures */
 
   /* This is used to skip a subpattern with a {0} quantifier */
 
-  OP_SKIPZERO,       /* 162 */
+  OP_SKIPZERO,       /* 163 */
 
   /* This is used to identify a DEFINE group during compilation so that it can
   be checked for having only one branch. It is changed to OP_FALSE before
   compilation finishes. */
 
-  OP_DEFINE,         /* 163 */
+  OP_DEFINE,         /* 164 */
 
   /* This is not an opcode, but is used to check that tables indexed by opcode
   are the correct length, in order to catch updating errors - there have been
@@ -1650,7 +1604,7 @@ some cases doesn't actually use these names at all). */
   "Cond false", "Cond true",                                      \
   "Brazero", "Braminzero", "Braposzero",                          \
   "*MARK", "*PRUNE", "*PRUNE", "*SKIP", "*SKIP",                  \
-  "*THEN", "*THEN", "*COMMIT", "*FAIL",                           \
+  "*THEN", "*THEN", "*COMMIT", "*COMMIT", "*FAIL",                \
   "*ACCEPT", "*ASSERT_ACCEPT",                                    \
   "Close", "Skip zero", "Define"
 
@@ -1742,7 +1696,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
   3, 1, 3,                       /* MARK, PRUNE, PRUNE_ARG                 */ \
   1, 3,                          /* SKIP, SKIP_ARG                         */ \
   1, 3,                          /* THEN, THEN_ARG                         */ \
-  1, 1, 1, 1,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */ \
+  1, 3,                          /* COMMIT, COMMIT_ARG                     */ \
+  1, 1, 1,                       /* FAIL, ACCEPT, ASSERT_ACCEPT            */ \
   1+IMM2_SIZE, 1,                /* CLOSE, SKIPZERO                        */ \
   1                              /* DEFINE                                 */
 
@@ -1896,7 +1851,7 @@ extern const ucd_record                PRIV(ucd_records)[];
 #if PCRE2_CODE_UNIT_WIDTH == 32
 extern const ucd_record                PRIV(dummy_ucd_record)[];
 #endif
-extern const uint8_t                   PRIV(ucd_stage1)[];
+extern const uint16_t                  PRIV(ucd_stage1)[];
 extern const uint16_t                  PRIV(ucd_stage2)[];
 extern const uint32_t                  PRIV(ucp_gbtable)[];
 extern const uint32_t                  PRIV(ucp_gentype)[];
@@ -1976,6 +1931,14 @@ extern int          _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
 extern BOOL         _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
                       uint32_t *, BOOL);
 extern BOOL         _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
+
+/* This function is needed only when memmove() is not available. */
+
+#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
+#define _pcre2_memmove               PCRE2_SUFFIX(_pcre2_memmove)
+extern void *       _pcre2_memmove(void *, const void *, size_t);
+#endif
+
 #endif  /* PCRE2_CODE_UNIT_WIDTH */
 #endif  /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */
 

+ 17 - 0
thirdparty/pcre2/src/pcre2_intmodedep.h

@@ -793,11 +793,23 @@ typedef struct heapframe {
   uint8_t return_id;         /* Where to go on in internal "return" */
   uint8_t op;                /* Processing opcode */
 
+  /* At this point, the structure is 16-bit aligned. On most architectures
+  the alignment requirement for a pointer will ensure that the eptr field below
+  is 32-bit or 64-bit aligned. However, on m68k it is fine to have a pointer
+  that is 16-bit aligned. We must therefore ensure that what comes between here
+  and eptr is an odd multiple of 16 bits so as to get back into 32-bit
+  alignment. This happens naturally when PCRE2_UCHAR is 8 bits wide, but needs
+  fudges in the other cases. In the 32-bit case the padding comes first so that
+  the occu field itself is 32-bit aligned. Without the padding, this structure
+  is no longer a multiple of PCRE2_SIZE on m68k, and the check below fails. */
+
 #if PCRE2_CODE_UNIT_WIDTH == 8
   PCRE2_UCHAR occu[6];       /* Used for other case code units */
 #elif PCRE2_CODE_UNIT_WIDTH == 16
   PCRE2_UCHAR occu[2];       /* Used for other case code units */
+  uint8_t unused[2];         /* Ensure 32-bit alignment (see above) */
 #else
+  uint8_t unused[2];         /* Ensure 32-bit alignment (see above) */
   PCRE2_UCHAR occu[1];       /* Used for other case code units */
 #endif
 
@@ -818,6 +830,9 @@ typedef struct heapframe {
   PCRE2_SIZE ovector[131072]; /* Must be last in the structure */
 } heapframe;
 
+/* This typedef is a check that the size of the heapframe structure is a
+multiple of PCRE2_SIZE. See various comments above. */
+
 typedef char check_heapframe_size[
   ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
 
@@ -881,6 +896,8 @@ typedef struct dfa_match_block {
   PCRE2_SPTR last_used_ptr;       /* Latest consulted character */
   const uint8_t *tables;          /* Character tables */
   PCRE2_SIZE start_offset;        /* The start offset value */
+  PCRE2_SIZE heap_limit;          /* As it says */
+  PCRE2_SIZE heap_used;           /* As it says */
   uint32_t match_limit;           /* As it says */
   uint32_t match_limit_depth;     /* As it says */
   uint32_t match_call_count;      /* Number of calls of internal function */

+ 26 - 11
thirdparty/pcre2/src/pcre2_jit_compile.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -839,6 +839,7 @@ switch(*cc)
 #endif
 
   case OP_MARK:
+  case OP_COMMIT_ARG:
   case OP_PRUNE_ARG:
   case OP_SKIP_ARG:
   case OP_THEN_ARG:
@@ -939,6 +940,7 @@ while (cc < ccend)
     common->control_head_ptr = 1;
     /* Fall through. */
 
+    case OP_COMMIT_ARG:
     case OP_PRUNE_ARG:
     case OP_MARK:
     if (common->mark_ptr == 0)
@@ -1553,6 +1555,7 @@ while (cc < ccend)
     break;
 
     case OP_MARK:
+    case OP_COMMIT_ARG:
     case OP_PRUNE_ARG:
     case OP_THEN_ARG:
     SLJIT_ASSERT(common->mark_ptr != 0);
@@ -1733,6 +1736,7 @@ while (cc < ccend)
     break;
 
     case OP_MARK:
+    case OP_COMMIT_ARG:
     case OP_PRUNE_ARG:
     case OP_THEN_ARG:
     SLJIT_ASSERT(common->mark_ptr != 0);
@@ -2041,6 +2045,7 @@ while (cc < ccend)
     break;
 
     case OP_MARK:
+    case OP_COMMIT_ARG:
     case OP_PRUNE_ARG:
     case OP_THEN_ARG:
     SLJIT_ASSERT(common->mark_ptr != 0);
@@ -2428,6 +2433,7 @@ while (cc < ccend)
     break;
 
     case OP_MARK:
+    case OP_COMMIT_ARG:
     case OP_PRUNE_ARG:
     case OP_THEN_ARG:
     SLJIT_ASSERT(common->mark_ptr != 0);
@@ -3666,7 +3672,8 @@ if (!common->utf)
 #endif
 
 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
-OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
+OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
@@ -5894,6 +5901,8 @@ for (i = 0; i < 32; i++)
     }
   }
 
+if (len == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */
+
 i = 0;
 j = 0;
 
@@ -6627,7 +6636,8 @@ if (needstype || needsscript)
 #endif
 
   OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
-  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
+  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
+  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
   OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
   OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
@@ -7254,10 +7264,11 @@ while (cc < end_subject)
     if ((ricount & 1) != 0) break;  /* Grapheme break required */
     }
 
-  /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
-  any number of Extend before a following E_Modifier. */
+  /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
+  allows any number of them before a following Extended_Pictographic. */
 
-  if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+  if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
+       lgb != ucp_gbExtended_Pictographic)
     lgb = rgb;
 
   prevcc = cc;
@@ -7309,10 +7320,11 @@ while (cc < end_subject)
     if ((ricount & 1) != 0) break;  /* Grapheme break required */
     }
 
-  /* If Extend follows E_Base[_GAZ] do not update lgb; this allows
-  any number of Extend before a following E_Modifier. */
+  /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
+  allows any number of them before a following Extended_Pictographic. */
 
-  if (rgb != ucp_gbExtend || (lgb != ucp_gbE_Base && lgb != ucp_gbE_Base_GAZ))
+  if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
+       lgb != ucp_gbExtended_Pictographic)
     lgb = rgb;
 
   cc++;
@@ -10346,7 +10358,8 @@ backtrack_common *backtrack;
 PCRE2_UCHAR opcode = *cc;
 PCRE2_SPTR ccend = cc + 1;
 
-if (opcode == OP_PRUNE_ARG || opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
+if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
+    opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
   ccend += 2 + cc[1];
 
 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
@@ -10358,7 +10371,7 @@ if (opcode == OP_SKIP)
   return ccend;
   }
 
-if (opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
+if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
   {
   OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
   OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
@@ -10677,6 +10690,7 @@ while (cc < ccend)
     case OP_THEN:
     case OP_THEN_ARG:
     case OP_COMMIT:
+    case OP_COMMIT_ARG:
     cc = compile_control_verb_matchingpath(common, cc, parent);
     break;
 
@@ -11751,6 +11765,7 @@ while (current)
     break;
 
     case OP_COMMIT:
+    case OP_COMMIT_ARG:
     if (!common->local_quit_available)
       OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
     if (common->quit_label == NULL)

+ 1 - 8
thirdparty/pcre2/src/pcre2_maketables.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-         New API code Copyright (c) 2016 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -141,13 +141,6 @@ for (i = 0; i < 256; i++)
   if (isdigit(i)) x += ctype_digit;
   if (isxdigit(i)) x += ctype_xdigit;
   if (isalnum(i) || i == '_') x += ctype_word;
-
-  /* Note: strchr includes the terminating zero in the characters it considers.
-  In this instance, that is ok because we want binary zero to be flagged as a
-  meta-character, which in this sense is any character that terminates a run
-  of data characters. */
-
-  if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
   *p++ = x;
   }
 

+ 35 - 19
thirdparty/pcre2/src/pcre2_match.c

@@ -43,11 +43,11 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "config.h"
 #endif
 
-/* These defines enables debugging code */
+/* These defines enable debugging code */
 
-//#define DEBUG_FRAMES_DISPLAY
-//#define DEBUG_SHOW_OPS
-//#define DEBUG_SHOW_RMATCH
+/* #define DEBUG_FRAMES_DISPLAY */
+/* #define DEBUG_SHOW_OPS */
+/* #define DEBUG_SHOW_RMATCH */
 
 #ifdef DEBUG_FRAME_DISPLAY
 #include <stdarg.h>
@@ -149,7 +149,7 @@ changed, the code at RETURN_SWITCH below must be updated in sync.  */
 enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
        RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
        RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
-       RM31,  RM32, RM33, RM34, RM35 };
+       RM31,  RM32, RM33, RM34, RM35, RM36 };
 
 #ifdef SUPPORT_WIDE_CHARS
 enum { RM100=100, RM101 };
@@ -770,7 +770,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
     /* ===================================================================== */
     /* Real or forced end of the pattern, assertion, or recursion. In an
     assertion ACCEPT, update the last used pointer and remember the current
-    frame so that the captures can be fished out of it. */
+    frame so that the captures and mark can be fished out of it. */
 
     case OP_ASSERT_ACCEPT:
     if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
@@ -1776,7 +1776,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 
 
     /* ===================================================================== */
-    /* Match a bit-mapped character class, possibly repeatedly. These op codes
+    /* Match a bit-mapped character class, possibly repeatedly. These opcodes
     are used when all the characters in the class have values in the range
     0-255, and either the matching is caseful, or the characters are in the
     range 0-127 when UTF processing is enabled. The only difference between
@@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 
           if (reptype == REPTYPE_POS) continue;    /* No backtracking */
 
+          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
+          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+          go too far. */
+
           for (;;)
             {
             RMATCH(Fecode, RM201);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (Feptr-- == Lstart_eptr) break;  /* Tried at original position */
+            if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
             BACKCHAR(Feptr);
             }
           }
@@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 
         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
 
+        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
+        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+        go too far. */
+
         for(;;)
           {
           RMATCH(Fecode, RM101);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (Feptr-- == Lstart_eptr) break;  /* Tried at original position */
+          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
 #ifdef SUPPORT_UNICODE
           if (utf) BACKCHAR(Feptr);
 #endif
@@ -2456,7 +2464,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 
     /* ===================================================================== */
     /* Match a single character type repeatedly. Note that the property type
-    does not need to be in a stack frame as it not used within an RMATCH()
+    does not need to be in a stack frame as it is not used within an RMATCH()
     loop. */
 
 #define Lstart_eptr  F->temp_sptr[0]
@@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
         if (reptype == REPTYPE_POS) continue;    /* No backtracking */
 
         /* After \C in UTF mode, Lstart_eptr might be in the middle of a
-        Unicode character. Use <= pp to ensure backtracking doesn't go too far.
-        */
+        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
+        go too far. */
 
         for(;;)
           {
@@ -4135,7 +4143,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
             }
           break;
 
-          /* The "byte" (i.e. "code unit")  case is the same as non-UTF */
+          /* The "byte" (i.e. "code unit") case is the same as non-UTF */
 
           case OP_ANYBYTE:
           fc = Lmax - Lmin;
@@ -5111,7 +5119,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
     /* Positive assertions are like other groups except that PCRE doesn't allow
     the effect of (*THEN) to escape beyond an assertion; it is therefore
     treated as NOMATCH. (*ACCEPT) is treated as successful assertion, with its
-    captures retained. Any other return is an error. */
+    captures and mark retained. Any other return is an error. */
 
 #define Lframe_type  F->temp_32[0]
 
@@ -5128,6 +5136,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               (char *)assert_accept_frame + offsetof(heapframe, ovector),
               assert_accept_frame->offset_top * sizeof(PCRE2_SIZE));
         Foffset_top = assert_accept_frame->offset_top;
+        Fmark = assert_accept_frame->mark;
         break;
         }
       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
@@ -5416,7 +5425,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
       Feptr -= number;
       }
 
-    /* Save the earliest consulted character, then skip to next op code */
+    /* Save the earliest consulted character, then skip to next opcode */
 
     if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
     Fecode += 1 + LINK_SIZE;
@@ -5501,7 +5510,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
       frame so that it points to the final branch. */
 
       case OP_ONCE:
-      Fback_frame = ((char *)F - (char *)P) + frame_size;
+      Fback_frame = ((char *)F - (char *)P);
       for (;;)
         {
         uint32_t y = GET(P->ecode,1);
@@ -5829,6 +5838,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
     mb->verb_current_recurse = Fcurrent_recurse;
     RRETURN(MATCH_COMMIT);
 
+    case OP_COMMIT_ARG:
+    Fmark = mb->nomatch_mark = Fecode + 2;
+    RMATCH(Fecode + PRIV(OP_lengths)[*Fecode] + Fecode[1], RM36);
+    if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+    mb->verb_current_recurse = Fcurrent_recurse;
+    RRETURN(MATCH_COMMIT);
+
     case OP_PRUNE:
     RMATCH(Fecode + PRIV(OP_lengths)[*Fecode], RM14);
     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
@@ -5921,7 +5937,7 @@ in rrc. */
 
 RETURN_SWITCH:
 if (Frdepth == 0) return rrc;                     /* Exit from the top level */
-F = (heapframe *)((char *)F - Fback_frame);       /* Back track */
+F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
 mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
 
 #ifdef DEBUG_SHOW_RMATCH
@@ -5934,7 +5950,7 @@ switch (Freturn_id)
   LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
   LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
   LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
-  LBL(33) LBL(34) LBL(35)
+  LBL(33) LBL(34) LBL(35) LBL(36)
 
 #ifdef SUPPORT_WIDE_CHARS
   LBL(100) LBL(101)
@@ -6275,7 +6291,7 @@ mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
 /* If a pattern has very many capturing parentheses, the frame size may be very
 large. Ensure that there are at least 10 available frames by getting an initial
 vector on the heap if necessary, except when the heap limit prevents this. Get
-fewer if possible. (The heap limit is in kilobytes.) */
+fewer if possible. (The heap limit is in kibibytes.) */
 
 if (frame_size <= START_FRAMES_SIZE/10)
   {

+ 2 - 1
thirdparty/pcre2/src/pcre2_pattern_info.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -390,6 +390,7 @@ while (TRUE)
 #endif
 
     case OP_MARK:
+    case OP_COMMIT_ARG:
     case OP_PRUNE_ARG:
     case OP_SKIP_ARG:
     case OP_THEN_ARG:

+ 20 - 2
thirdparty/pcre2/src/pcre2_serialize.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -127,7 +127,25 @@ dst_bytes += tables_length;
 for (i = 0; i < number_of_codes; i++)
   {
   re = (const pcre2_real_code *)(codes[i]);
-  memcpy(dst_bytes, (char *)re, re->blocksize);
+  (void)memcpy(dst_bytes, (char *)re, re->blocksize);
+  
+  /* Certain fields in the compiled code block are re-set during 
+  deserialization. In order to ensure that the serialized data stream is always 
+  the same for the same pattern, set them to zero here. We can't assume the 
+  copy of the pattern is correctly aligned for accessing the fields as part of 
+  a structure. Note the use of sizeof(void *) in the second of these, to
+  specify the size of a pointer. If sizeof(uint8_t *) is used (tables is a 
+  pointer to uint8_t), gcc gives a warning because the first argument is also a 
+  pointer to uint8_t. Casting the first argument to (void *) can stop this, but 
+  it didn't stop Coverity giving the same complaint. */
+  
+  (void)memset(dst_bytes + offsetof(pcre2_real_code, memctl), 0, 
+    sizeof(pcre2_memctl));
+  (void)memset(dst_bytes + offsetof(pcre2_real_code, tables), 0, 
+    sizeof(void *));
+  (void)memset(dst_bytes + offsetof(pcre2_real_code, executable_jit), 0,
+    sizeof(void *));        
+ 
   dst_bytes += re->blocksize;
   }
 

+ 37 - 1
thirdparty/pcre2/src/pcre2_string_utils.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-         New API code Copyright (c) 2016 University of Cambridge
+          New API code Copyright (c) 2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -50,6 +50,42 @@ functions work only on 8-bit data. */
 #include "pcre2_internal.h"
 
 
+/*************************************************
+*    Emulated memmove() for systems without it   *
+*************************************************/
+
+/* This function can make use of bcopy() if it is available. Otherwise do it by
+steam, as there some non-Unix environments that lack both memmove() and
+bcopy(). */
+
+#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
+void *
+PRIV(memmove)(void *d, const void *s, size_t n)
+{
+#ifdef HAVE_BCOPY
+bcopy(s, d, n);
+return d;
+#else
+size_t i;
+unsigned char *dest = (unsigned char *)d;
+const unsigned char *src = (const unsigned char *)s;
+if (dest > src)
+  {
+  dest += n;
+  src += n;
+  for (i = 0; i < n; ++i) *(--dest) = *(--src);
+  return (void *)dest;
+  }
+else
+  {
+  for (i = 0; i < n; ++i) *dest++ = *src++;
+  return (void *)(dest - n);
+  }
+#endif   /* not HAVE_BCOPY */
+}
+#endif   /* not VPCOMPAT && not HAVE_MEMMOVE */
+
+
 /*************************************************
 *    Compare two zero-terminated PCRE2 strings   *
 *************************************************/

+ 4 - 2
thirdparty/pcre2/src/pcre2_study.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -707,6 +707,7 @@ for (;;)
     /* Skip these, but we need to add in the name length. */
 
     case OP_MARK:
+    case OP_COMMIT_ARG:
     case OP_PRUNE_ARG:
     case OP_SKIP_ARG:
     case OP_THEN_ARG:
@@ -956,6 +957,7 @@ do
       case OP_CIRCM:
       case OP_CLOSE:
       case OP_COMMIT:
+      case OP_COMMIT_ARG:
       case OP_COND:
       case OP_CREF:
       case OP_FALSE:
@@ -1274,7 +1276,7 @@ do
       break;
 
       /* Single character types set the bits and stop. Note that if PCRE2_UCP
-      is set, we do not see these op codes because \d etc are converted to
+      is set, we do not see these opcodes because \d etc are converted to
       properties. Therefore, these apply in the case when only characters less
       than 256 are recognized to match the types. */
 

+ 37 - 10
thirdparty/pcre2/src/pcre2_substitute.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-         New API code Copyright (c) 2016 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -238,10 +238,12 @@ PCRE2_SPTR repend;
 PCRE2_SIZE extra_needed = 0;
 PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
 PCRE2_SIZE *ovector;
+PCRE2_SIZE ovecsave[3];
 
 buff_offset = 0;
 lengthleft = buff_length = *blength;
 *blength = PCRE2_UNSET;
+ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
 
 /* Partial matching is not valid. */
 
@@ -361,13 +363,33 @@ do
     }
 
   /* Handle a successful match. Matches that use \K to end before they start
-  are not supported. */
-
-  if (ovector[1] < ovector[0])
+  or start before the current point in the subject are not supported. */
+  
+  if (ovector[1] < ovector[0] || ovector[0] < start_offset)
     {
     rc = PCRE2_ERROR_BADSUBSPATTERN;
     goto EXIT;
     }
+    
+  /* Check for the same match as previous. This is legitimate after matching an 
+  empty string that starts after the initial match offset. We have tried again
+  at the match point in case the pattern is one like /(?<=\G.)/ which can never
+  match at its starting point, so running the match achieves the bumpalong. If
+  we do get the same (null) match at the original match point, it isn't such a
+  pattern, so we now do the empty string magic. In all other cases, a repeat
+  match should never occur. */
+    
+  if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
+    {                                                                        
+    if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)     
+      {                                                                   
+      goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;                 
+      ovecsave[2] = start_offset;                                     
+      continue;    /* Back to the top of the loop */                        
+      }
+    rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
+    goto EXIT;   
+    }   
 
   /* Count substitutions with a paranoid check for integer overflow; surely no
   real call to this function would ever hit this! */
@@ -799,13 +821,18 @@ do
       } /* End handling a literal code unit */
     }   /* End of loop for scanning the replacement. */
 
-  /* The replacement has been copied to the output. Update the start offset to
-  point to the rest of the subject string. If we matched an empty string,
-  do the magic for global matches. */
-
-  start_offset = ovector[1];
-  goptions = (ovector[0] != ovector[1])? 0 :
+  /* The replacement has been copied to the output. Save the details of this
+  match. See above for how this data is used. If we matched an empty string, do
+  the magic for global matches. Finally, update the start offset to point to
+  the rest of the subject string. */
+  
+  ovecsave[0] = ovector[0];                                
+  ovecsave[1] = ovector[1];                                        
+  ovecsave[2] = start_offset;
+   
+  goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
     PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
+  start_offset = ovector[1];
   } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0);  /* Repeat "do" loop */
 
 /* Copy the rest of the subject. */

+ 196 - 182
thirdparty/pcre2/src/pcre2_tables.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -137,9 +137,10 @@ const uint32_t PRIV(ucp_gentype)[] = {
 
 /* This table encodes the rules for finding the end of an extended grapheme
 cluster. Every code point has a grapheme break property which is one of the
-ucp_gbXX values defined in pcre2_ucp.h. The 2-dimensional table is indexed by
-the properties of two adjacent code points. The left property selects a word
-from the table, and the right property selects a bit from that word like this:
+ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
+10 and 11. The 2-dimensional table is indexed by the properties of two adjacent
+code points. The left property selects a word from the table, and the right
+property selects a bit from that word like this:
 
   PRIV(ucp_gbtable)[left-property] & (1 << right-property)
 
@@ -166,49 +167,41 @@ are implementing).
 
 6. Do not break after Prepend characters.
 
-7. Do not break within emoji modifier sequences (E_Base or E_Base_GAZ followed
-   by E_Modifier). Extend characters are allowed before the modifier; this
-   cannot be represented in this table, the code has to deal with it.
+7. Do not break within emoji modifier sequences or emoji zwj sequences. That
+   is, do not break between characters with the Extended_Pictographic property.
+   Extend and ZWJ characters are allowed between the characters; this cannot be
+   represented in this table, the code has to deal with it.
 
-8. Do not break within emoji zwj sequences (ZWJ followed by Glue_After_Zwj   or
-   E_Base_GAZ).
-
-9. Do not break within emoji flag sequences. That is, do not break between
+8. Do not break within emoji flag sequences. That is, do not break between
    regional indicator (RI) symbols if there are an odd number of RI characters
    before the break point. This table encodes "join RI characters"; the code
    has to deal with checking for previous adjoining RIs.
 
-10. Otherwise, break everywhere.
+9. Otherwise, break everywhere.
 */
 
 #define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
 
 const uint32_t PRIV(ucp_gbtable)[] = {
-   (1<<ucp_gbLF),                                           /*  0 CR */
-   0,                                                       /*  1 LF */
-   0,                                                       /*  2 Control */
-   ESZ,                                                     /*  3 Extend */
-   ESZ|(1<<ucp_gbPrepend)|                                  /*  4 Prepend */
+   (1<<ucp_gbLF),                                      /*  0 CR */
+   0,                                                  /*  1 LF */
+   0,                                                  /*  2 Control */
+   ESZ,                                                /*  3 Extend */
+   ESZ|(1<<ucp_gbPrepend)|                             /*  4 Prepend */
        (1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbT)|
        (1<<ucp_gbLV)|(1<<ucp_gbLVT)|(1<<ucp_gbOther)|
-       (1<<ucp_gbRegionalIndicator)|
-       (1<<ucp_gbE_Base)|(1<<ucp_gbE_Modifier)|
-       (1<<ucp_gbE_Base_GAZ)|
-       (1<<ucp_gbZWJ)|(1<<ucp_gbGlue_After_Zwj),
-   ESZ,                                                     /*  5 SpacingMark */
-   ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)|             /*  6 L */
+       (1<<ucp_gbRegionalIndicator),
+   ESZ,                                                /*  5 SpacingMark */
+   ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)|        /*  6 L */
        (1<<ucp_gbLVT),
-   ESZ|(1<<ucp_gbV)|(1<<ucp_gbT),                           /*  7 V */
-   ESZ|(1<<ucp_gbT),                                        /*  8 T */
-   ESZ|(1<<ucp_gbV)|(1<<ucp_gbT),                           /*  9 LV */
-   ESZ|(1<<ucp_gbT),                                        /* 10 LVT */
-   (1<<ucp_gbRegionalIndicator),                            /* 11 RegionalIndicator */
-   ESZ,                                                     /* 12 Other */
-   ESZ|(1<<ucp_gbE_Modifier),                               /* 13 E_Base */
-   ESZ,                                                     /* 14 E_Modifier */
-   ESZ|(1<<ucp_gbE_Modifier),                               /* 15 E_Base_GAZ */
-   ESZ|(1<<ucp_gbGlue_After_Zwj)|(1<<ucp_gbE_Base_GAZ),     /* 16 ZWJ */
-   ESZ                                                      /* 12 Glue_After_Zwj */
+   ESZ|(1<<ucp_gbV)|(1<<ucp_gbT),                      /*  7 V */
+   ESZ|(1<<ucp_gbT),                                   /*  8 T */
+   ESZ|(1<<ucp_gbV)|(1<<ucp_gbT),                      /*  9 LV */
+   ESZ|(1<<ucp_gbT),                                   /* 10 LVT */
+   (1<<ucp_gbRegionalIndicator),                       /* 11 RegionalIndicator */
+   ESZ,                                                /* 12 Other */
+   ESZ,                                                /* 13 ZWJ */
+   ESZ|(1<<ucp_gbExtended_Pictographic)                /* 14 Extended Pictographic */
 };
 
 #undef ESZ
@@ -282,6 +275,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
 #define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
 #define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
+#define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0"
 #define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
 #define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
 #define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
@@ -292,9 +286,11 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
 #define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
 #define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
+#define STRING_Gunjala_Gondi0 STR_G STR_u STR_n STR_j STR_a STR_l STR_a STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
 #define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
 #define STRING_Han0 STR_H STR_a STR_n "\0"
 #define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
+#define STRING_Hanifi_Rohingya0 STR_H STR_a STR_n STR_i STR_f STR_i STR_UNDERSCORE STR_R STR_o STR_h STR_i STR_n STR_g STR_y STR_a "\0"
 #define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
 #define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0"
 #define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
@@ -330,6 +326,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
 #define STRING_M0 STR_M "\0"
 #define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
+#define STRING_Makasar0 STR_M STR_a STR_k STR_a STR_s STR_a STR_r "\0"
 #define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
 #define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
 #define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
@@ -337,6 +334,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
 #define STRING_Mc0 STR_M STR_c "\0"
 #define STRING_Me0 STR_M STR_e "\0"
+#define STRING_Medefaidrin0 STR_M STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0"
 #define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
 #define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
 #define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
@@ -364,6 +362,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
 #define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
 #define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
+#define STRING_Old_Sogdian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
 #define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
 #define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
 #define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
@@ -397,6 +396,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Sk0 STR_S STR_k "\0"
 #define STRING_Sm0 STR_S STR_m "\0"
 #define STRING_So0 STR_S STR_o "\0"
+#define STRING_Sogdian0 STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
 #define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
 #define STRING_Soyombo0 STR_S STR_o STR_y STR_o STR_m STR_b STR_o "\0"
 #define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
@@ -469,6 +469,7 @@ const char PRIV(utt_names)[] =
   STRING_Cyrillic0
   STRING_Deseret0
   STRING_Devanagari0
+  STRING_Dogra0
   STRING_Duployan0
   STRING_Egyptian_Hieroglyphs0
   STRING_Elbasan0
@@ -479,9 +480,11 @@ const char PRIV(utt_names)[] =
   STRING_Grantha0
   STRING_Greek0
   STRING_Gujarati0
+  STRING_Gunjala_Gondi0
   STRING_Gurmukhi0
   STRING_Han0
   STRING_Hangul0
+  STRING_Hanifi_Rohingya0
   STRING_Hanunoo0
   STRING_Hatran0
   STRING_Hebrew0
@@ -517,6 +520,7 @@ const char PRIV(utt_names)[] =
   STRING_Lydian0
   STRING_M0
   STRING_Mahajani0
+  STRING_Makasar0
   STRING_Malayalam0
   STRING_Mandaic0
   STRING_Manichaean0
@@ -524,6 +528,7 @@ const char PRIV(utt_names)[] =
   STRING_Masaram_Gondi0
   STRING_Mc0
   STRING_Me0
+  STRING_Medefaidrin0
   STRING_Meetei_Mayek0
   STRING_Mende_Kikakui0
   STRING_Meroitic_Cursive0
@@ -551,6 +556,7 @@ const char PRIV(utt_names)[] =
   STRING_Old_North_Arabian0
   STRING_Old_Permic0
   STRING_Old_Persian0
+  STRING_Old_Sogdian0
   STRING_Old_South_Arabian0
   STRING_Old_Turkic0
   STRING_Oriya0
@@ -584,6 +590,7 @@ const char PRIV(utt_names)[] =
   STRING_Sk0
   STRING_Sm0
   STRING_So0
+  STRING_Sogdian0
   STRING_Sora_Sompeng0
   STRING_Soyombo0
   STRING_Sundanese0
@@ -656,154 +663,161 @@ const ucp_type_table PRIV(utt)[] = {
   { 265, PT_SC, ucp_Cyrillic },
   { 274, PT_SC, ucp_Deseret },
   { 282, PT_SC, ucp_Devanagari },
-  { 293, PT_SC, ucp_Duployan },
-  { 302, PT_SC, ucp_Egyptian_Hieroglyphs },
-  { 323, PT_SC, ucp_Elbasan },
-  { 331, PT_SC, ucp_Ethiopic },
-  { 340, PT_SC, ucp_Georgian },
-  { 349, PT_SC, ucp_Glagolitic },
-  { 360, PT_SC, ucp_Gothic },
-  { 367, PT_SC, ucp_Grantha },
-  { 375, PT_SC, ucp_Greek },
-  { 381, PT_SC, ucp_Gujarati },
-  { 390, PT_SC, ucp_Gurmukhi },
-  { 399, PT_SC, ucp_Han },
-  { 403, PT_SC, ucp_Hangul },
-  { 410, PT_SC, ucp_Hanunoo },
-  { 418, PT_SC, ucp_Hatran },
-  { 425, PT_SC, ucp_Hebrew },
-  { 432, PT_SC, ucp_Hiragana },
-  { 441, PT_SC, ucp_Imperial_Aramaic },
-  { 458, PT_SC, ucp_Inherited },
-  { 468, PT_SC, ucp_Inscriptional_Pahlavi },
-  { 490, PT_SC, ucp_Inscriptional_Parthian },
-  { 513, PT_SC, ucp_Javanese },
-  { 522, PT_SC, ucp_Kaithi },
-  { 529, PT_SC, ucp_Kannada },
-  { 537, PT_SC, ucp_Katakana },
-  { 546, PT_SC, ucp_Kayah_Li },
-  { 555, PT_SC, ucp_Kharoshthi },
-  { 566, PT_SC, ucp_Khmer },
-  { 572, PT_SC, ucp_Khojki },
-  { 579, PT_SC, ucp_Khudawadi },
-  { 589, PT_GC, ucp_L },
-  { 591, PT_LAMP, 0 },
-  { 594, PT_SC, ucp_Lao },
-  { 598, PT_SC, ucp_Latin },
-  { 604, PT_SC, ucp_Lepcha },
-  { 611, PT_SC, ucp_Limbu },
-  { 617, PT_SC, ucp_Linear_A },
-  { 626, PT_SC, ucp_Linear_B },
-  { 635, PT_SC, ucp_Lisu },
-  { 640, PT_PC, ucp_Ll },
-  { 643, PT_PC, ucp_Lm },
-  { 646, PT_PC, ucp_Lo },
-  { 649, PT_PC, ucp_Lt },
-  { 652, PT_PC, ucp_Lu },
-  { 655, PT_SC, ucp_Lycian },
-  { 662, PT_SC, ucp_Lydian },
-  { 669, PT_GC, ucp_M },
-  { 671, PT_SC, ucp_Mahajani },
-  { 680, PT_SC, ucp_Malayalam },
-  { 690, PT_SC, ucp_Mandaic },
-  { 698, PT_SC, ucp_Manichaean },
-  { 709, PT_SC, ucp_Marchen },
-  { 717, PT_SC, ucp_Masaram_Gondi },
-  { 731, PT_PC, ucp_Mc },
-  { 734, PT_PC, ucp_Me },
-  { 737, PT_SC, ucp_Meetei_Mayek },
-  { 750, PT_SC, ucp_Mende_Kikakui },
-  { 764, PT_SC, ucp_Meroitic_Cursive },
-  { 781, PT_SC, ucp_Meroitic_Hieroglyphs },
-  { 802, PT_SC, ucp_Miao },
-  { 807, PT_PC, ucp_Mn },
-  { 810, PT_SC, ucp_Modi },
-  { 815, PT_SC, ucp_Mongolian },
-  { 825, PT_SC, ucp_Mro },
-  { 829, PT_SC, ucp_Multani },
-  { 837, PT_SC, ucp_Myanmar },
-  { 845, PT_GC, ucp_N },
-  { 847, PT_SC, ucp_Nabataean },
-  { 857, PT_PC, ucp_Nd },
-  { 860, PT_SC, ucp_New_Tai_Lue },
-  { 872, PT_SC, ucp_Newa },
-  { 877, PT_SC, ucp_Nko },
-  { 881, PT_PC, ucp_Nl },
-  { 884, PT_PC, ucp_No },
-  { 887, PT_SC, ucp_Nushu },
-  { 893, PT_SC, ucp_Ogham },
-  { 899, PT_SC, ucp_Ol_Chiki },
-  { 908, PT_SC, ucp_Old_Hungarian },
-  { 922, PT_SC, ucp_Old_Italic },
-  { 933, PT_SC, ucp_Old_North_Arabian },
-  { 951, PT_SC, ucp_Old_Permic },
-  { 962, PT_SC, ucp_Old_Persian },
-  { 974, PT_SC, ucp_Old_South_Arabian },
-  { 992, PT_SC, ucp_Old_Turkic },
-  { 1003, PT_SC, ucp_Oriya },
-  { 1009, PT_SC, ucp_Osage },
-  { 1015, PT_SC, ucp_Osmanya },
-  { 1023, PT_GC, ucp_P },
-  { 1025, PT_SC, ucp_Pahawh_Hmong },
-  { 1038, PT_SC, ucp_Palmyrene },
-  { 1048, PT_SC, ucp_Pau_Cin_Hau },
-  { 1060, PT_PC, ucp_Pc },
-  { 1063, PT_PC, ucp_Pd },
-  { 1066, PT_PC, ucp_Pe },
-  { 1069, PT_PC, ucp_Pf },
-  { 1072, PT_SC, ucp_Phags_Pa },
-  { 1081, PT_SC, ucp_Phoenician },
-  { 1092, PT_PC, ucp_Pi },
-  { 1095, PT_PC, ucp_Po },
-  { 1098, PT_PC, ucp_Ps },
-  { 1101, PT_SC, ucp_Psalter_Pahlavi },
-  { 1117, PT_SC, ucp_Rejang },
-  { 1124, PT_SC, ucp_Runic },
-  { 1130, PT_GC, ucp_S },
-  { 1132, PT_SC, ucp_Samaritan },
-  { 1142, PT_SC, ucp_Saurashtra },
-  { 1153, PT_PC, ucp_Sc },
-  { 1156, PT_SC, ucp_Sharada },
-  { 1164, PT_SC, ucp_Shavian },
-  { 1172, PT_SC, ucp_Siddham },
-  { 1180, PT_SC, ucp_SignWriting },
-  { 1192, PT_SC, ucp_Sinhala },
-  { 1200, PT_PC, ucp_Sk },
-  { 1203, PT_PC, ucp_Sm },
-  { 1206, PT_PC, ucp_So },
-  { 1209, PT_SC, ucp_Sora_Sompeng },
-  { 1222, PT_SC, ucp_Soyombo },
-  { 1230, PT_SC, ucp_Sundanese },
-  { 1240, PT_SC, ucp_Syloti_Nagri },
-  { 1253, PT_SC, ucp_Syriac },
-  { 1260, PT_SC, ucp_Tagalog },
-  { 1268, PT_SC, ucp_Tagbanwa },
-  { 1277, PT_SC, ucp_Tai_Le },
-  { 1284, PT_SC, ucp_Tai_Tham },
-  { 1293, PT_SC, ucp_Tai_Viet },
-  { 1302, PT_SC, ucp_Takri },
-  { 1308, PT_SC, ucp_Tamil },
-  { 1314, PT_SC, ucp_Tangut },
-  { 1321, PT_SC, ucp_Telugu },
-  { 1328, PT_SC, ucp_Thaana },
-  { 1335, PT_SC, ucp_Thai },
-  { 1340, PT_SC, ucp_Tibetan },
-  { 1348, PT_SC, ucp_Tifinagh },
-  { 1357, PT_SC, ucp_Tirhuta },
-  { 1365, PT_SC, ucp_Ugaritic },
-  { 1374, PT_SC, ucp_Vai },
-  { 1378, PT_SC, ucp_Warang_Citi },
-  { 1390, PT_ALNUM, 0 },
-  { 1394, PT_PXSPACE, 0 },
-  { 1398, PT_SPACE, 0 },
-  { 1402, PT_UCNC, 0 },
-  { 1406, PT_WORD, 0 },
-  { 1410, PT_SC, ucp_Yi },
-  { 1413, PT_GC, ucp_Z },
-  { 1415, PT_SC, ucp_Zanabazar_Square },
-  { 1432, PT_PC, ucp_Zl },
-  { 1435, PT_PC, ucp_Zp },
-  { 1438, PT_PC, ucp_Zs }
+  { 293, PT_SC, ucp_Dogra },
+  { 299, PT_SC, ucp_Duployan },
+  { 308, PT_SC, ucp_Egyptian_Hieroglyphs },
+  { 329, PT_SC, ucp_Elbasan },
+  { 337, PT_SC, ucp_Ethiopic },
+  { 346, PT_SC, ucp_Georgian },
+  { 355, PT_SC, ucp_Glagolitic },
+  { 366, PT_SC, ucp_Gothic },
+  { 373, PT_SC, ucp_Grantha },
+  { 381, PT_SC, ucp_Greek },
+  { 387, PT_SC, ucp_Gujarati },
+  { 396, PT_SC, ucp_Gunjala_Gondi },
+  { 410, PT_SC, ucp_Gurmukhi },
+  { 419, PT_SC, ucp_Han },
+  { 423, PT_SC, ucp_Hangul },
+  { 430, PT_SC, ucp_Hanifi_Rohingya },
+  { 446, PT_SC, ucp_Hanunoo },
+  { 454, PT_SC, ucp_Hatran },
+  { 461, PT_SC, ucp_Hebrew },
+  { 468, PT_SC, ucp_Hiragana },
+  { 477, PT_SC, ucp_Imperial_Aramaic },
+  { 494, PT_SC, ucp_Inherited },
+  { 504, PT_SC, ucp_Inscriptional_Pahlavi },
+  { 526, PT_SC, ucp_Inscriptional_Parthian },
+  { 549, PT_SC, ucp_Javanese },
+  { 558, PT_SC, ucp_Kaithi },
+  { 565, PT_SC, ucp_Kannada },
+  { 573, PT_SC, ucp_Katakana },
+  { 582, PT_SC, ucp_Kayah_Li },
+  { 591, PT_SC, ucp_Kharoshthi },
+  { 602, PT_SC, ucp_Khmer },
+  { 608, PT_SC, ucp_Khojki },
+  { 615, PT_SC, ucp_Khudawadi },
+  { 625, PT_GC, ucp_L },
+  { 627, PT_LAMP, 0 },
+  { 630, PT_SC, ucp_Lao },
+  { 634, PT_SC, ucp_Latin },
+  { 640, PT_SC, ucp_Lepcha },
+  { 647, PT_SC, ucp_Limbu },
+  { 653, PT_SC, ucp_Linear_A },
+  { 662, PT_SC, ucp_Linear_B },
+  { 671, PT_SC, ucp_Lisu },
+  { 676, PT_PC, ucp_Ll },
+  { 679, PT_PC, ucp_Lm },
+  { 682, PT_PC, ucp_Lo },
+  { 685, PT_PC, ucp_Lt },
+  { 688, PT_PC, ucp_Lu },
+  { 691, PT_SC, ucp_Lycian },
+  { 698, PT_SC, ucp_Lydian },
+  { 705, PT_GC, ucp_M },
+  { 707, PT_SC, ucp_Mahajani },
+  { 716, PT_SC, ucp_Makasar },
+  { 724, PT_SC, ucp_Malayalam },
+  { 734, PT_SC, ucp_Mandaic },
+  { 742, PT_SC, ucp_Manichaean },
+  { 753, PT_SC, ucp_Marchen },
+  { 761, PT_SC, ucp_Masaram_Gondi },
+  { 775, PT_PC, ucp_Mc },
+  { 778, PT_PC, ucp_Me },
+  { 781, PT_SC, ucp_Medefaidrin },
+  { 793, PT_SC, ucp_Meetei_Mayek },
+  { 806, PT_SC, ucp_Mende_Kikakui },
+  { 820, PT_SC, ucp_Meroitic_Cursive },
+  { 837, PT_SC, ucp_Meroitic_Hieroglyphs },
+  { 858, PT_SC, ucp_Miao },
+  { 863, PT_PC, ucp_Mn },
+  { 866, PT_SC, ucp_Modi },
+  { 871, PT_SC, ucp_Mongolian },
+  { 881, PT_SC, ucp_Mro },
+  { 885, PT_SC, ucp_Multani },
+  { 893, PT_SC, ucp_Myanmar },
+  { 901, PT_GC, ucp_N },
+  { 903, PT_SC, ucp_Nabataean },
+  { 913, PT_PC, ucp_Nd },
+  { 916, PT_SC, ucp_New_Tai_Lue },
+  { 928, PT_SC, ucp_Newa },
+  { 933, PT_SC, ucp_Nko },
+  { 937, PT_PC, ucp_Nl },
+  { 940, PT_PC, ucp_No },
+  { 943, PT_SC, ucp_Nushu },
+  { 949, PT_SC, ucp_Ogham },
+  { 955, PT_SC, ucp_Ol_Chiki },
+  { 964, PT_SC, ucp_Old_Hungarian },
+  { 978, PT_SC, ucp_Old_Italic },
+  { 989, PT_SC, ucp_Old_North_Arabian },
+  { 1007, PT_SC, ucp_Old_Permic },
+  { 1018, PT_SC, ucp_Old_Persian },
+  { 1030, PT_SC, ucp_Old_Sogdian },
+  { 1042, PT_SC, ucp_Old_South_Arabian },
+  { 1060, PT_SC, ucp_Old_Turkic },
+  { 1071, PT_SC, ucp_Oriya },
+  { 1077, PT_SC, ucp_Osage },
+  { 1083, PT_SC, ucp_Osmanya },
+  { 1091, PT_GC, ucp_P },
+  { 1093, PT_SC, ucp_Pahawh_Hmong },
+  { 1106, PT_SC, ucp_Palmyrene },
+  { 1116, PT_SC, ucp_Pau_Cin_Hau },
+  { 1128, PT_PC, ucp_Pc },
+  { 1131, PT_PC, ucp_Pd },
+  { 1134, PT_PC, ucp_Pe },
+  { 1137, PT_PC, ucp_Pf },
+  { 1140, PT_SC, ucp_Phags_Pa },
+  { 1149, PT_SC, ucp_Phoenician },
+  { 1160, PT_PC, ucp_Pi },
+  { 1163, PT_PC, ucp_Po },
+  { 1166, PT_PC, ucp_Ps },
+  { 1169, PT_SC, ucp_Psalter_Pahlavi },
+  { 1185, PT_SC, ucp_Rejang },
+  { 1192, PT_SC, ucp_Runic },
+  { 1198, PT_GC, ucp_S },
+  { 1200, PT_SC, ucp_Samaritan },
+  { 1210, PT_SC, ucp_Saurashtra },
+  { 1221, PT_PC, ucp_Sc },
+  { 1224, PT_SC, ucp_Sharada },
+  { 1232, PT_SC, ucp_Shavian },
+  { 1240, PT_SC, ucp_Siddham },
+  { 1248, PT_SC, ucp_SignWriting },
+  { 1260, PT_SC, ucp_Sinhala },
+  { 1268, PT_PC, ucp_Sk },
+  { 1271, PT_PC, ucp_Sm },
+  { 1274, PT_PC, ucp_So },
+  { 1277, PT_SC, ucp_Sogdian },
+  { 1285, PT_SC, ucp_Sora_Sompeng },
+  { 1298, PT_SC, ucp_Soyombo },
+  { 1306, PT_SC, ucp_Sundanese },
+  { 1316, PT_SC, ucp_Syloti_Nagri },
+  { 1329, PT_SC, ucp_Syriac },
+  { 1336, PT_SC, ucp_Tagalog },
+  { 1344, PT_SC, ucp_Tagbanwa },
+  { 1353, PT_SC, ucp_Tai_Le },
+  { 1360, PT_SC, ucp_Tai_Tham },
+  { 1369, PT_SC, ucp_Tai_Viet },
+  { 1378, PT_SC, ucp_Takri },
+  { 1384, PT_SC, ucp_Tamil },
+  { 1390, PT_SC, ucp_Tangut },
+  { 1397, PT_SC, ucp_Telugu },
+  { 1404, PT_SC, ucp_Thaana },
+  { 1411, PT_SC, ucp_Thai },
+  { 1416, PT_SC, ucp_Tibetan },
+  { 1424, PT_SC, ucp_Tifinagh },
+  { 1433, PT_SC, ucp_Tirhuta },
+  { 1441, PT_SC, ucp_Ugaritic },
+  { 1450, PT_SC, ucp_Vai },
+  { 1454, PT_SC, ucp_Warang_Citi },
+  { 1466, PT_ALNUM, 0 },
+  { 1470, PT_PXSPACE, 0 },
+  { 1474, PT_SPACE, 0 },
+  { 1478, PT_UCNC, 0 },
+  { 1482, PT_WORD, 0 },
+  { 1486, PT_SC, ucp_Yi },
+  { 1489, PT_GC, ucp_Z },
+  { 1491, PT_SC, ucp_Zanabazar_Square },
+  { 1508, PT_PC, ucp_Zl },
+  { 1511, PT_PC, ucp_Zp },
+  { 1514, PT_PC, ucp_Zs }
 };
 
 const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 1378 - 1345
thirdparty/pcre2/src/pcre2_ucd.c


+ 27 - 21
thirdparty/pcre2/src/pcre2_ucp.h

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-         New API code Copyright (c) 2016 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -100,27 +100,25 @@ enum {
   ucp_Zs     /* Space separator */
 };
 
-/* These are grapheme break properties. */
+/* These are grapheme break properties. The Extended Pictographic property
+comes from the emoji-data.txt file. */
 
 enum {
-  ucp_gbCR,                /*  0 */
-  ucp_gbLF,                /*  1 */
-  ucp_gbControl,           /*  2 */
-  ucp_gbExtend,            /*  3 */
-  ucp_gbPrepend,           /*  4 */
-  ucp_gbSpacingMark,       /*  5 */
-  ucp_gbL,                 /*  6 Hangul syllable type L */
-  ucp_gbV,                 /*  7 Hangul syllable type V */
-  ucp_gbT,                 /*  8 Hangul syllable type T */
-  ucp_gbLV,                /*  9 Hangul syllable type LV */
-  ucp_gbLVT,               /* 10 Hangul syllable type LVT */
-  ucp_gbRegionalIndicator, /* 11 */
-  ucp_gbOther,             /* 12 */
-  ucp_gbE_Base,            /* 13 */
-  ucp_gbE_Modifier,        /* 14 */
-  ucp_gbE_Base_GAZ,        /* 15 */
-  ucp_gbZWJ,               /* 16 */
-  ucp_gbGlue_After_Zwj     /* 17 */
+  ucp_gbCR,                    /*  0 */
+  ucp_gbLF,                    /*  1 */
+  ucp_gbControl,               /*  2 */
+  ucp_gbExtend,                /*  3 */
+  ucp_gbPrepend,               /*  4 */
+  ucp_gbSpacingMark,           /*  5 */
+  ucp_gbL,                     /*  6 Hangul syllable type L */
+  ucp_gbV,                     /*  7 Hangul syllable type V */
+  ucp_gbT,                     /*  8 Hangul syllable type T */
+  ucp_gbLV,                    /*  9 Hangul syllable type LV */
+  ucp_gbLVT,                   /* 10 Hangul syllable type LVT */
+  ucp_gbRegionalIndicator,     /* 11 */
+  ucp_gbOther,                 /* 12 */
+  ucp_gbZWJ,                   /* 13 */
+  ucp_gbExtended_Pictographic  /* 14 */
 };
 
 /* These are the script identifications. */
@@ -274,7 +272,15 @@ enum {
   ucp_Masaram_Gondi,
   ucp_Nushu,
   ucp_Soyombo,
-  ucp_Zanabazar_Square
+  ucp_Zanabazar_Square,
+  /* New for Unicode 11.0.0 */
+  ucp_Dogra,
+  ucp_Gunjala_Gondi,
+  ucp_Hanifi_Rohingya,
+  ucp_Makasar,
+  ucp_Medefaidrin,
+  ucp_Old_Sogdian,
+  ucp_Sogdian
 };
 
 #endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */

+ 23 - 6
thirdparty/pcre2/src/sljit/sljitConfigInternal.h

@@ -66,7 +66,7 @@
      SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
 
    Other macros:
-     SLJIT_FUNC : calling convention attribute for both calling JIT form C and C calling back from JIT
+     SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT
      SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper)
 */
 
@@ -147,17 +147,23 @@
 #define SLJIT_CONFIG_UNSUPPORTED 1
 #endif
 
-#else /* !_WIN32 */
+#else /* _WIN32 */
 
 #if defined(_M_X64) || defined(__x86_64__)
 #define SLJIT_CONFIG_X86_64 1
+#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__)
+#define SLJIT_CONFIG_ARM_THUMB2 1
+#elif (defined(_M_ARM) && _M_ARM >= 7)
+#define SLJIT_CONFIG_ARM_V7 1
 #elif defined(_ARM_)
 #define SLJIT_CONFIG_ARM_V5 1
+#elif defined(_M_ARM64) || defined(__aarch64__)
+#define SLJIT_CONFIG_ARM_64 1
 #else
 #define SLJIT_CONFIG_X86_32 1
 #endif
 
-#endif /* !WIN32 */
+#endif /* !_WIN32 */
 #endif /* SLJIT_CONFIG_AUTO */
 
 #if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
@@ -324,6 +330,11 @@
 	sparc_cache_flush((from), (to))
 #define SLJIT_CACHE_FLUSH_OWN_IMPL 1
 
+#elif defined _WIN32
+
+#define SLJIT_CACHE_FLUSH(from, to) \
+	FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from))
+
 #else
 
 /* Calls __ARM_NR_cacheflush on ARM-Linux. */
@@ -371,12 +382,18 @@ typedef int sljit_sw;
 #define SLJIT_64BIT_ARCHITECTURE 1
 #define SLJIT_WORD_SHIFT 3
 #ifdef _WIN32
+#ifdef __GNUC__
+/* These types do not require windows.h */
+typedef unsigned long long sljit_uw;
+typedef long long sljit_sw;
+#else
 typedef unsigned __int64 sljit_uw;
 typedef __int64 sljit_sw;
-#else
+#endif
+#else /* !_WIN32 */
 typedef unsigned long int sljit_uw;
 typedef long int sljit_sw;
-#endif
+#endif /* _WIN32 */
 #endif
 
 typedef sljit_uw sljit_p;
@@ -590,7 +607,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 
 #define SLJIT_NUMBER_OF_REGISTERS 26
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
-#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE 0
 
 #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
 

+ 8 - 1
thirdparty/pcre2/src/sljit/sljitExecAllocator.c

@@ -99,7 +99,14 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
 	void *retval;
 
 #ifdef MAP_ANON
-	retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
+
+	int flags = MAP_PRIVATE | MAP_ANON;
+
+#ifdef MAP_JIT
+	flags |= MAP_JIT;
+#endif
+
+	retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
 #else
 	if (dev_zero < 0) {
 		if (open_dev_zero())

+ 9 - 1
thirdparty/pcre2/src/sljit/sljitLir.c

@@ -26,6 +26,13 @@
 
 #include "sljitLir.h"
 
+#ifdef _WIN32
+
+/* For SLJIT_CACHE_FLUSH, which can expand to FlushInstructionCache. */
+#include <windows.h>
+
+#endif /* _WIN32 */
+
 #if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
 
 /* These libraries are needed for the macros below. */
@@ -2178,7 +2185,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
 
 #endif
 
-#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
+	&& !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
 {

+ 10 - 10
thirdparty/pcre2/src/sljit/sljitLir.h

@@ -138,7 +138,7 @@ of sljitConfigInternal.h */
   be specified as scratch registers and the fifth one as saved register
   on the CPU above and any user code which requires four scratch
   registers can run unmodified. The SLJIT compiler automatically saves
-  the content of the two extra scrath register on the stack. Scratch
+  the content of the two extra scratch register on the stack. Scratch
   registers can also be preserved by saving their value on the stack
   but this needs to be done manually.
 
@@ -746,7 +746,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
    be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source
    register can hold any 32 or 64 bit value, and it is converted to a 32 bit
    compatible format first. This conversion is free (no instructions are
-   emitted) on most CPUs. A 32 bit value can also be coverted to a 64 bit
+   emitted) on most CPUs. A 32 bit value can also be converted to a 64 bit
    value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension).
 
    Note: memory addressing always uses 64 bit values on 64 bit systems so
@@ -773,8 +773,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
  */
 #define SLJIT_F32_OP		SLJIT_I32_OP
 
-/* Many CPUs (x86, ARM, PPC) has status flags which can be set according
-   to the result of an operation. Other CPUs (MIPS) does not have status
+/* Many CPUs (x86, ARM, PPC) have status flags which can be set according
+   to the result of an operation. Other CPUs (MIPS) do not have status
    flags, and results must be stored in registers. To cover both architecture
    types efficiently only two flags are defined by SLJIT:
 
@@ -810,14 +810,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
 
    Using these flags can reduce the number of emitted instructions. E.g. a
    fast loop can be implemented by decreasing a counter register and set the
-   zero flag to jump back if the counter register is not reached zero.
+   zero flag to jump back if the counter register has not reached zero.
 
    Motivation: although CPUs can set a large number of flags, usually their
    values are ignored or only one of them is used. Emulating a large number
    of flags on systems without flag register is complicated so SLJIT
    instructions must specify the flag they want to use and only that flag
    will be emulated. The last arithmetic instruction can be repeated if
-   multiple flags needs to be checked.
+   multiple flags need to be checked.
 */
 
 /* Set Zero status flag. */
@@ -884,7 +884,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
 /* Starting index of opcodes for sljit_emit_op1. */
 #define SLJIT_OP1_BASE			32
 
-/* The MOV instruction transfer data from source to destination.
+/* The MOV instruction transfers data from source to destination.
 
    MOV instruction suffixes:
 
@@ -1156,7 +1156,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
 #define SLJIT_FAST_CALL			25
 	/* Called function must be declared with the SLJIT_FUNC attribute. */
 #define SLJIT_CALL			26
-	/* Called function must be decalred with cdecl attribute.
+	/* Called function must be declared with cdecl attribute.
 	   This is the default attribute for C functions. */
 #define SLJIT_CALL_CDECL		27
 
@@ -1210,7 +1210,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl
 /* Set the destination address of the jump to this label. */
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target);
 
-/* Emit an indirect jump or fast call. Both direct and indirect form
+/* Emit an indirect jump or fast call.
    Direct form: set src to SLJIT_IMM() and srcw to the address
    Indirect form: any other valid addressing mode
     type must be between SLJIT_JUMP and SLJIT_FAST_CALL
@@ -1274,7 +1274,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
 #define SLJIT_MEM_POST		0x1000
 
 /* Emit a single memory load or store with update instruction. When the
-   requested instruction from is not supported by the CPU, it returns
+   requested instruction form is not supported by the CPU, it returns
    with SLJIT_ERR_UNSUPPORTED instead of emulating the instruction. This
    allows specializing tight loops based on the supported instruction
    forms (see SLJIT_MEM_SUPP flag).

+ 160 - 90
thirdparty/pcre2/src/sljit/sljitNativeARM_64.c

@@ -37,14 +37,14 @@ typedef sljit_u32 sljit_ins;
 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
 #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
 #define TMP_LR		(SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_SP		(SLJIT_NUMBER_OF_REGISTERS + 5)
+#define TMP_FP		(SLJIT_NUMBER_OF_REGISTERS + 5)
 
 #define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
 #define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
 
 /* r18 - platform register, currently not used */
 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
-	31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 30, 31
+	31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29
 };
 
 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
@@ -68,6 +68,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 
 #define ADC 0x9a000000
 #define ADD 0x8b000000
+#define ADDE 0x8b200000
 #define ADDI 0x91000000
 #define AND 0x8a000000
 #define ANDI 0x92000000
@@ -96,7 +97,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define FSUB 0x1e603800
 #define LDRI 0xf9400000
 #define LDP 0xa9400000
-#define LDP_PST 0xa8c00000
+#define LDP_PRE 0xa9c00000
+#define LDR_PRE 0xf8400c00
 #define LSLV 0x9ac02000
 #define LSRV 0x9ac02400
 #define MADD 0x9b000000
@@ -873,73 +875,51 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 
-	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0);
-	local_size += saved_regs_size + SLJIT_LOCALS_OFFSET;
+	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+	if (saved_regs_size & 0x8)
+		saved_regs_size += sizeof(sljit_sw);
+
 	local_size = (local_size + 15) & ~0xf;
-	compiler->local_size = local_size;
-
-	if (local_size <= (63 * sizeof(sljit_sw))) {
-		FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
-			| RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15)));
-		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
-		offs = (local_size - saved_regs_size) << (15 - 3);
-	} else {
-		offs = 0 << 15;
-		if (saved_regs_size & 0x8) {
-			offs = 1 << 15;
-			saved_regs_size += sizeof(sljit_sw);
-		}
-		local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
-		if (saved_regs_size > 0)
-			FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
-	}
+	compiler->local_size = local_size + saved_regs_size;
+
+	FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
+		| RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15)));
+
+#ifdef _WIN32
+	if (local_size >= 4096)
+		FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
+	else if (local_size > 256)
+		FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10)));
+#endif
 
 	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
 	prev = -1;
+	offs = 2 << 15;
 	for (i = SLJIT_S0; i >= tmp; i--) {
 		if (prev == -1) {
-			if (!(offs & (1 << 15))) {
-				prev = i;
-				continue;
-			}
-			FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
-			offs += 1 << 15;
+			prev = i;
 			continue;
 		}
-		FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+		FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
 		offs += 2 << 15;
 		prev = -1;
 	}
 
 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
 		if (prev == -1) {
-			if (!(offs & (1 << 15))) {
-				prev = i;
-				continue;
-			}
-			FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
-			offs += 1 << 15;
+			prev = i;
 			continue;
 		}
-		FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+		FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
 		offs += 2 << 15;
 		prev = -1;
 	}
 
-	SLJIT_ASSERT(prev == -1);
+	if (prev != -1)
+		FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
 
-	if (compiler->local_size > (63 * sizeof(sljit_sw))) {
-		/* The local_size is already adjusted by the saved registers. */
-		if (local_size > 0xfff) {
-			FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
-			local_size &= 0xfff;
-		}
-		if (local_size)
-			FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
-		FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
-			| RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15)));
-		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
-	}
+
+	FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)));
 
 	args = get_arg_count(arg_types);
 
@@ -950,6 +930,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	if (args >= 3)
 		FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));
 
+#ifdef _WIN32
+	if (local_size >= 4096) {
+		if (local_size < 4 * 4096) {
+			/* No need for a loop. */
+			if (local_size >= 2 * 4096) {
+				FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+				FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
+				local_size -= 4096;
+			}
+
+			if (local_size >= 2 * 4096) {
+				FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+				FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
+				local_size -= 4096;
+			}
+
+			FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+			local_size -= 4096;
+		}
+		else {
+			FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5)));
+			FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+			FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
+			FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10)));
+			FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */));
+			FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+
+			local_size &= 0xfff;
+		}
+
+		if (local_size > 256) {
+			FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10)));
+			FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+		}
+		else if (local_size > 0)
+			FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12)));
+
+		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
+	}
+	else if (local_size > 256) {
+		FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
+	}
+	else if (local_size > 0)
+		FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12)));
+
+#else /* !_WIN32 */
+
+	/* The local_size does not include saved registers size. */
+	if (local_size > 0xfff) {
+		FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
+		local_size &= 0xfff;
+	}
+	if (local_size != 0)
+		FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
+
+#endif /* _WIN32 */
+
 	return SLJIT_SUCCESS;
 }
 
@@ -957,13 +995,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
 {
+	sljit_s32 saved_regs_size;
+
 	CHECK_ERROR();
 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 
-	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET;
-	local_size = (local_size + 15) & ~0xf;
-	compiler->local_size = local_size;
+	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+	if (saved_regs_size & 0x8)
+		saved_regs_size += sizeof(sljit_sw);
+
+	compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf);
 	return SLJIT_SUCCESS;
 }
 
@@ -977,71 +1019,59 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
 
 	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
 
-	local_size = compiler->local_size;
+	saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2);
+	if (saved_regs_size & 0x8)
+		saved_regs_size += sizeof(sljit_sw);
 
-	saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0);
-	if (local_size <= (63 * sizeof(sljit_sw)))
-		offs = (local_size - saved_regs_size) << (15 - 3);
+	local_size = compiler->local_size - saved_regs_size;
+
+	/* Load LR as early as possible. */
+	if (local_size == 0)
+		FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
+	else if (local_size < 63 * sizeof(sljit_sw)) {
+		FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR)
+			| RN(SLJIT_SP) | (local_size << (15 - 3))));
+	}
 	else {
-		FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
-			| RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15)));
-		offs = 0 << 15;
-		if (saved_regs_size & 0x8) {
-			offs = 1 << 15;
-			saved_regs_size += sizeof(sljit_sw);
-		}
-		local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
 		if (local_size > 0xfff) {
-			FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
+			FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
 			local_size &= 0xfff;
 		}
 		if (local_size)
-			FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
+			FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
+
+		FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
 	}
 
 	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
 	prev = -1;
+	offs = 2 << 15;
 	for (i = SLJIT_S0; i >= tmp; i--) {
 		if (prev == -1) {
-			if (!(offs & (1 << 15))) {
-				prev = i;
-				continue;
-			}
-			FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
-			offs += 1 << 15;
+			prev = i;
 			continue;
 		}
-		FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+		FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
 		offs += 2 << 15;
 		prev = -1;
 	}
 
 	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
 		if (prev == -1) {
-			if (!(offs & (1 << 15))) {
-				prev = i;
-				continue;
-			}
-			FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
-			offs += 1 << 15;
+			prev = i;
 			continue;
 		}
-		FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+		FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
 		offs += 2 << 15;
 		prev = -1;
 	}
 
-	SLJIT_ASSERT(prev == -1);
+	if (prev != -1)
+		FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
 
-	if (compiler->local_size <= (63 * sizeof(sljit_sw))) {
-		FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
-			| RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15)));
-	} else if (saved_regs_size > 0) {
-		FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
-	}
-
-	FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
-	return SLJIT_SUCCESS;
+	/* These two can be executed in parallel. */
+	FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10)));
+	return push_inst(compiler, RET | RN(TMP_LR));
 }
 
 /* --------------------------------------------------------------------- */
@@ -1856,6 +1886,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
 	return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12));
 }
 
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
+{
+	sljit_s32 dst_reg;
+	sljit_ins ins;
+
+	CHECK_ERROR();
+	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
+
+	SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0);
+
+	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+	if (offset <= 0xffffff && offset >= -0xffffff) {
+		ins = ADDI;
+		if (offset < 0) {
+			offset = -offset;
+			ins = SUBI;
+		}
+
+		if (offset <= 0xfff)
+			FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (offset << 10)));
+		else {
+			FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22)));
+
+			offset &= 0xfff;
+			if (offset != 0)
+				FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (offset << 10)));
+		}
+	}
+	else {
+		FAIL_IF(load_immediate (compiler, dst_reg, offset));
+		/* Add extended register form. */
+		FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg)));
+	}
+
+	if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
+		return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
+	return SLJIT_SUCCESS;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 {
 	struct sljit_const *const_;

+ 108 - 4
thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c

@@ -110,6 +110,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define ASRSI		0x1000
 #define ASR_W		0xfa40f000
 #define ASR_WI		0xea4f0020
+#define BCC		0xd000
 #define BICI		0xf0200000
 #define BKPT		0xbe00
 #define BLX		0x4780
@@ -125,6 +126,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define EORS		0x4040
 #define EOR_W		0xea800000
 #define IT		0xbf00
+#define LDRI		0xf8500800
 #define LSLS		0x4080
 #define LSLSI		0x0000
 #define LSL_W		0xfa00f000
@@ -158,6 +160,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define SBCI		0xf1600000
 #define SBCS		0x4180
 #define SBC_W		0xeb600000
+#define SDIV		0xfb90f0f0
 #define SMULL		0xfb800000
 #define STR_SP		0x9000
 #define SUBS		0x1a00
@@ -172,6 +175,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define SXTH		0xb200
 #define SXTH_W		0xfa0ff080
 #define TST		0x4200
+#define UDIV		0xfbb0f0f0
 #define UMULL		0xfba00000
 #define UXTB		0xb2c0
 #define UXTB_W		0xfa5ff080
@@ -339,8 +343,8 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw
 
 	/* Really complex instruction form for branches. */
 	s = (diff >> 23) & 0x1;
-	j1 = (~(diff >> 21) ^ s) & 0x1;
-	j2 = (~(diff >> 22) ^ s) & 0x1;
+	j1 = (~(diff >> 22) ^ s) & 0x1;
+	j2 = (~(diff >> 21) ^ s) & 0x1;
 	jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
 	jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);
 
@@ -520,6 +524,8 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst,
 {
 	sljit_uw tmp;
 
+	/* MOVS cannot be used since it destroy flags. */
+
 	if (imm >= 0x10000) {
 		tmp = get_imm(imm);
 		if (tmp != INVALID_IMM)
@@ -1032,6 +1038,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 {
 	sljit_s32 args, size, i, tmp;
 	sljit_ins push = 0;
+#ifdef _WIN32
+	sljit_uw imm;
+#endif
 
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
@@ -1052,12 +1061,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
 	local_size = ((size + local_size + 7) & ~7) - size;
 	compiler->local_size = local_size;
+
+#ifdef _WIN32
+	if (local_size >= 256) {
+		if (local_size > 4096)
+			imm = get_imm(4096);
+		else
+			imm = get_imm(local_size & ~0xff);
+
+		SLJIT_ASSERT(imm != INVALID_IMM);
+		FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(SLJIT_SP) | imm));
+	}
+#else
 	if (local_size > 0) {
 		if (local_size <= (127 << 2))
 			FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
 		else
 			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
 	}
+#endif
 
 	args = get_arg_count(arg_types);
 
@@ -1068,6 +1090,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	if (args >= 3)
 		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2)));
 
+#ifdef _WIN32
+	if (local_size >= 256) {
+		if (local_size > 4096) {
+			imm = get_imm(4096);
+			SLJIT_ASSERT(imm != INVALID_IMM);
+
+			if (local_size < 4 * 4096) {
+				if (local_size > 2 * 4096) {
+					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+					FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
+					local_size -= 4096;
+				}
+
+				if (local_size > 2 * 4096) {
+					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+					FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
+					local_size -= 4096;
+				}
+
+				FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+				local_size -= 4096;
+
+				SLJIT_ASSERT(local_size > 0);
+			}
+			else {
+				FAIL_IF(load_immediate(compiler, SLJIT_R3, (local_size >> 12) - 1));
+				FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
+				SLJIT_ASSERT(reg_map[SLJIT_R3] < 7);
+				FAIL_IF(push_inst16(compiler, SUBSI8 | RDN3(SLJIT_R3) | 1));
+				FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-7 & 0xff)));
+
+				local_size &= 0xfff;
+
+				if (local_size != 0)
+					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+			}
+
+			if (local_size >= 256) {
+				imm = get_imm(local_size & ~0xff);
+				SLJIT_ASSERT(imm != INVALID_IMM);
+
+				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
+			}
+		}
+
+		local_size &= 0xff;
+		FAIL_IF(push_inst32(compiler, LDRI | 0x400 | (local_size > 0 ? 0x100 : 0) | RT4(TMP_REG2) | RN4(TMP_REG1) | local_size));
+
+		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SP, TMP_REG1)));
+	}
+	else if (local_size > 0)
+		FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | local_size));
+#endif
+
 	return SLJIT_SUCCESS;
 }
 
@@ -1119,11 +1196,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
 /*  Operators                                                            */
 /* --------------------------------------------------------------------- */
 
+#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#if defined(__GNUC__)
+#ifdef _WIN32
+extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
+extern long long __rt_sdiv(int denominator, int numerator);
+#elif defined(__GNUC__)
 extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
 extern int __aeabi_idivmod(int numerator, int denominator);
 #else
@@ -1134,10 +1216,14 @@ extern int __aeabi_idivmod(int numerator, int denominator);
 }
 #endif
 
+#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
 {
+#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
 	sljit_sw saved_reg_list[3];
 	sljit_sw saved_reg_count;
+#endif
 
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_op0(compiler, op));
@@ -1155,6 +1241,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
 			| (reg_map[SLJIT_R0] << 12)
 			| (reg_map[SLJIT_R0] << 16)
 			| reg_map[SLJIT_R1]);
+#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
+	case SLJIT_DIVMOD_UW:
+	case SLJIT_DIVMOD_SW:
+		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
+		FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
+		FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
+		return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
+	case SLJIT_DIV_UW:
+	case SLJIT_DIV_SW:
+		return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
+#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
 	case SLJIT_DIVMOD_UW:
 	case SLJIT_DIVMOD_SW:
 	case SLJIT_DIV_UW:
@@ -1183,7 +1280,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
 			}
 		}
 
-#if defined(__GNUC__)
+#ifdef _WIN32
+		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
+		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
+		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
+		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
+			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__rt_udiv) : SLJIT_FUNC_OFFSET(__rt_sdiv))));
+#elif defined(__GNUC__)
 		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
 			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
 #else
@@ -1203,6 +1306,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
 						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
 		}
 		return SLJIT_SUCCESS;
+#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
 	}
 
 	return SLJIT_SUCCESS;

+ 2 - 2
thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c

@@ -448,7 +448,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 	sljit_ins ins = NOP;
 	sljit_u8 offsets[4];
 
-	SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12);
+	SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
 
 	arg_types >>= SLJIT_DEF_SHIFT;
 
@@ -516,7 +516,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 			else if (arg_count != word_arg_count)
 				ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2));
 			else if (arg_count == 1)
-				ins = ADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3);
+				ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4);
 
 			arg_count--;
 			word_arg_count--;

+ 2 - 2
thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c

@@ -547,7 +547,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 	sljit_ins prev_ins = NOP;
 	sljit_ins ins = NOP;
 
-	SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12);
+	SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
 
 	arg_types >>= SLJIT_DEF_SHIFT;
 
@@ -591,7 +591,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 			if (arg_count != word_arg_count)
 				ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count);
 			else if (arg_count == 1)
-				ins = DADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3);
+				ins = DADDU | S(SLJIT_R0) | TA(0) | DA(4);
 			arg_count--;
 			word_arg_count--;
 			break;

+ 39 - 9
thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c

@@ -57,14 +57,14 @@ typedef sljit_u32 sljit_ins;
 #define RETURN_ADDR_REG	31
 
 /* Flags are kept in volatile registers. */
-#define EQUAL_FLAG	31
+#define EQUAL_FLAG	3
 #define OTHER_FLAG	1
 
 #define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
 #define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
 
 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
-	0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4
+	0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31
 };
 
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -612,16 +612,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 		/* Frequent case. */
 		FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP)));
 		base = S(SLJIT_SP);
+		offs = local_size - (sljit_sw)sizeof(sljit_sw);
 	}
 	else {
-		FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
+		FAIL_IF(load_immediate(compiler, DR(OTHER_FLAG), local_size));
 		FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
-		FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP)));
+		FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP)));
 		base = S(TMP_REG2);
 		local_size = 0;
+		offs = -(sljit_sw)sizeof(sljit_sw);
 	}
 
-	offs = local_size - (sljit_sw)(sizeof(sljit_sw));
 	FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS));
 
 	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
@@ -805,7 +806,8 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
 	if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
 		tmp_ar = reg_ar;
 		delay_slot = reg_ar;
-	} else {
+	}
+	else {
 		tmp_ar = DR(TMP_REG1);
 		delay_slot = MOVABLE_INS;
 	}
@@ -881,11 +883,39 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
 
 static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw)
 {
+	sljit_s32 tmp_ar, base, delay_slot;
+
 	if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
 		return compiler->error;
-	compiler->cache_arg = 0;
-	compiler->cache_argw = 0;
-	return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
+
+	if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
+		tmp_ar = reg_ar;
+		delay_slot = reg_ar;
+	}
+	else {
+		tmp_ar = DR(TMP_REG1);
+		delay_slot = MOVABLE_INS;
+	}
+	base = arg & REG_MASK;
+
+	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+		argw &= 0x3;
+
+		if (SLJIT_UNLIKELY(argw)) {
+			FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | DA(tmp_ar) | SH_IMM(argw), tmp_ar));
+			FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar));
+		}
+		else
+			FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | DA(tmp_ar), tmp_ar));
+		return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
+	}
+
+	FAIL_IF(load_immediate(compiler, tmp_ar, argw));
+
+	if (base != 0)
+		FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar));
+
+	return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
 }
 
 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)

+ 51 - 28
thirdparty/pcre2/src/sljit/sljitNativeX86_32.c

@@ -123,34 +123,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 
 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
 	if (args > 0) {
-		*inst++ = MOV_r_rm;
-		*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
+		inst[0] = MOV_r_rm;
+		inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
+		inst += 2;
 	}
 	if (args > 1) {
-		*inst++ = MOV_r_rm;
-		*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
+		inst[0] = MOV_r_rm;
+		inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
+		inst += 2;
 	}
 	if (args > 2) {
-		*inst++ = MOV_r_rm;
-		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
-		*inst++ = 0x24;
-		*inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
+		inst[0] = MOV_r_rm;
+		inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
+		inst[2] = 0x24;
+		inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
 	}
 #else
 	if (args > 0) {
-		*inst++ = MOV_r_rm;
-		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
-		*inst++ = sizeof(sljit_sw) * 2;
+		inst[0] = MOV_r_rm;
+		inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
+		inst[2] = sizeof(sljit_sw) * 2;
+		inst += 3;
 	}
 	if (args > 1) {
-		*inst++ = MOV_r_rm;
-		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
-		*inst++ = sizeof(sljit_sw) * 3;
+		inst[0] = MOV_r_rm;
+		inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
+		inst[2] = sizeof(sljit_sw) * 3;
+		inst += 3;
 	}
 	if (args > 2) {
-		*inst++ = MOV_r_rm;
-		*inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
-		*inst++ = sizeof(sljit_sw) * 4;
+		inst[0] = MOV_r_rm;
+		inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
+		inst[2] = sizeof(sljit_sw) * 4;
 	}
 #endif
 
@@ -170,17 +174,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	compiler->local_size = local_size;
 
 #ifdef _WIN32
-	if (local_size > 1024) {
-#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
-		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
-#else
-		/* Space for a single argument. This amount is excluded when the stack is allocated below. */
-		local_size -= sizeof(sljit_sw);
-		FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
-		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
-			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw)));
-#endif
-		FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
+	if (local_size > 0) {
+		if (local_size <= 4 * 4096) {
+			if (local_size > 4096)
+				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
+			if (local_size > 2 * 4096)
+				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
+			if (local_size > 3 * 4096)
+				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
+		}
+		else {
+			EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
+			EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);
+
+			SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
+
+			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096);
+			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+				SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
+			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+				SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));
+
+			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+			FAIL_IF(!inst);
+
+			INC_SIZE(2);
+			inst[0] = JNE_i8;
+			inst[1] = (sljit_s8) -16;
+		}
+
+		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
 	}
 #endif
 

+ 55 - 65
thirdparty/pcre2/src/sljit/sljitNativeX86_64.c

@@ -83,6 +83,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
 
+	compiler->mode32 = 0;
+
 #ifdef _WIN64
 	/* Two/four register slots for parameters plus space for xmm6 register if needed. */
 	if (fscratches >= 6 || fsaveds >= 1)
@@ -126,35 +128,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 
 #ifndef _WIN64
 		if (args > 0) {
-			*inst++ = REX_W;
-			*inst++ = MOV_r_rm;
-			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
+			inst[0] = REX_W;
+			inst[1] = MOV_r_rm;
+			inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
+			inst += 3;
 		}
 		if (args > 1) {
-			*inst++ = REX_W | REX_R;
-			*inst++ = MOV_r_rm;
-			*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
+			inst[0] = REX_W | REX_R;
+			inst[1] = MOV_r_rm;
+			inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
+			inst += 3;
 		}
 		if (args > 2) {
-			*inst++ = REX_W | REX_R;
-			*inst++ = MOV_r_rm;
-			*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
+			inst[0] = REX_W | REX_R;
+			inst[1] = MOV_r_rm;
+			inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
 		}
 #else
 		if (args > 0) {
-			*inst++ = REX_W;
-			*inst++ = MOV_r_rm;
-			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
+			inst[0] = REX_W;
+			inst[1] = MOV_r_rm;
+			inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
+			inst += 3;
 		}
 		if (args > 1) {
-			*inst++ = REX_W;
-			*inst++ = MOV_r_rm;
-			*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
+			inst[0] = REX_W;
+			inst[1] = MOV_r_rm;
+			inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
+			inst += 3;
 		}
 		if (args > 2) {
-			*inst++ = REX_W | REX_B;
-			*inst++ = MOV_r_rm;
-			*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
+			inst[0] = REX_W | REX_B;
+			inst[1] = MOV_r_rm;
+			inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
 		}
 #endif
 	}
@@ -163,58 +169,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	compiler->local_size = local_size;
 
 #ifdef _WIN64
-	if (local_size > 1024) {
-		/* Allocate stack for the callback, which grows the stack. */
-		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32)));
-		FAIL_IF(!inst);
-		INC_SIZE(4 + (3 + sizeof(sljit_s32)));
-		*inst++ = REX_W;
-		*inst++ = GROUP_BINARY_83;
-		*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
-		/* Allocated size for registers must be divisible by 8. */
-		SLJIT_ASSERT(!(saved_register_size & 0x7));
-		/* Aligned to 16 byte. */
-		if (saved_register_size & 0x8) {
-			*inst++ = 5 * sizeof(sljit_sw);
-			local_size -= 5 * sizeof(sljit_sw);
-		} else {
-			*inst++ = 4 * sizeof(sljit_sw);
-			local_size -= 4 * sizeof(sljit_sw);
-		}
-		/* Second instruction */
-		SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
-		*inst++ = REX_W;
-		*inst++ = MOV_rm_i32;
-		*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
-		sljit_unaligned_store_s32(inst, local_size);
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
-			|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
-		compiler->skip_checks = 1;
-#endif
-		FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
-	}
-#endif
-
 	if (local_size > 0) {
-		if (local_size <= 127) {
-			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
-			FAIL_IF(!inst);
-			INC_SIZE(4);
-			*inst++ = REX_W;
-			*inst++ = GROUP_BINARY_83;
-			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
-			*inst++ = local_size;
+		if (local_size <= 4 * 4096) {
+			if (local_size > 4096)
+				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
+			if (local_size > 2 * 4096)
+				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
+			if (local_size > 3 * 4096)
+				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
 		}
 		else {
-			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
+			EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
+			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12);
+
+			SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
+
+			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
+			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+				SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
+			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+				TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1));
+
+			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
 			FAIL_IF(!inst);
-			INC_SIZE(7);
-			*inst++ = REX_W;
-			*inst++ = GROUP_BINARY_81;
-			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
-			sljit_unaligned_store_s32(inst, local_size);
-			inst += sizeof(sljit_s32);
+
+			INC_SIZE(2);
+			inst[0] = JNE_i8;
+			inst[1] = (sljit_s8) -19;
 		}
+
+		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
+	}
+#endif
+
+	if (local_size > 0) {
+		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
 	}
 
 #ifdef _WIN64

+ 0 - 17
thirdparty/pcre2/src/sljit/sljitNativeX86_common.c

@@ -669,23 +669,6 @@ static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
 	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
 
-#ifdef _WIN32
-#include <malloc.h>
-
-static void SLJIT_FUNC sljit_grow_stack(sljit_sw local_size)
-{
-	/* Workaround for calling the internal _chkstk() function on Windows.
-	This function touches all 4k pages belongs to the requested stack space,
-	which size is passed in local_size. This is necessary on Windows where
-	the stack can only grow in 4k steps. However, this function just burn
-	CPU cycles if the stack is large enough. However, you don't know it in
-	advance, so it must always be called. I think this is a bad design in
-	general even if it has some reasons. */
-	*(volatile sljit_s32*)alloca(local_size) = 0;
-}
-
-#endif
-
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
 #include "sljitNativeX86_32.c"
 #else

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است