Browse Source

Merge pull request #38363 from akien-mga/pcre2-10.34

pcre2: Update to upstream version 10.34
Rémi Verschelde 5 years ago
parent
commit
f870118323
39 changed files with 4799 additions and 2465 deletions
  1. 1 2
      thirdparty/README.md
  2. 3 3
      thirdparty/pcre2/src/config.h
  3. 14 5
      thirdparty/pcre2/src/pcre2.h
  4. 7 0
      thirdparty/pcre2/src/pcre2_auto_possess.c
  5. 343 110
      thirdparty/pcre2/src/pcre2_compile.c
  6. 1 1
      thirdparty/pcre2/src/pcre2_context.c
  7. 99 30
      thirdparty/pcre2/src/pcre2_dfa_match.c
  8. 3 0
      thirdparty/pcre2/src/pcre2_error.c
  9. 60 45
      thirdparty/pcre2/src/pcre2_internal.h
  10. 6 4
      thirdparty/pcre2/src/pcre2_intmodedep.h
  11. 264 824
      thirdparty/pcre2/src/pcre2_jit_compile.c
  12. 0 1
      thirdparty/pcre2/src/pcre2_jit_match.c
  13. 321 0
      thirdparty/pcre2/src/pcre2_jit_neon_inc.h
  14. 993 0
      thirdparty/pcre2/src/pcre2_jit_simd_inc.h
  15. 11 0
      thirdparty/pcre2/src/pcre2_maketables.c
  16. 456 147
      thirdparty/pcre2/src/pcre2_match.c
  17. 14 1
      thirdparty/pcre2/src/pcre2_match_data.c
  18. 235 71
      thirdparty/pcre2/src/pcre2_study.c
  19. 164 152
      thirdparty/pcre2/src/pcre2_tables.c
  20. 898 863
      thirdparty/pcre2/src/pcre2_ucd.c
  21. 6 1
      thirdparty/pcre2/src/pcre2_ucp.h
  22. 4 0
      thirdparty/pcre2/src/sljit/sljitConfigInternal.h
  23. 24 4
      thirdparty/pcre2/src/sljit/sljitExecAllocator.c
  24. 68 1
      thirdparty/pcre2/src/sljit/sljitLir.c
  25. 19 3
      thirdparty/pcre2/src/sljit/sljitLir.h
  26. 92 34
      thirdparty/pcre2/src/sljit/sljitNativeARM_32.c
  27. 91 20
      thirdparty/pcre2/src/sljit/sljitNativeARM_64.c
  28. 59 17
      thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c
  29. 2 0
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
  30. 132 23
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
  31. 2 0
      thirdparty/pcre2/src/sljit/sljitNativePPC_32.c
  32. 0 3
      thirdparty/pcre2/src/sljit/sljitNativePPC_64.c
  33. 164 50
      thirdparty/pcre2/src/sljit/sljitNativePPC_common.c
  34. 2 0
      thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c
  35. 76 26
      thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c
  36. 3 1
      thirdparty/pcre2/src/sljit/sljitNativeX86_32.c
  37. 53 1
      thirdparty/pcre2/src/sljit/sljitNativeX86_64.c
  38. 103 22
      thirdparty/pcre2/src/sljit/sljitNativeX86_common.c
  39. 6 0
      thirdparty/pcre2/src/sljit/sljitUtils.c

+ 1 - 2
thirdparty/README.md

@@ -457,14 +457,13 @@ Files extracted from upstream source:
 ## pcre2
 
 - Upstream: http://www.pcre.org
-- Version: 10.33
+- Version: 10.34
 - License: BSD-3-Clause
 
 Files extracted from upstream source:
 
 - Files listed in the file NON-AUTOTOOLS-BUILD steps 1-4
 - All .h files in src/ apart from pcre2posix.h
-- src/pcre2_jit_compile.c
 - src/pcre2_jit_match.c
 - src/pcre2_jit_misc.c
 - src/sljit/*

+ 3 - 3
thirdparty/pcre2/src/config.h

@@ -218,7 +218,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_NAME "PCRE2"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 10.33"
+#define PACKAGE_STRING "PCRE2 10.34"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "pcre2"
@@ -227,7 +227,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_URL ""
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "10.33"
+#define PACKAGE_VERSION "10.34"
 
 /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
    parentheses (of any kind) in a pattern. This limits the amount of system
@@ -352,7 +352,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #endif
 
 /* Version number of package */
-#define VERSION "10.33"
+#define VERSION "10.34"
 
 /* Define to 1 if on MINIX. */
 /* #undef _MINIX */

+ 14 - 5
thirdparty/pcre2/src/pcre2.h

@@ -5,7 +5,7 @@
 /* This is the public header file for the PCRE library, second API, to be
 #included by applications that call PCRE2 functions.
 
-           Copyright (c) 2016-2018 University of Cambridge
+           Copyright (c) 2016-2019 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */
 
 #define PCRE2_MAJOR           10
-#define PCRE2_MINOR           33
+#define PCRE2_MINOR           34
 #define PCRE2_PRERELEASE      
-#define PCRE2_DATE            2019-04-16
+#define PCRE2_DATE            2019-11-21
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE2, the appropriate
@@ -142,6 +142,7 @@ D   is inspected during pcre2_dfa_match() execution
 #define PCRE2_USE_OFFSET_LIMIT    0x00800000u  /*   J M D */
 #define PCRE2_EXTENDED_MORE       0x01000000u  /* C       */
 #define PCRE2_LITERAL             0x02000000u  /* C       */
+#define PCRE2_MATCH_INVALID_UTF   0x04000000u  /*   J M D */
 
 /* An additional compile options word is available in the compile context. */
 
@@ -305,6 +306,8 @@ pcre2_pattern_convert(). */
 #define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS      194
 #define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN        195
 #define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE       196
+#define PCRE2_ERROR_TOO_MANY_CAPTURES              197
+#define PCRE2_ERROR_CONDITION_ATOMIC_ASSERTION_EXPECTED  198
 
 
 /* "Expected" matching error codes: no match and partial match. */
@@ -390,6 +393,7 @@ released, the numbers must not be changed. */
 #define PCRE2_ERROR_HEAPLIMIT         (-63)
 #define PCRE2_ERROR_CONVERT_SYNTAX    (-64)
 #define PCRE2_ERROR_INTERNAL_DUPMATCH (-65)
+#define PCRE2_ERROR_DFA_UINVALID_UTF  (-66)
 
 
 /* Request types for pcre2_pattern_info() */
@@ -580,7 +584,7 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
-  pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \
+  pcre2_set_character_tables(pcre2_compile_context *, const uint8_t *); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@@ -675,6 +679,8 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
   pcre2_match_data_free(pcre2_match_data *); \
 PCRE2_EXP_DECL PCRE2_SPTR PCRE2_CALL_CONVENTION \
   pcre2_get_mark(pcre2_match_data *); \
+PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
+  pcre2_get_match_data_size(pcre2_match_data *); \
 PCRE2_EXP_DECL uint32_t PCRE2_CALL_CONVENTION \
   pcre2_get_ovector_count(pcre2_match_data *); \
 PCRE2_EXP_DECL PCRE2_SIZE PCRE2_CALL_CONVENTION \
@@ -773,7 +779,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
 PCRE2_EXP_DECL const uint8_t PCRE2_CALL_CONVENTION \
   *pcre2_maketables(pcre2_general_context *); \
-
+PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
+  pcre2_maketables_free(pcre2_general_context *, const uint8_t *);
 
 /* Define macros that generate width-specific names from generic versions. The
 three-level macro scheme is necessary to get the macros expanded when we want
@@ -838,6 +845,7 @@ pcre2_compile are called by application code. */
 #define pcre2_general_context_free            PCRE2_SUFFIX(pcre2_general_context_free_)
 #define pcre2_get_error_message               PCRE2_SUFFIX(pcre2_get_error_message_)
 #define pcre2_get_mark                        PCRE2_SUFFIX(pcre2_get_mark_)
+#define pcre2_get_match_data_size             PCRE2_SUFFIX(pcre2_get_match_data_size_)
 #define pcre2_get_ovector_pointer             PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
 #define pcre2_get_ovector_count               PCRE2_SUFFIX(pcre2_get_ovector_count_)
 #define pcre2_get_startchar                   PCRE2_SUFFIX(pcre2_get_startchar_)
@@ -848,6 +856,7 @@ pcre2_compile are called by application code. */
 #define pcre2_jit_stack_create                PCRE2_SUFFIX(pcre2_jit_stack_create_)
 #define pcre2_jit_stack_free                  PCRE2_SUFFIX(pcre2_jit_stack_free_)
 #define pcre2_maketables                      PCRE2_SUFFIX(pcre2_maketables_)
+#define pcre2_maketables_free                 PCRE2_SUFFIX(pcre2_maketables_free_)
 #define pcre2_match                           PCRE2_SUFFIX(pcre2_match_)
 #define pcre2_match_context_copy              PCRE2_SUFFIX(pcre2_match_context_copy_)
 #define pcre2_match_context_create            PCRE2_SUFFIX(pcre2_match_context_create_)

+ 7 - 0
thirdparty/pcre2/src/pcre2_auto_possess.c

@@ -624,6 +624,13 @@ for(;;)
       case OP_ASSERTBACK_NOT:
       case OP_ONCE:
       return !entered_a_group;
+
+      /* Non-atomic assertions - don't possessify last iterator. This needs
+      more thought. */
+
+      case OP_ASSERT_NA:
+      case OP_ASSERTBACK_NA:
+      return FALSE;
       }
 
     /* Skip over the bracket and inspect what comes next. */

File diff suppressed because it is too large
+ 343 - 110
thirdparty/pcre2/src/pcre2_compile.c


+ 1 - 1
thirdparty/pcre2/src/pcre2_context.c

@@ -323,7 +323,7 @@ data. */
 
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_set_character_tables(pcre2_compile_context *ccontext,
-  const unsigned char *tables)
+  const uint8_t *tables)
 {
 ccontext->tables = tables;
 return 0;

+ 99 - 30
thirdparty/pcre2/src/pcre2_dfa_match.c

@@ -173,6 +173,8 @@ static const uint8_t coptable[] = {
   0,                             /* Assert not                             */
   0,                             /* Assert behind                          */
   0,                             /* Assert behind not                      */
+  0,                             /* NA assert                              */
+  0,                             /* NA assert behind                       */
   0,                             /* ONCE                                   */
   0,                             /* SCRIPT_RUN                             */
   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
@@ -248,6 +250,8 @@ static const uint8_t poptable[] = {
   0,                             /* Assert not                             */
   0,                             /* Assert behind                          */
   0,                             /* Assert behind not                      */
+  0,                             /* NA assert                              */
+  0,                             /* NA assert behind                       */
   0,                             /* ONCE                                   */
   0,                             /* SCRIPT_RUN                             */
   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
@@ -962,7 +966,7 @@ for (;;)
       if (ptr >= end_subject)
         {
         if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
-          could_continue = TRUE;
+          return PCRE2_ERROR_PARTIAL;
         else { ADD_ACTIVE(state_offset + 1, 0); }
         }
       break;
@@ -1011,10 +1015,12 @@ for (;;)
 
       /*-----------------------------------------------------------------*/
       case OP_EODN:
-      if (clen == 0 && (mb->moptions & PCRE2_PARTIAL_HARD) != 0)
-        could_continue = TRUE;
-      else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - mb->nllen))
-        { ADD_ACTIVE(state_offset + 1, 0); }
+      if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - mb->nllen))
+        {
+        if ((mb->moptions & PCRE2_PARTIAL_HARD) != 0)
+          return PCRE2_ERROR_PARTIAL;
+        ADD_ACTIVE(state_offset + 1, 0);
+        }
       break;
 
       /*-----------------------------------------------------------------*/
@@ -3152,8 +3158,8 @@ for (;;)
 
   /* We have finished the processing at the current subject character. If no
   new states have been set for the next character, we have found all the
-  matches that we are going to find. If we are at the top level and partial
-  matching has been requested, check for appropriate conditions.
+  matches that we are going to find. If partial matching has been requested,
+  check for appropriate conditions.
 
   The "forced_ fail" variable counts the number of (*F) encountered for the
   character. If it is equal to the original active_count (saved in
@@ -3165,22 +3171,24 @@ for (;;)
 
   if (new_count <= 0)
     {
-    if (rlevel == 1 &&                               /* Top level, and */
-        could_continue &&                            /* Some could go on, and */
+    if (could_continue &&                            /* Some could go on, and */
         forced_fail != workspace[1] &&               /* Not all forced fail & */
         (                                            /* either... */
         (mb->moptions & PCRE2_PARTIAL_HARD) != 0      /* Hard partial */
         ||                                           /* or... */
         ((mb->moptions & PCRE2_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
-         match_count < 0)                            /* no matches */
+         match_count < 0)                             /* no matches */
         ) &&                                         /* And... */
         (
-        partial_newline ||                           /* Either partial NL */
-          (                                          /* or ... */
-          ptr >= end_subject &&                /* End of subject and */
-          ptr > mb->start_used_ptr)            /* Inspected non-empty string */
+        partial_newline ||                   /* Either partial NL */
+          (                                  /* or ... */
+          ptr >= end_subject &&              /* End of subject and */
+            (                                  /* either */
+            ptr > mb->start_used_ptr ||        /* Inspected non-empty string */
+            mb->allowemptypartial              /* or pattern has lookbehind */
+            )                                  /* or could match empty */
           )
-        )
+        ))
       match_count = PCRE2_ERROR_PARTIAL;
     break;  /* Exit from loop along the subject string */
     }
@@ -3246,6 +3254,11 @@ BOOL utf, anchored, startline, firstline;
 BOOL has_first_cu = FALSE;
 BOOL has_req_cu = FALSE;
 
+#if PCRE2_CODE_UNIT_WIDTH == 8
+BOOL memchr_not_found_first_cu = FALSE;
+BOOL memchr_not_found_first_cu2 = FALSE;
+#endif
+
 PCRE2_UCHAR first_cu = 0;
 PCRE2_UCHAR first_cu2 = 0;
 PCRE2_UCHAR req_cu = 0;
@@ -3295,6 +3308,11 @@ if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
    ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
   return PCRE2_ERROR_BADOPTION;
 
+/* Invalid UTF support is not available for DFA matching. */
+
+if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
+  return PCRE2_ERROR_DFA_UINVALID_UTF;
+
 /* Check that the first field in the block is the magic number. If it is not,
 return with PCRE2_ERROR_BADMAGIC. */
 
@@ -3404,6 +3422,8 @@ mb->tables = re->tables;
 mb->start_subject = subject;
 mb->end_subject = end_subject;
 mb->start_offset = start_offset;
+mb->allowemptypartial = (re->max_lookbehind > 0) ||
+  (re->flags & PCRE2_MATCH_EMPTY) != 0;
 mb->moptions = options;
 mb->poptions = re->overall_options;
 mb->match_call_count = 0;
@@ -3619,7 +3639,10 @@ for (;;)
     /* Not anchored. Advance to a unique first code unit if there is one. In
     8-bit mode, the use of memchr() gives a big speed up, even though we have
     to call it twice in caseless mode, in order to find the earliest occurrence
-    of the character in either of its cases. */
+    of the character in either of its cases. If a call to memchr() that
+    searches the rest of the subject fails to find one case, remember that in
+    order not to keep on repeating the search. This can make a huge difference
+    when the strings are very long and only one case is present. */
 
     else
       {
@@ -3633,11 +3656,29 @@ for (;;)
                 (smc = UCHAR21TEST(start_match)) != first_cu &&
                   smc != first_cu2)
             start_match++;
+
 #else  /* 8-bit code units */
-          PCRE2_SPTR pp1 =
-            memchr(start_match, first_cu, end_subject-start_match);
-          PCRE2_SPTR pp2 =
-            memchr(start_match, first_cu2, end_subject-start_match);
+          PCRE2_SPTR pp1 = NULL;
+          PCRE2_SPTR pp2 = NULL;
+          PCRE2_SIZE cu2size = end_subject - start_match;
+
+          if (!memchr_not_found_first_cu)
+            {
+            pp1 = memchr(start_match, first_cu, end_subject - start_match);
+            if (pp1 == NULL) memchr_not_found_first_cu = TRUE;
+              else cu2size = pp1 - start_match;
+            }
+
+          /* If pp1 is not NULL, we have arranged to search only as far as pp1,
+          to see if the other case is earlier, so we can set "not found" only
+          when both searches have returned NULL. */
+
+          if (!memchr_not_found_first_cu2)
+            {
+            pp2 = memchr(start_match, first_cu2, cu2size);
+            memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL);
+            }
+
           if (pp1 == NULL)
             start_match = (pp2 == NULL)? end_subject : pp2;
           else
@@ -3653,7 +3694,7 @@ for (;;)
           while (start_match < end_subject && UCHAR21TEST(start_match) !=
                  first_cu)
             start_match++;
-#else
+#else  /* 8-bit code units */
           start_match = memchr(start_match, first_cu, end_subject - start_match);
           if (start_match == NULL) start_match = end_subject;
 #endif
@@ -3740,6 +3781,8 @@ for (;;)
 
     if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0)
       {
+      PCRE2_SPTR p;
+
       /* The minimum matching length is a lower bound; no actual string of that
       length may actually match the pattern. Although the value is, strictly,
       in characters, we treat it as code units to avoid spending too much time
@@ -3753,37 +3796,63 @@ for (;;)
       point. This optimization can save a huge amount of backtracking in
       patterns with nested unlimited repeats that aren't going to match.
       Writing separate code for cased/caseless versions makes it go faster, as
-      does using an autoincrement and backing off on a match.
+      does using an autoincrement and backing off on a match. As in the case of
+      the first code unit, using memchr() in the 8-bit library gives a big
+      speed up. Unlike the first_cu check above, we do not need to call
+      memchr() twice in the caseless case because we only need to check for the
+      presence of the character in either case, not find the first occurrence.
+
+      The search can be skipped if the code unit was found later than the
+      current starting point in a previous iteration of the bumpalong loop.
 
       HOWEVER: when the subject string is very, very long, searching to its end
       can take a long time, and give bad performance on quite ordinary
       patterns. This showed up when somebody was matching something like
       /^\d+C/ on a 32-megabyte string... so we don't do this when the string is
-      sufficiently long. */
+      sufficiently long, but it's worth searching a lot more for unanchored
+      patterns. */
 
-      if (has_req_cu && end_subject - start_match < REQ_CU_MAX)
+      p = start_match + (has_first_cu? 1:0);
+      if (has_req_cu && p > req_cu_ptr)
         {
-        PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
-
-        /* We don't need to repeat the search if we haven't yet reached the
-        place we found it at last time. */
+        PCRE2_SIZE check_length = end_subject - start_match;
 
-        if (p > req_cu_ptr)
+        if (check_length < REQ_CU_MAX ||
+              (!anchored && check_length < REQ_CU_MAX * 1000))
           {
-          if (req_cu != req_cu2)
+          if (req_cu != req_cu2)  /* Caseless */
             {
+#if PCRE2_CODE_UNIT_WIDTH != 8
             while (p < end_subject)
               {
               uint32_t pp = UCHAR21INCTEST(p);
               if (pp == req_cu || pp == req_cu2) { p--; break; }
               }
+#else  /* 8-bit code units */
+            PCRE2_SPTR pp = p;
+            p = memchr(pp, req_cu, end_subject - pp);
+            if (p == NULL)
+              {
+              p = memchr(pp, req_cu2, end_subject - pp);
+              if (p == NULL) p = end_subject;
+              }
+#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
             }
+
+          /* The caseful case */
+
           else
             {
+#if PCRE2_CODE_UNIT_WIDTH != 8
             while (p < end_subject)
               {
               if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
               }
+
+#else  /* 8-bit code units */
+            p = memchr(p, req_cu, end_subject - p);
+            if (p == NULL) p = end_subject;
+#endif
             }
 
           /* If we can't find the required code unit, break the matching loop,

+ 3 - 0
thirdparty/pcre2/src/pcre2_error.c

@@ -184,6 +184,8 @@ static const unsigned char compile_error_texts[] =
   /* 95 */
   "(*alpha_assertion) not recognized\0"
   "script runs require Unicode support, which this version of PCRE2 does not have\0"
+  "too many capturing groups (maximum 65535)\0"
+  "atomic assertion expected after (?( or (?(?C)\0"
   ;
 
 /* Match-time and UTF error texts are in the same format. */
@@ -268,6 +270,7 @@ static const unsigned char match_error_texts[] =
   "invalid syntax\0"
   /* 65 */
   "internal error - duplicate substitution match\0"
+  "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
   ;
 
 

+ 60 - 45
thirdparty/pcre2/src/pcre2_internal.h

@@ -517,6 +517,7 @@ bytes in a code unit in that mode. */
 #define PCRE2_HASBKPORX     0x00100000  /* contains \P, \p, or \X */
 #define PCRE2_DUPCAPUSED    0x00200000  /* contains (?| */
 #define PCRE2_HASBKC        0x00400000  /* contains \C */
+#define PCRE2_HASACCEPT     0x00800000  /* contains (*ACCEPT) */
 
 #define PCRE2_MODE_MASK     (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
 
@@ -535,13 +536,14 @@ enum { PCRE2_MATCHEDBY_INTERPRETER,     /* pcre2_match() */
 #define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' */
 
 /* The maximum remaining length of subject we are prepared to search for a
-req_unit match. In 8-bit mode, memchr() is used and is much faster than the
-search loop that has to be used in 16-bit and 32-bit modes. */
+req_unit match from an anchored pattern. In 8-bit mode, memchr() is used and is
+much faster than the search loop that has to be used in 16-bit and 32-bit
+modes. */
 
 #if PCRE2_CODE_UNIT_WIDTH == 8
-#define REQ_CU_MAX 2000
+#define REQ_CU_MAX       5000
 #else
-#define REQ_CU_MAX 1000
+#define REQ_CU_MAX       2000
 #endif
 
 /* Offsets for the bitmap tables in the cbits set of tables. Each table
@@ -881,12 +883,16 @@ a positive value. */
 #define STRING_atomic0               "atomic\0"
 #define STRING_pla0                  "pla\0"
 #define STRING_plb0                  "plb\0"
+#define STRING_napla0                "napla\0"
+#define STRING_naplb0                "naplb\0"
 #define STRING_nla0                  "nla\0"
 #define STRING_nlb0                  "nlb\0"
 #define STRING_sr0                   "sr\0"
 #define STRING_asr0                  "asr\0"
 #define STRING_positive_lookahead0   "positive_lookahead\0"
 #define STRING_positive_lookbehind0  "positive_lookbehind\0"
+#define STRING_non_atomic_positive_lookahead0   "non_atomic_positive_lookahead\0"
+#define STRING_non_atomic_positive_lookbehind0  "non_atomic_positive_lookbehind\0"
 #define STRING_negative_lookahead0   "negative_lookahead\0"
 #define STRING_negative_lookbehind0  "negative_lookbehind\0"
 #define STRING_script_run0           "script_run\0"
@@ -1171,12 +1177,16 @@ only. */
 #define STRING_atomic0               STR_a STR_t STR_o STR_m STR_i STR_c "\0"
 #define STRING_pla0                  STR_p STR_l STR_a "\0"
 #define STRING_plb0                  STR_p STR_l STR_b "\0"
+#define STRING_napla0                STR_n STR_a STR_p STR_l STR_a "\0"
+#define STRING_naplb0                STR_n STR_a STR_p STR_l STR_b "\0"
 #define STRING_nla0                  STR_n STR_l STR_a "\0"
 #define STRING_nlb0                  STR_n STR_l STR_b "\0"
 #define STRING_sr0                   STR_s STR_r "\0"
 #define STRING_asr0                  STR_a STR_s STR_r "\0"
 #define STRING_positive_lookahead0   STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
 #define STRING_positive_lookbehind0  STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
+#define STRING_non_atomic_positive_lookahead0   STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
+#define STRING_non_atomic_positive_lookbehind0  STR_n STR_o STR_n STR_UNDERSCORE STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
 #define STRING_negative_lookahead0   STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
 #define STRING_negative_lookbehind0  STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
 #define STRING_script_run0           STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0"
@@ -1301,7 +1311,7 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
 Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in
 order to the list of escapes immediately above. Furthermore, values up to
 OP_DOLLM must not be changed without adjusting the table called autoposstab in
-pcre2_auto_possess.c
+pcre2_auto_possess.c.
 
 Whenever this list is updated, the two macro definitions that follow must be
 updated to match. The possessification table called "opcode_possessify" in
@@ -1499,80 +1509,81 @@ enum {
   OP_KETRMIN,        /* 123 order. They are for groups the repeat for ever. */
   OP_KETRPOS,        /* 124 Possessive unlimited repeat. */
 
-  /* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
-  asserts must remain in order. */
+  /* The assertions must come before BRA, CBRA, ONCE, and COND. */
 
   OP_REVERSE,        /* 125 Move pointer back - used in lookbehind assertions */
   OP_ASSERT,         /* 126 Positive lookahead */
   OP_ASSERT_NOT,     /* 127 Negative lookahead */
   OP_ASSERTBACK,     /* 128 Positive lookbehind */
   OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */
+  OP_ASSERT_NA,      /* 130 Positive non-atomic lookahead */
+  OP_ASSERTBACK_NA,  /* 131 Positive non-atomic lookbehind */
 
   /* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come
   immediately after the assertions, with ONCE first, as there's a test for >=
   ONCE for a subpattern that isn't an assertion. The POS versions must
   immediately follow the non-POS versions in each case. */
 
-  OP_ONCE,           /* 130 Atomic group, contains captures */
-  OP_SCRIPT_RUN,     /* 131 Non-capture, but check characters' scripts */
-  OP_BRA,            /* 132 Start of non-capturing bracket */
-  OP_BRAPOS,         /* 133 Ditto, with unlimited, possessive repeat */
-  OP_CBRA,           /* 134 Start of capturing bracket */
-  OP_CBRAPOS,        /* 135 Ditto, with unlimited, possessive repeat */
-  OP_COND,           /* 136 Conditional group */
+  OP_ONCE,           /* 132 Atomic group, contains captures */
+  OP_SCRIPT_RUN,     /* 133 Non-capture, but check characters' scripts */
+  OP_BRA,            /* 134 Start of non-capturing bracket */
+  OP_BRAPOS,         /* 135 Ditto, with unlimited, possessive repeat */
+  OP_CBRA,           /* 136 Start of capturing bracket */
+  OP_CBRAPOS,        /* 137 Ditto, with unlimited, possessive repeat */
+  OP_COND,           /* 138 Conditional group */
 
   /* These five must follow the previous five, in the same order. There's a
   check for >= SBRA to distinguish the two sets. */
 
-  OP_SBRA,           /* 137 Start of non-capturing bracket, check empty  */
-  OP_SBRAPOS,        /* 138 Ditto, with unlimited, possessive repeat */
-  OP_SCBRA,          /* 139 Start of capturing bracket, check empty */
-  OP_SCBRAPOS,       /* 140 Ditto, with unlimited, possessive repeat */
-  OP_SCOND,          /* 141 Conditional group, check empty */
+  OP_SBRA,           /* 139 Start of non-capturing bracket, check empty  */
+  OP_SBRAPOS,        /* 149 Ditto, with unlimited, possessive repeat */
+  OP_SCBRA,          /* 141 Start of capturing bracket, check empty */
+  OP_SCBRAPOS,       /* 142 Ditto, with unlimited, possessive repeat */
+  OP_SCOND,          /* 143 Conditional group, check empty */
 
   /* The next two pairs must (respectively) be kept together. */
 
-  OP_CREF,           /* 142 Used to hold a capture number as condition */
-  OP_DNCREF,         /* 143 Used to point to duplicate names as a condition */
-  OP_RREF,           /* 144 Used to hold a recursion number as condition */
-  OP_DNRREF,         /* 145 Used to point to duplicate names as a condition */
-  OP_FALSE,          /* 146 Always false (used by DEFINE and VERSION) */
-  OP_TRUE,           /* 147 Always true (used by VERSION) */
+  OP_CREF,           /* 144 Used to hold a capture number as condition */
+  OP_DNCREF,         /* 145 Used to point to duplicate names as a condition */
+  OP_RREF,           /* 146 Used to hold a recursion number as condition */
+  OP_DNRREF,         /* 147 Used to point to duplicate names as a condition */
+  OP_FALSE,          /* 148 Always false (used by DEFINE and VERSION) */
+  OP_TRUE,           /* 149 Always true (used by VERSION) */
 
-  OP_BRAZERO,        /* 148 These two must remain together and in this */
-  OP_BRAMINZERO,     /* 149 order. */
-  OP_BRAPOSZERO,     /* 150 */
+  OP_BRAZERO,        /* 150 These two must remain together and in this */
+  OP_BRAMINZERO,     /* 151 order. */
+  OP_BRAPOSZERO,     /* 152 */
 
   /* These are backtracking control verbs */
 
-  OP_MARK,           /* 151 always has an argument */
-  OP_PRUNE,          /* 152 */
-  OP_PRUNE_ARG,      /* 153 same, but with argument */
-  OP_SKIP,           /* 154 */
-  OP_SKIP_ARG,       /* 155 same, but with argument */
-  OP_THEN,           /* 156 */
-  OP_THEN_ARG,       /* 157 same, but with argument */
-  OP_COMMIT,         /* 158 */
-  OP_COMMIT_ARG,     /* 159 same, but with argument */
+  OP_MARK,           /* 153 always has an argument */
+  OP_PRUNE,          /* 154 */
+  OP_PRUNE_ARG,      /* 155 same, but with argument */
+  OP_SKIP,           /* 156 */
+  OP_SKIP_ARG,       /* 157 same, but with argument */
+  OP_THEN,           /* 158 */
+  OP_THEN_ARG,       /* 159 same, but with argument */
+  OP_COMMIT,         /* 160 */
+  OP_COMMIT_ARG,     /* 161 same, but with argument */
 
   /* These are forced failure and success verbs. FAIL and ACCEPT do accept an
   argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
   without the need for a special opcode. */
 
-  OP_FAIL,           /* 160 */
-  OP_ACCEPT,         /* 161 */
-  OP_ASSERT_ACCEPT,  /* 162 Used inside assertions */
-  OP_CLOSE,          /* 163 Used before OP_ACCEPT to close open captures */
+  OP_FAIL,           /* 162 */
+  OP_ACCEPT,         /* 163 */
+  OP_ASSERT_ACCEPT,  /* 164 Used inside assertions */
+  OP_CLOSE,          /* 165 Used before OP_ACCEPT to close open captures */
 
   /* This is used to skip a subpattern with a {0} quantifier */
 
-  OP_SKIPZERO,       /* 164 */
+  OP_SKIPZERO,       /* 166 */
 
   /* This is used to identify a DEFINE group during compilation so that it can
   be checked for having only one branch. It is changed to OP_FALSE before
   compilation finishes. */
 
-  OP_DEFINE,         /* 165 */
+  OP_DEFINE,         /* 167 */
 
   /* This is not an opcode, but is used to check that tables indexed by opcode
   are the correct length, in order to catch updating errors - there have been
@@ -1585,7 +1596,7 @@ enum {
 /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
 definitions that follow must also be updated to match. There are also tables
 called "opcode_possessify" in pcre2_compile.c and "coptable" and "poptable" in
-pcre2_dfa_exec.c that must be updated. */
+pcre2_dfa_match.c that must be updated. */
 
 
 /* This macro defines textual names for all the opcodes. These are used only
@@ -1618,7 +1629,9 @@ some cases doesn't actually use these names at all). */
   "class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi",  \
   "Recurse", "Callout", "CalloutStr",                             \
   "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos",                  \
-  "Reverse", "Assert", "Assert not", "AssertB", "AssertB not",    \
+  "Reverse", "Assert", "Assert not",                              \
+  "Assert back", "Assert back not",                               \
+  "Non-atomic assert", "Non-atomic assert back",                  \
   "Once",                                                         \
   "Script run",                                                   \
   "Bra", "BraPos", "CBra", "CBraPos",                             \
@@ -1703,6 +1716,8 @@ in UTF-8 mode. The code that uses this table must know about such things. */
   1+LINK_SIZE,                   /* Assert not                             */ \
   1+LINK_SIZE,                   /* Assert behind                          */ \
   1+LINK_SIZE,                   /* Assert behind not                      */ \
+  1+LINK_SIZE,                   /* NA Assert                              */ \
+  1+LINK_SIZE,                   /* NA Assert behind                       */ \
   1+LINK_SIZE,                   /* ONCE                                   */ \
   1+LINK_SIZE,                   /* SCRIPT_RUN                             */ \
   1+LINK_SIZE,                   /* BRA                                    */ \

+ 6 - 4
thirdparty/pcre2/src/pcre2_intmodedep.h

@@ -205,19 +205,19 @@ whether its argument, which is assumed to be one code unit, is less than 256.
 The CHMAX_255 macro does not assume one code unit. The maximum length of a MARK
 name must fit in one code unit; currently it is set to 255 or 65535. The
 TABLE_GET macro is used to access elements of tables containing exactly 256
-items. When code points can be greater than 255, a check is needed before
-accessing these tables. */
+items. Its argument is a code unit. When code points can be greater than 255, a
+check is needed before accessing these tables. */
 
 #if PCRE2_CODE_UNIT_WIDTH == 8
 #define MAX_255(c) TRUE
 #define MAX_MARK ((1u << 8) - 1)
+#define TABLE_GET(c, table, default) ((table)[c])
 #ifdef SUPPORT_UNICODE
 #define SUPPORT_WIDE_CHARS
 #define CHMAX_255(c) ((c) <= 255u)
 #else
 #define CHMAX_255(c) TRUE
 #endif  /* SUPPORT_UNICODE */
-#define TABLE_GET(c, table, default) ((table)[c])
 
 #else  /* Code units are 16 or 32 bits */
 #define CHMAX_255(c) ((c) <= 255u)
@@ -228,7 +228,6 @@ accessing these tables. */
 #endif
 
 
-
 /* ----------------- Character-handling macros ----------------- */
 
 /* There is a proposed future special "UTF-21" mode, in which only the lowest
@@ -854,6 +853,7 @@ typedef struct match_block {
   uint32_t match_call_count;      /* Number of times a new frame is created */
   BOOL hitend;                    /* Hit the end of the subject at some point */
   BOOL hasthen;                   /* Pattern contains (*THEN) */
+  BOOL allowemptypartial;         /* Allow empty hard partial */
   const uint8_t *lcc;             /* Points to lower casing table */
   const uint8_t *fcc;             /* Points to case-flipping table */
   const uint8_t *ctypes;          /* Points to table of type maps */
@@ -866,6 +866,7 @@ typedef struct match_block {
   PCRE2_SPTR name_table;          /* Table of group names */
   PCRE2_SPTR start_code;          /* For use when recursing */
   PCRE2_SPTR start_subject;       /* Start of the subject string */
+  PCRE2_SPTR check_subject;       /* Where UTF-checked from */
   PCRE2_SPTR end_subject;         /* End of the subject string */
   PCRE2_SPTR end_match_ptr;       /* Subject position at end match */
   PCRE2_SPTR start_used_ptr;      /* Earliest consulted character */
@@ -908,6 +909,7 @@ typedef struct dfa_match_block {
   uint32_t poptions;              /* Pattern options */
   uint32_t nltype;                /* Newline type */
   uint32_t nllen;                 /* Newline string length */
+  BOOL allowemptypartial;         /* Allow empty hard partial */
   PCRE2_UCHAR nl[4];              /* Newline string when fixed */
   uint16_t bsr_convention;        /* \R interpretation */
   pcre2_callout_block *cb;        /* Points to a callout block */

File diff suppressed because it is too large
+ 264 - 824
thirdparty/pcre2/src/pcre2_jit_compile.c


+ 0 - 1
thirdparty/pcre2/src/pcre2_jit_match.c

@@ -74,7 +74,6 @@ Arguments:
   options         option bits
   match_data      points to a match_data block
   mcontext        points to a match context
-  jit_stack       points to a JIT stack
 
 Returns:          > 0 => success; value is the number of ovector pairs filled
                   = 0 => success, but ovector is not big enough

+ 321 - 0
thirdparty/pcre2/src/pcre2_jit_neon_inc.h

@@ -0,0 +1,321 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+            This module by Zoltan Herczeg and Sebastian Pop
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+# if defined(FFCS)
+#  if defined(FF_UTF)
+#   define FF_FUN ffcs_utf
+#  else
+#   define FF_FUN ffcs
+#  endif
+
+# elif defined(FFCS_2)
+#  if defined(FF_UTF)
+#   define FF_FUN ffcs_2_utf
+#  else
+#   define FF_FUN ffcs_2
+#  endif
+
+# elif defined(FFCS_MASK)
+#  if defined(FF_UTF)
+#   define FF_FUN ffcs_mask_utf
+#  else
+#   define FF_FUN ffcs_mask
+#  endif
+
+# elif defined(FFCPS_0)
+#  if defined (FF_UTF)
+#   define FF_FUN ffcps_0_utf
+#  else
+#   define FF_FUN ffcps_0
+#  endif
+
+# elif defined (FFCPS_1)
+#  if defined (FF_UTF)
+#   define FF_FUN ffcps_1_utf
+#  else
+#   define FF_FUN ffcps_1
+#  endif
+
+# elif defined (FFCPS_DEFAULT)
+#  if defined (FF_UTF)
+#   define FF_FUN ffcps_default_utf
+#  else
+#   define FF_FUN ffcps_default
+#  endif
+# endif
+
+static sljit_u8* SLJIT_FUNC FF_FUN(sljit_u8 *str_end, sljit_u8 *str_ptr, sljit_uw offs1, sljit_uw offs2, sljit_uw chars)
+#undef FF_FUN
+{
+quad_word qw;
+int_char ic;
+ic.x = chars;
+
+#if defined(FFCS)
+sljit_u8 c1 = ic.c.c1;
+vect_t vc1 = VDUPQ(c1);
+
+#elif defined(FFCS_2)
+sljit_u8 c1 = ic.c.c1;
+vect_t vc1 = VDUPQ(c1);
+sljit_u8 c2 = ic.c.c2;
+vect_t vc2 = VDUPQ(c2);
+
+#elif defined(FFCS_MASK)
+sljit_u8 c1 = ic.c.c1;
+vect_t vc1 = VDUPQ(c1);
+sljit_u8 mask = ic.c.c2;
+vect_t vmask = VDUPQ(mask);
+#endif
+
+#if defined(FFCPS)
+compare_type compare1_type = compare_match1;
+compare_type compare2_type = compare_match1;
+vect_t cmp1a, cmp1b, cmp2a, cmp2b;
+const sljit_u32 diff = IN_UCHARS(offs1 - offs2);
+PCRE2_UCHAR char1a = ic.c.c1;
+PCRE2_UCHAR char2a = ic.c.c3;
+
+# ifdef FFCPS_CHAR1A2A
+cmp1a = VDUPQ(char1a);
+cmp2a = VDUPQ(char2a);
+# else
+PCRE2_UCHAR char1b = ic.c.c2;
+PCRE2_UCHAR char2b = ic.c.c4;
+if (char1a == char1b)
+  cmp1a = VDUPQ(char1a);
+else
+  {
+  sljit_u32 bit1 = char1a ^ char1b;
+  if (is_powerof2(bit1))
+    {
+    compare1_type = compare_match1i;
+    cmp1a = VDUPQ(char1a | bit1);
+    cmp1b = VDUPQ(bit1);
+    }
+  else
+    {
+    compare1_type = compare_match2;
+    cmp1a = VDUPQ(char1a);
+    cmp1b = VDUPQ(char1b);
+    }
+  }
+
+if (char2a == char2b)
+  cmp2a = VDUPQ(char2a);
+else
+  {
+  sljit_u32 bit2 = char2a ^ char2b;
+  if (is_powerof2(bit2))
+    {
+    compare2_type = compare_match1i;
+    cmp2a = VDUPQ(char2a | bit2);
+    cmp2b = VDUPQ(bit2);
+    }
+  else
+    {
+    compare2_type = compare_match2;
+    cmp2a = VDUPQ(char2a);
+    cmp2b = VDUPQ(char2b);
+    }
+  }
+# endif
+
+str_ptr += IN_UCHARS(offs1);
+#endif
+
+#if PCRE2_CODE_UNIT_WIDTH != 8
+vect_t char_mask = VDUPQ(0xff);
+#endif
+
+#if defined(FF_UTF)
+restart:;
+#endif
+
+#if defined(FFCPS)
+sljit_u8 *p1 = str_ptr - diff;
+#endif
+sljit_s32 align_offset = ((uint64_t)str_ptr & 0xf);
+str_ptr = (sljit_u8 *) ((uint64_t)str_ptr & ~0xf);
+vect_t data = VLD1Q(str_ptr);
+#if PCRE2_CODE_UNIT_WIDTH != 8
+data = VANDQ(data, char_mask);
+#endif
+ 
+#if defined(FFCS)
+vect_t eq = VCEQQ(data, vc1);
+
+#elif defined(FFCS_2)
+vect_t eq1 = VCEQQ(data, vc1);
+vect_t eq2 = VCEQQ(data, vc2);
+vect_t eq = VORRQ(eq1, eq2);    
+
+#elif defined(FFCS_MASK)
+vect_t eq = VORRQ(data, vmask);
+eq = VCEQQ(eq, vc1);
+
+#elif defined(FFCPS)
+# if defined(FFCPS_DIFF1)
+vect_t prev_data = data;
+# endif
+
+vect_t data2;
+if (p1 < str_ptr)
+  {
+  data2 = VLD1Q(str_ptr - diff);
+#if PCRE2_CODE_UNIT_WIDTH != 8
+  data2 = VANDQ(data2, char_mask);
+#endif
+  }
+else
+  data2 = shift_left_n_lanes(data, offs1 - offs2);
+ 
+data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
+data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
+vect_t eq = VANDQ(data, data2);
+#endif
+
+VST1Q(qw.mem, eq);
+/* Ignore matches before the first STR_PTR. */
+if (align_offset < 8)
+  {
+  qw.dw[0] >>= align_offset * 8;
+  if (qw.dw[0])
+    {
+    str_ptr += align_offset + __builtin_ctzll(qw.dw[0]) / 8;
+    goto match;
+    }
+  if (qw.dw[1])
+    {
+    str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
+    goto match;
+    }
+  }
+else
+  {
+  qw.dw[1] >>= (align_offset - 8) * 8;
+  if (qw.dw[1])
+    {
+    str_ptr += align_offset + __builtin_ctzll(qw.dw[1]) / 8;
+    goto match;
+    }
+  }
+str_ptr += 16;
+
+while (str_ptr < str_end)
+  {
+  vect_t orig_data = VLD1Q(str_ptr);
+#if PCRE2_CODE_UNIT_WIDTH != 8
+  orig_data = VANDQ(orig_data, char_mask);
+#endif
+  data = orig_data;
+
+#if defined(FFCS)
+  eq = VCEQQ(data, vc1);
+
+#elif defined(FFCS_2)
+  eq1 = VCEQQ(data, vc1);
+  eq2 = VCEQQ(data, vc2);
+  eq = VORRQ(eq1, eq2);    
+
+#elif defined(FFCS_MASK)
+  eq = VORRQ(data, vmask);
+  eq = VCEQQ(eq, vc1);
+#endif
+
+#if defined(FFCPS)
+# if defined (FFCPS_DIFF1)
+  data2 = VEXTQ(prev_data, data, VECTOR_FACTOR - 1);
+# else
+  data2 = VLD1Q(str_ptr - diff);
+#  if PCRE2_CODE_UNIT_WIDTH != 8
+  data2 = VANDQ(data2, char_mask);
+#  endif
+# endif
+
+# ifdef FFCPS_CHAR1A2A
+  data = VCEQQ(data, cmp1a);
+  data2 = VCEQQ(data2, cmp2a);
+# else
+  data = fast_forward_char_pair_compare(compare1_type, data, cmp1a, cmp1b);
+  data2 = fast_forward_char_pair_compare(compare2_type, data2, cmp2a, cmp2b);
+# endif
+
+  eq = VANDQ(data, data2);
+#endif
+
+  VST1Q(qw.mem, eq);
+  if (qw.dw[0])
+    str_ptr += __builtin_ctzll(qw.dw[0]) / 8;
+  else if (qw.dw[1])
+    str_ptr += 8 + __builtin_ctzll(qw.dw[1]) / 8;
+  else {
+    str_ptr += 16;
+#if defined (FFCPS_DIFF1)
+    prev_data = orig_data;
+#endif
+    continue;
+  }
+
+match:;
+  if (str_ptr >= str_end)
+    /* Failed match. */
+    return NULL;
+
+#if defined(FF_UTF)
+  if (utf_continue(str_ptr + IN_UCHARS(-offs1)))
+    {
+    /* Not a match. */
+    str_ptr += IN_UCHARS(1);
+    goto restart;
+    }
+#endif
+
+  /* Match. */
+#if defined (FFCPS)
+  str_ptr -= IN_UCHARS(offs1);
+#endif
+  return str_ptr;
+  }
+
+/* Failed match. */
+return NULL;
+}

+ 993 - 0
thirdparty/pcre2/src/pcre2_jit_simd_inc.h

@@ -0,0 +1,993 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+                    This module by Zoltan Herczeg
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg)
+{
+#if PCRE2_CODE_UNIT_WIDTH == 8
+OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
+return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80);
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
+return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00);
+#else
+#error "Unknown code width"
+#endif
+}
+#endif
+
+static sljit_s32 character_to_int32(PCRE2_UCHAR chr)
+{
+sljit_u32 value = chr;
+#if PCRE2_CODE_UNIT_WIDTH == 8
+#define SSE2_COMPARE_TYPE_INDEX 0
+return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value);
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+#define SSE2_COMPARE_TYPE_INDEX 1
+return (sljit_s32)((value << 16) | value);
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+#define SSE2_COMPARE_TYPE_INDEX 2
+return (sljit_s32)(value);
+#else
+#error "Unsupported unit width"
+#endif
+}
+
+static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg, sljit_s8 offset)
+{
+sljit_u8 instruction[5];
+
+SLJIT_ASSERT(dst_xmm_reg < 8);
+SLJIT_ASSERT(src_general_reg < 8);
+
+/* MOVDQA xmm1, xmm2/m128 */
+instruction[0] = ((sljit_u8)offset & 0xf) == 0 ? 0x66 : 0xf3;
+instruction[1] = 0x0f;
+instruction[2] = 0x6f;
+
+if (offset == 0)
+  {
+  instruction[3] = (dst_xmm_reg << 3) | src_general_reg;
+  sljit_emit_op_custom(compiler, instruction, 4);
+  return;
+  }
+
+instruction[3] = 0x40 | (dst_xmm_reg << 3) | src_general_reg;
+instruction[4] = (sljit_u8)offset;
+sljit_emit_op_custom(compiler, instruction, 5);
+}
+
+typedef enum {
+    sse2_compare_match1,
+    sse2_compare_match1i,
+    sse2_compare_match2,
+} sse2_compare_type;
+
+static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type,
+  int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind)
+{
+sljit_u8 instruction[4];
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
+
+SLJIT_ASSERT(step >= 0 && step <= 3);
+
+if (compare_type != sse2_compare_match2)
+  {
+  if (step == 0)
+    {
+    if (compare_type == sse2_compare_match1i)
+      {
+      /* POR xmm1, xmm2/m128 */
+      /* instruction[0] = 0x66; */
+      /* instruction[1] = 0x0f; */
+      instruction[2] = 0xeb;
+      instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind;
+      sljit_emit_op_custom(compiler, instruction, 4);
+      }
+    return;
+    }
+
+  if (step != 2)
+    return;
+
+  /* PCMPEQB/W/D xmm1, xmm2/m128 */
+  /* instruction[0] = 0x66; */
+  /* instruction[1] = 0x0f; */
+  instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
+  instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+  return;
+  }
+
+switch (step)
+  {
+  case 0:
+  /* MOVDQA xmm1, xmm2/m128 */
+  /* instruction[0] = 0x66; */
+  /* instruction[1] = 0x0f; */
+  instruction[2] = 0x6f;
+  instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+  return;
+
+  case 1:
+  /* PCMPEQB/W/D xmm1, xmm2/m128 */
+  /* instruction[0] = 0x66; */
+  /* instruction[1] = 0x0f; */
+  instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
+  instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+  return;
+
+  case 2:
+  /* PCMPEQB/W/D xmm1, xmm2/m128 */
+  /* instruction[0] = 0x66; */
+  /* instruction[1] = 0x0f; */
+  instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
+  instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+  return;
+
+  case 3:
+  /* POR xmm1, xmm2/m128 */
+  /* instruction[0] = 0x66; */
+  /* instruction[1] = 0x0f; */
+  instruction[2] = 0xeb;
+  instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+  return;
+  }
+}
+
+#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
+
+static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
+{
+DEFINE_COMPILER;
+struct sljit_label *start;
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+struct sljit_label *restart;
+#endif
+struct sljit_jump *quit;
+struct sljit_jump *partial_quit[2];
+sse2_compare_type compare_type = sse2_compare_match1;
+sljit_u8 instruction[8];
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
+sljit_s32 data_ind = 0;
+sljit_s32 tmp_ind = 1;
+sljit_s32 cmp1_ind = 2;
+sljit_s32 cmp2_ind = 3;
+sljit_u32 bit = 0;
+int i;
+
+SLJIT_UNUSED_ARG(offset);
+
+if (char1 != char2)
+  {
+  bit = char1 ^ char2;
+  compare_type = sse2_compare_match1i;
+
+  if (!is_powerof2(bit))
+    {
+    bit = 0;
+    compare_type = sse2_compare_match2;
+    }
+  }
+
+partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+if (common->mode == PCRE2_JIT_COMPLETE)
+  add_jump(compiler, &common->failed_match, partial_quit[0]);
+
+/* First part (unaligned start) */
+
+OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
+
+SLJIT_ASSERT(tmp1_reg_ind < 8);
+
+/* MOVD xmm, r/m32 */
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
+instruction[2] = 0x6e;
+instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+if (char1 != char2)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
+
+  /* MOVD xmm, r/m32 */
+  instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+  }
+
+OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
+
+/* PSHUFD xmm1, xmm2/m128, imm8 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x70;
+instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind;
+instruction[4] = 0;
+sljit_emit_op_custom(compiler, instruction, 5);
+
+if (char1 != char2)
+  {
+  /* PSHUFD xmm1, xmm2/m128, imm8 */
+  instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind;
+  sljit_emit_op_custom(compiler, instruction, 5);
+  }
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+restart = LABEL();
+#endif
+OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
+
+load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
+for (i = 0; i < 4; i++)
+  fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+/* PMOVMSKB reg, xmm */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xd7;
+instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
+
+quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
+
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+
+/* Second part (aligned) */
+start = LABEL();
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
+
+partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
+if (common->mode == PCRE2_JIT_COMPLETE)
+  add_jump(compiler, &common->failed_match, partial_quit[1]);
+
+load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
+for (i = 0; i < 4; i++)
+  fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);
+
+/* PMOVMSKB reg, xmm */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xd7;
+instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
+
+JUMPHERE(quit);
+
+/* BSF r32, r/m32 */
+instruction[0] = 0x0f;
+instruction[1] = 0xbc;
+instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
+sljit_emit_op_custom(compiler, instruction, 3);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+
+if (common->mode != PCRE2_JIT_COMPLETE)
+  {
+  JUMPHERE(partial_quit[0]);
+  JUMPHERE(partial_quit[1]);
+  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
+  CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
+  }
+else
+  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+if (common->utf && offset > 0)
+  {
+  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
+
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
+
+  quit = jump_if_utf_char_start(compiler, TMP1);
+
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
+  JUMPTO(SLJIT_JUMP, restart);
+
+  JUMPHERE(quit);
+  }
+#endif
+}
+
+#ifndef _WIN64
+
+static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
+{
+#if PCRE2_CODE_UNIT_WIDTH == 8
+return 15;
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+return 7;
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+return 3;
+#else
+#error "Unsupported unit width"
+#endif
+}
+
+#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
+
+static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
+  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
+{
+DEFINE_COMPILER;
+sse2_compare_type compare1_type = sse2_compare_match1;
+sse2_compare_type compare2_type = sse2_compare_match1;
+sljit_u32 bit1 = 0;
+sljit_u32 bit2 = 0;
+sljit_u32 diff = IN_UCHARS(offs1 - offs2);
+sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
+sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2);
+sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
+sljit_s32 data1_ind = 0;
+sljit_s32 data2_ind = 1;
+sljit_s32 tmp1_ind = 2;
+sljit_s32 tmp2_ind = 3;
+sljit_s32 cmp1a_ind = 4;
+sljit_s32 cmp1b_ind = 5;
+sljit_s32 cmp2a_ind = 6;
+sljit_s32 cmp2b_ind = 7;
+struct sljit_label *start;
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+struct sljit_label *restart;
+#endif
+struct sljit_jump *jump[2];
+sljit_u8 instruction[8];
+int i;
+
+SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
+SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
+SLJIT_ASSERT(tmp1_reg_ind < 8 && tmp2_reg_ind == 1);
+
+/* Initialize. */
+if (common->match_end_ptr != 0)
+  {
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
+  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
+  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
+
+  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
+  CMOV(SLJIT_LESS, STR_END, TMP1, 0);
+  }
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+/* MOVD xmm, r/m32 */
+instruction[0] = 0x66;
+instruction[1] = 0x0f;
+instruction[2] = 0x6e;
+
+if (char1a == char1b)
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
+else
+  {
+  bit1 = char1a ^ char1b;
+  if (is_powerof2(bit1))
+    {
+    compare1_type = sse2_compare_match1i;
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
+    }
+  else
+    {
+    compare1_type = sse2_compare_match2;
+    bit1 = 0;
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
+    }
+  }
+
+instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_reg_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+if (char1a != char1b)
+  {
+  instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_reg_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+  }
+
+if (char2a == char2b)
+  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
+else
+  {
+  bit2 = char2a ^ char2b;
+  if (is_powerof2(bit2))
+    {
+    compare2_type = sse2_compare_match1i;
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
+    }
+  else
+    {
+    compare2_type = sse2_compare_match2;
+    bit2 = 0;
+    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
+    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
+    }
+  }
+
+instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_reg_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+if (char2a != char2b)
+  {
+  instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_reg_ind;
+  sljit_emit_op_custom(compiler, instruction, 4);
+  }
+
+/* PSHUFD xmm1, xmm2/m128, imm8 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x70;
+instruction[4] = 0;
+
+instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind;
+sljit_emit_op_custom(compiler, instruction, 5);
+
+if (char1a != char1b)
+  {
+  instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind;
+  sljit_emit_op_custom(compiler, instruction, 5);
+  }
+
+instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind;
+sljit_emit_op_custom(compiler, instruction, 5);
+
+if (char2a != char2b)
+  {
+  instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind;
+  sljit_emit_op_custom(compiler, instruction, 5);
+  }
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+restart = LABEL();
+#endif
+
+OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
+OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
+OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
+
+load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0);
+
+jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);
+
+load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff);
+jump[1] = JUMP(SLJIT_JUMP);
+
+JUMPHERE(jump[0]);
+
+/* MOVDQA xmm1, xmm2/m128 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x6f;
+instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+/* PSLLDQ xmm1, imm8 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0x73;
+instruction[3] = 0xc0 | (7 << 3) | data2_ind;
+instruction[4] = diff;
+sljit_emit_op_custom(compiler, instruction, 5);
+
+JUMPHERE(jump[1]);
+
+OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);
+
+for (i = 0; i < 4; i++)
+  {
+  fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
+  fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
+  }
+
+/* PAND xmm1, xmm2/m128 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xdb;
+instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+/* PMOVMSKB reg, xmm */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xd7;
+instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+/* Ignore matches before the first STR_PTR. */
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);
+
+jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);
+
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
+
+/* Main loop. */
+start = LABEL();
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0);
+load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff);
+
+for (i = 0; i < 4; i++)
+  {
+  fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
+  fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
+  }
+
+/* PAND xmm1, xmm2/m128 */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xdb;
+instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+/* PMOVMSKB reg, xmm */
+/* instruction[0] = 0x66; */
+/* instruction[1] = 0x0f; */
+instruction[2] = 0xd7;
+instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0;
+sljit_emit_op_custom(compiler, instruction, 4);
+
+CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
+
+JUMPHERE(jump[0]);
+
+/* BSF r32, r/m32 */
+instruction[0] = 0x0f;
+instruction[1] = 0xbc;
+instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
+sljit_emit_op_custom(compiler, instruction, 3);
+
+OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+
+add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
+
+if (common->match_end_ptr != 0)
+  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+if (common->utf)
+  {
+  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));
+
+  jump[0] = jump_if_utf_char_start(compiler, TMP1);
+
+  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);
+
+  add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));
+
+  JUMPHERE(jump[0]);
+  }
+#endif
+
+OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
+
+if (common->match_end_ptr != 0)
+  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
+}
+
+#endif /* !_WIN64 */
+
+#undef SSE2_COMPARE_TYPE_INDEX
+
+#endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */
+
+#if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__))
+
+#include <arm_neon.h>
+
+typedef union {
+  unsigned int x;
+  struct { unsigned char c1, c2, c3, c4; } c;
+} int_char;
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+static SLJIT_INLINE int utf_continue(sljit_u8 *s)
+{
+#if PCRE2_CODE_UNIT_WIDTH == 8
+return (*s & 0xc0) == 0x80;
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+return (*s & 0xfc00) == 0xdc00;
+#else
+#error "Unknown code width"
+#endif
+}
+#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+# define VECTOR_FACTOR 16
+# define vect_t uint8x16_t
+# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
+# define VCEQQ vceqq_u8
+# define VORRQ vorrq_u8
+# define VST1Q vst1q_u8
+# define VDUPQ vdupq_n_u8
+# define VEXTQ vextq_u8
+# define VANDQ vandq_u8
+typedef union {
+       uint8_t mem[16];
+       uint64_t dw[2];
+} quad_word;
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+# define VECTOR_FACTOR 8
+# define vect_t uint16x8_t
+# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
+# define VCEQQ vceqq_u16
+# define VORRQ vorrq_u16
+# define VST1Q vst1q_u16
+# define VDUPQ vdupq_n_u16
+# define VEXTQ vextq_u16
+# define VANDQ vandq_u16
+typedef union {
+       uint16_t mem[8];
+       uint64_t dw[2];
+} quad_word;
+#else
+# define VECTOR_FACTOR 4
+# define vect_t uint32x4_t
+# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
+# define VCEQQ vceqq_u32
+# define VORRQ vorrq_u32
+# define VST1Q vst1q_u32
+# define VDUPQ vdupq_n_u32
+# define VEXTQ vextq_u32
+# define VANDQ vandq_u32
+typedef union {
+       uint32_t mem[4];
+       uint64_t dw[2];
+} quad_word;
+#endif
+
+#define FFCS
+#include "pcre2_jit_neon_inc.h"
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+# define FF_UTF
+# include "pcre2_jit_neon_inc.h"
+# undef FF_UTF
+#endif
+#undef FFCS
+
+#define FFCS_2
+#include "pcre2_jit_neon_inc.h"
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+# define FF_UTF
+# include "pcre2_jit_neon_inc.h"
+# undef FF_UTF
+#endif
+#undef FFCS_2
+
+#define FFCS_MASK
+#include "pcre2_jit_neon_inc.h"
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+# define FF_UTF
+# include "pcre2_jit_neon_inc.h"
+# undef FF_UTF
+#endif
+#undef FFCS_MASK
+
+#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1
+
+static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
+{
+DEFINE_COMPILER;
+int_char ic;
+struct sljit_jump *partial_quit;
+/* Save temporary registers. */
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0);
+
+/* Prepare function arguments */
+OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
+OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
+OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset);
+
+if (char1 == char2)
+  {
+    ic.c.c1 = char1;
+    ic.c.c2 = char2;
+    OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+  if (common->utf && offset > 0)
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
+                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_utf));
+  else
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
+                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs));
+#else
+  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
+                   SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs));
+#endif
+  }
+else
+  {
+  PCRE2_UCHAR mask = char1 ^ char2;
+  if (is_powerof2(mask))
+    {
+    ic.c.c1 = char1 | mask;
+    ic.c.c2 = mask;
+    OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+    if (common->utf && offset > 0)
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
+                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask_utf));
+    else
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
+                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask));
+#else
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
+                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask));
+#endif
+    }
+  else
+    {
+      ic.c.c1 = char1;
+      ic.c.c2 = char2;
+      OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x);
+
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+    if (common->utf && offset > 0)
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
+                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2_utf));
+    else
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
+                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2));
+#else
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
+                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2));
+#endif
+    }
+  }
+/* Restore registers. */
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
+
+/* Check return value. */
+partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
+if (common->mode == PCRE2_JIT_COMPLETE)
+  add_jump(compiler, &common->failed_match, partial_quit);
+
+/* Fast forward STR_PTR to the result of memchr. */
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
+
+if (common->mode != PCRE2_JIT_COMPLETE)
+  JUMPHERE(partial_quit);
+}
+
+typedef enum {
+  compare_match1,
+  compare_match1i,
+  compare_match2,
+} compare_type;
+
+static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2)
+{
+if (ctype == compare_match2)
+  {
+  vect_t tmp = dst;
+  dst = VCEQQ(dst, cmp1);
+  tmp = VCEQQ(tmp, cmp2);
+  dst = VORRQ(dst, tmp);
+  return dst;
+  }
+
+if (ctype == compare_match1i)
+  dst = VORRQ(dst, cmp2);
+dst = VCEQQ(dst, cmp1);
+return dst;
+}
+
+static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void)
+{
+#if PCRE2_CODE_UNIT_WIDTH == 8
+return 15;
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+return 7;
+#elif PCRE2_CODE_UNIT_WIDTH == 32
+return 3;
+#else
+#error "Unsupported unit width"
+#endif
+}
+
+/* ARM doesn't have a shift left across lanes. */
+static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n)
+{
+vect_t zero = VDUPQ(0);
+SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR);
+/* VEXTQ takes an immediate as last argument. */
+#define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X);
+switch (n)
+  {
+  C(1); C(2); C(3);
+#if PCRE2_CODE_UNIT_WIDTH != 32
+  C(4); C(5); C(6); C(7);
+# if PCRE2_CODE_UNIT_WIDTH != 16
+  C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15);
+# endif
+#endif
+  default:
+    /* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't
+       happen. The return is still here for compilers to not warn. */
+    return a;
+  }
+}
+
+#define FFCPS
+#define FFCPS_DIFF1
+#define FFCPS_CHAR1A2A
+
+#define FFCPS_0
+#include "pcre2_jit_neon_inc.h"
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+# define FF_UTF
+# include "pcre2_jit_neon_inc.h"
+# undef FF_UTF
+#endif
+#undef FFCPS_0
+
+#undef FFCPS_CHAR1A2A
+
+#define FFCPS_1
+#include "pcre2_jit_neon_inc.h"
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+# define FF_UTF
+# include "pcre2_jit_neon_inc.h"
+# undef FF_UTF
+#endif
+#undef FFCPS_1
+
+#undef FFCPS_DIFF1
+
+#define FFCPS_DEFAULT
+#include "pcre2_jit_neon_inc.h"
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+# define FF_UTF
+# include "pcre2_jit_neon_inc.h"
+# undef FF_UTF
+#endif
+#undef FFCPS
+
+#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1
+
+static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
+  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
+{
+DEFINE_COMPILER;
+sljit_u32 diff = IN_UCHARS(offs1 - offs2);
+struct sljit_jump *partial_quit;
+int_char ic;
+SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
+SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
+SLJIT_ASSERT(compiler->scratches == 5);
+
+/* Save temporary register STR_PTR. */
+OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
+
+/* Prepare arguments for the function call. */
+if (common->match_end_ptr == 0)
+   OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0);
+else
+  {
+  OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
+  OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
+
+  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, STR_END, 0, SLJIT_R0, 0);
+  CMOV(SLJIT_LESS, SLJIT_R0, STR_END, 0);
+  }
+
+OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0); 
+OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1);
+OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2);
+ic.c.c1 = char1a;
+ic.c.c2 = char1b;
+ic.c.c3 = char2a;
+ic.c.c4 = char2b;
+OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, ic.x);
+
+if (diff == 1) {
+  if (char1a == char1b && char2a == char2b) {
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+    if (common->utf)
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
+                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0_utf));
+    else
+#endif
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
+                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0));
+  } else {
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+    if (common->utf)
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
+                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1_utf));
+    else
+#endif
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
+                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1));
+  }
+} else {
+#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+  if (common->utf)
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
+                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default_utf));
+  else
+#endif
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
+                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default));
+}
+
+/* Restore STR_PTR register. */
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+
+/* Check return value. */
+partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
+add_jump(compiler, &common->failed_match, partial_quit);
+
+/* Fast forward STR_PTR to the result of memchr. */
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
+
+JUMPHERE(partial_quit);
+}
+
+#endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */

+ 11 - 0
thirdparty/pcre2/src/pcre2_maketables.c

@@ -147,4 +147,15 @@ for (i = 0; i < 256; i++)
 return yield;
 }
 
+#ifndef DFTABLES
+PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
+pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables)
+{
+  if (gcontext)
+    gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data);
+  else
+    free((void *)tables);
+}
+#endif
+
 /* End of pcre2_maketables.c */

+ 456 - 147
thirdparty/pcre2/src/pcre2_match.c

@@ -415,8 +415,7 @@ if (caseless)
   else
 #endif
 
-    /* Not in UTF mode */
-
+  /* Not in UTF mode */
     {
     for (; length > 0; length--)
       {
@@ -491,27 +490,32 @@ heap is used for a larger vector.
 *************************************************/
 
 /* These macros pack up tests that are used for partial matching several times
-in the code. We set the "hit end" flag if the pointer is at the end of the
-subject and also past the earliest inspected character (i.e. something has been
-matched, even if not part of the actual matched string). For hard partial
-matching, we then return immediately. The second one is used when we already
-know we are past the end of the subject. */
+in the code. The second one is used when we already know we are past the end of
+the subject. We set the "hit end" flag if the pointer is at the end of the
+subject and either (a) the pointer is past the earliest inspected character
+(i.e. something has been matched, even if not part of the actual matched
+string), or (b) the pattern contains a lookbehind. These are the conditions for
+which adding more characters may allow the current match to continue.
+
+For hard partial matching, we immediately return a partial match. Otherwise,
+carrying on means that a complete match on the current subject will be sought.
+A partial match is returned only if no complete match can be found. */
 
 #define CHECK_PARTIAL()\
-  if (mb->partial != 0 && Feptr >= mb->end_subject && \
-      Feptr > mb->start_used_ptr) \
+  if (Feptr >= mb->end_subject) \
     { \
-    mb->hitend = TRUE; \
-    if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
+    SCHECK_PARTIAL(); \
     }
 
 #define SCHECK_PARTIAL()\
-  if (mb->partial != 0 && Feptr > mb->start_used_ptr) \
+  if (mb->partial != 0 && \
+      (Feptr > mb->start_used_ptr || mb->allowemptypartial)) \
     { \
     mb->hitend = TRUE; \
     if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; \
     }
 
+
 /* These macros are used to implement backtracking. They simulate a recursive
 call to the match() function by means of a local vector of frames which
 remember the backtracking points. */
@@ -5127,6 +5131,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 
     case OP_ASSERT:
     case OP_ASSERTBACK:
+    case OP_ASSERT_NA:
+    case OP_ASSERTBACK_NA:
     Lframe_type = GF_NOCAPTURE | Fop;
     for (;;)
       {
@@ -5412,7 +5418,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
       {
       while (number-- > 0)
         {
-        if (Feptr <= mb->start_subject) RRETURN(MATCH_NOMATCH);
+        if (Feptr <= mb->check_subject) RRETURN(MATCH_NOMATCH);
         Feptr--;
         BACKCHAR(Feptr);
         }
@@ -5420,7 +5426,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
     else
 #endif
 
-    /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
+    /* No UTF-8 support, or not in UTF-8 mode: count is code unit count */
 
       {
       if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
@@ -5472,15 +5478,16 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 
       /* If we are at the end of an assertion that is a condition, return a
       match, discarding any intermediate backtracking points. Copy back the
-      captures into the frame before N so that they are set on return. Doing
-      this for all assertions, both positive and negative, seems to match what
-      Perl does. */
+      mark setting and the captures into the frame before N so that they are
+      set on return. Doing this for all assertions, both positive and negative,
+      seems to match what Perl does. */
 
       if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
         {
         memcpy((char *)P + offsetof(heapframe, ovector), Fovector,
           Foffset_top * sizeof(PCRE2_SIZE));
         P->offset_top = Foffset_top;
+        P->mark = Fmark;
         Fback_frame = (char *)F - (char *)P;
         RRETURN(MATCH_MATCH);
         }
@@ -5496,10 +5503,20 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
       case OP_SCOND:
       break;
 
-      /* Positive assertions are like OP_ONCE, except that in addition the
+      /* Non-atomic positive assertions are like OP_BRA, except that the
       subject pointer must be put back to where it was at the start of the
       assertion. */
 
+      case OP_ASSERT_NA:
+      case OP_ASSERTBACK_NA:
+      if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
+      Feptr = P->eptr;
+      break;
+
+      /* Atomic positive assertions are like OP_ONCE, except that in addition
+      the subject pointer must be put back to where it was at the start of the
+      assertion. */
+
       case OP_ASSERT:
       case OP_ASSERTBACK:
       if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
@@ -5640,7 +5657,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 
     case OP_EOD:
     if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
-    SCHECK_PARTIAL();
+    if (mb->partial != 0)
+      {
+      mb->hitend = TRUE;
+      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
+      }
     Fecode++;
     break;
 
@@ -5665,7 +5686,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 
     /* Either at end of string or \n before end. */
 
-    SCHECK_PARTIAL();
+    if (mb->partial != 0)
+      {
+      mb->hitend = TRUE;
+      if (mb->partial > 1) return PCRE2_ERROR_PARTIAL;
+      }
     Fecode++;
     break;
 
@@ -5743,7 +5768,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 
     case OP_NOT_WORD_BOUNDARY:
     case OP_WORD_BOUNDARY:
-    if (Feptr == mb->start_subject) prev_is_word = FALSE; else
+    if (Feptr == mb->check_subject) prev_is_word = FALSE; else
       {
       PCRE2_SPTR lastptr = Feptr - 1;
 #ifdef SUPPORT_UNICODE
@@ -5946,6 +5971,7 @@ in rrc. */
 #define LBL(val) case val: goto L_RM##val;
 
 RETURN_SWITCH:
+if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
 if (Frdepth == 0) return rrc;                     /* Exit from the top level */
 F = (heapframe *)((char *)F - Fback_frame);       /* Backtrack */
 mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
@@ -5999,9 +6025,9 @@ Arguments:
 
 Returns:          > 0 => success; value is the number of ovector pairs filled
                   = 0 => success, but ovector is not big enough
-                   -1 => failed to match (PCRE2_ERROR_NOMATCH)
-                   -2 => partial match (PCRE2_ERROR_PARTIAL)
-                 < -2 => some kind of unexpected problem
+                  = -1 => failed to match (PCRE2_ERROR_NOMATCH)
+                  = -2 => partial match (PCRE2_ERROR_PARTIAL)
+                  < -2 => some kind of unexpected problem
 */
 
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@@ -6014,7 +6040,6 @@ int was_zero_terminated = 0;
 const uint8_t *start_bits = NULL;
 const pcre2_real_code *re = (const pcre2_real_code *)code;
 
-
 BOOL anchored;
 BOOL firstline;
 BOOL has_first_cu = FALSE;
@@ -6022,6 +6047,11 @@ BOOL has_req_cu = FALSE;
 BOOL startline;
 BOOL utf;
 
+#if PCRE2_CODE_UNIT_WIDTH == 8
+BOOL memchr_not_found_first_cu = FALSE;
+BOOL memchr_not_found_first_cu2 = FALSE;
+#endif
+
 PCRE2_UCHAR first_cu = 0;
 PCRE2_UCHAR first_cu2 = 0;
 PCRE2_UCHAR req_cu = 0;
@@ -6029,10 +6059,23 @@ PCRE2_UCHAR req_cu2 = 0;
 
 PCRE2_SPTR bumpalong_limit;
 PCRE2_SPTR end_subject;
+PCRE2_SPTR true_end_subject;
 PCRE2_SPTR start_match = subject + start_offset;
 PCRE2_SPTR req_cu_ptr = start_match - 1;
-PCRE2_SPTR start_partial = NULL;
-PCRE2_SPTR match_partial = NULL;
+PCRE2_SPTR start_partial;
+PCRE2_SPTR match_partial;
+
+#ifdef SUPPORT_JIT
+BOOL use_jit;
+#endif
+
+#ifdef SUPPORT_UNICODE
+BOOL allow_invalid;
+uint32_t fragment_options = 0;
+#ifdef SUPPORT_JIT
+BOOL jit_checked_utf = FALSE;
+#endif
+#endif
 
 PCRE2_SIZE frame_size;
 
@@ -6059,7 +6102,7 @@ if (length == PCRE2_ZERO_TERMINATED)
   length = PRIV(strlen)(subject);
   was_zero_terminated = 1;
   }
-end_subject = subject + length;
+true_end_subject = end_subject = subject + length;
 
 /* Plausibility checks */
 
@@ -6095,12 +6138,24 @@ options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
 #undef FF
 #undef OO
 
-/* These two settings are used in the code for checking a UTF string that
-follows immediately afterwards. Other values in the mb block are used only
-during interpretive processing, not when the JIT support is in use, so they are
-set up later. */
+/* If the pattern was successfully studied with JIT support, we will run the
+JIT executable instead of the rest of this function. Most options must be set
+at compile time for the JIT code to be usable. */
+
+#ifdef SUPPORT_JIT
+use_jit = (re->executable_jit != NULL &&
+          (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0);
+#endif
+
+/* Initialize UTF parameters. */
 
 utf = (re->overall_options & PCRE2_UTF) != 0;
+#ifdef SUPPORT_UNICODE
+allow_invalid = (re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0;
+#endif
+
+/* Convert the partial matching flags into an integer. */
+
 mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
               ((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
 
@@ -6111,88 +6166,107 @@ if (mb->partial != 0 &&
    ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
   return PCRE2_ERROR_BADOPTION;
 
-/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
-we must also check that a starting offset does not point into the middle of a
-multiunit character. We check only the portion of the subject that is going to
-be inspected during matching - from the offset minus the maximum back reference
-to the given length. This saves time when a small part of a large subject is
-being matched by the use of a starting offset. Note that the maximum lookbehind
-is a number of characters, not code units. */
+/* It is an error to set an offset limit without setting the flag at compile
+time. */
 
-#ifdef SUPPORT_UNICODE
-if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
+if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
+     (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
+  return PCRE2_ERROR_BADOFFSETLIMIT;
+
+/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
+free the memory that was obtained. Set the field to NULL for no match cases. */
+
+if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
   {
-  PCRE2_SPTR check_subject = start_match;  /* start_match includes offset */
+  match_data->memctl.free((void *)match_data->subject,
+    match_data->memctl.memory_data);
+  match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
+  }
+match_data->subject = NULL;
+
+/* Zero the error offset in case the first code unit is invalid UTF. */
+
+match_data->startchar = 0;
+
+
+/* ============================= JIT matching ============================== */
+
+/* Prepare for JIT matching. Check a UTF string for validity unless no check is
+requested or invalid UTF can be handled. We check only the portion of the
+subject that might be be inspected during matching - from the offset minus the
+maximum lookbehind to the given length. This saves time when a small part of a
+large subject is being matched by the use of a starting offset. Note that the
+maximum lookbehind is a number of characters, not code units. */
 
-  if (start_offset > 0)
+#ifdef SUPPORT_JIT
+if (use_jit)
+  {
+#ifdef SUPPORT_UNICODE
+  if (utf && (options & PCRE2_NO_UTF_CHECK) == 0 && !allow_invalid)
     {
 #if PCRE2_CODE_UNIT_WIDTH != 32
     unsigned int i;
+#endif
+
+    /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
+    character start. */
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
     if (start_match < end_subject && NOT_FIRSTCU(*start_match))
-      return PCRE2_ERROR_BADUTFOFFSET;
-    for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
       {
-      check_subject--;
-      while (check_subject > subject &&
+      if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
+#if PCRE2_CODE_UNIT_WIDTH == 8
+      return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
+#else
+      return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
+#endif
+      }
+#endif  /* WIDTH != 32 */
+
+    /* Move back by the maximum lookbehind, just in case it happens at the very
+    start of matching. */
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
+    for (i = re->max_lookbehind; i > 0 && start_match > subject; i--)
+      {
+      start_match--;
+      while (start_match > subject &&
 #if PCRE2_CODE_UNIT_WIDTH == 8
-      (*check_subject & 0xc0) == 0x80)
+      (*start_match & 0xc0) == 0x80)
 #else  /* 16-bit */
-      (*check_subject & 0xfc00) == 0xdc00)
-#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
-        check_subject--;
+      (*start_match & 0xfc00) == 0xdc00)
+#endif
+        start_match--;
       }
-#else
+#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
+
     /* In the 32-bit library, one code unit equals one character. However,
     we cannot just subtract the lookbehind and then compare pointers, because
     a very large lookbehind could create an invalid pointer. */
 
     if (start_offset >= re->max_lookbehind)
-      check_subject -= re->max_lookbehind;
+      start_match -= re->max_lookbehind;
     else
-      check_subject = subject;
+      start_match = subject;
 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
-    }
 
-  /* Validate the relevant portion of the subject. After an error, adjust the
-  offset to be an absolute offset in the whole string. */
+    /* Validate the relevant portion of the subject. Adjust the offset of an
+    invalid code point to be an absolute offset in the whole string. */
 
-  match_data->rc = PRIV(valid_utf)(check_subject,
-    length - (check_subject - subject), &(match_data->startchar));
-  if (match_data->rc != 0)
-    {
-    match_data->startchar += check_subject - subject;
-    return match_data->rc;
+    match_data->rc = PRIV(valid_utf)(start_match,
+      length - (start_match - subject), &(match_data->startchar));
+    if (match_data->rc != 0)
+      {
+      match_data->startchar += start_match - subject;
+      return match_data->rc;
+      }
+    jit_checked_utf = TRUE;
     }
-  }
 #endif  /* SUPPORT_UNICODE */
 
-/* It is an error to set an offset limit without setting the flag at compile
-time. */
-
-if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
-     (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
-  return PCRE2_ERROR_BADOFFSETLIMIT;
-
-/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
-free the memory that was obtained. Set the field to NULL for no match cases. */
+  /* If JIT returns BADOPTION, which means that the selected complete or
+  partial matching mode was not compiled, fall through to the interpreter. */
 
-if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
-  {
-  match_data->memctl.free((void *)match_data->subject,
-    match_data->memctl.memory_data);
-  match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
-  }
-match_data->subject = NULL;
-
-/* If the pattern was successfully studied with JIT support, run the JIT
-executable instead of the rest of this function. Most options must be set at
-compile time for the JIT code to be usable. Fallback to the normal code path if
-an unsupported option is set or if JIT returns BADOPTION (which means that the
-selected normal or partial matching mode was not compiled). */
-
-#ifdef SUPPORT_JIT
-if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
-  {
   rc = pcre2_jit_match(code, subject, length, start_offset, options,
     match_data, mcontext);
   if (rc != PCRE2_ERROR_JIT_BADOPTION)
@@ -6209,10 +6283,152 @@ if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
     return rc;
     }
   }
+#endif  /* SUPPORT_JIT */
+
+/* ========================= End of JIT matching ========================== */
+
+
+/* Proceed with non-JIT matching. The default is to allow lookbehinds to the
+start of the subject. A UTF check when there is a non-zero offset may change
+this. */
+
+mb->check_subject = subject;
+
+/* If a UTF subject string was not checked for validity in the JIT code above,
+check it here, and handle support for invalid UTF strings. The check above
+happens only when invalid UTF is not supported and PCRE2_NO_CHECK_UTF is unset.
+If we get here in those circumstances, it means the subject string is valid,
+but for some reason JIT matching was not successful. There is no need to check
+the subject again.
+
+We check only the portion of the subject that might be be inspected during
+matching - from the offset minus the maximum lookbehind to the given length.
+This saves time when a small part of a large subject is being matched by the
+use of a starting offset. Note that the maximum lookbehind is a number of
+characters, not code units.
+
+Note also that support for invalid UTF forces a check, overriding the setting
+of PCRE2_NO_CHECK_UTF. */
+
+#ifdef SUPPORT_UNICODE
+if (utf &&
+#ifdef SUPPORT_JIT
+    !jit_checked_utf &&
+#endif
+    ((options & PCRE2_NO_UTF_CHECK) == 0 || allow_invalid))
+  {
+#if PCRE2_CODE_UNIT_WIDTH != 32
+  BOOL skipped_bad_start = FALSE;
+#endif
+
+  /* For 8-bit and 16-bit UTF, check that the first code unit is a valid
+  character start. If we are handling invalid UTF, just skip over such code
+  units. Otherwise, give an appropriate error. */
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
+  if (allow_invalid)
+    {
+    while (start_match < end_subject && NOT_FIRSTCU(*start_match))
+      {
+      start_match++;
+      skipped_bad_start = TRUE;
+      }
+    }
+  else if (start_match < end_subject && NOT_FIRSTCU(*start_match))
+    {
+    if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET;
+#if PCRE2_CODE_UNIT_WIDTH == 8
+    return PCRE2_ERROR_UTF8_ERR20;  /* Isolated 0x80 byte */
+#else
+    return PCRE2_ERROR_UTF16_ERR3;  /* Isolated low surrogate */
+#endif
+    }
+#endif  /* WIDTH != 32 */
+
+  /* The mb->check_subject field points to the start of UTF checking;
+  lookbehinds can go back no further than this. */
+
+  mb->check_subject = start_match;
+
+  /* Move back by the maximum lookbehind, just in case it happens at the very
+  start of matching, but don't do this if we skipped bad 8-bit or 16-bit code
+  units above. */
+
+#if PCRE2_CODE_UNIT_WIDTH != 32
+  if (!skipped_bad_start)
+    {
+    unsigned int i;
+    for (i = re->max_lookbehind; i > 0 && mb->check_subject > subject; i--)
+      {
+      mb->check_subject--;
+      while (mb->check_subject > subject &&
+#if PCRE2_CODE_UNIT_WIDTH == 8
+      (*mb->check_subject & 0xc0) == 0x80)
+#else  /* 16-bit */
+      (*mb->check_subject & 0xfc00) == 0xdc00)
+#endif
+        mb->check_subject--;
+      }
+    }
+#else  /* PCRE2_CODE_UNIT_WIDTH != 32 */
+
+  /* In the 32-bit library, one code unit equals one character. However,
+  we cannot just subtract the lookbehind and then compare pointers, because
+  a very large lookbehind could create an invalid pointer. */
+
+  if (start_offset >= re->max_lookbehind)
+    mb->check_subject -= re->max_lookbehind;
+  else
+    mb->check_subject = subject;
+#endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
+
+  /* Validate the relevant portion of the subject. There's a loop in case we
+  encounter bad UTF in the characters preceding start_match which we are
+  scanning because of a lookbehind. */
+
+  for (;;)
+    {
+    match_data->rc = PRIV(valid_utf)(mb->check_subject,
+      length - (mb->check_subject - subject), &(match_data->startchar));
+
+    if (match_data->rc == 0) break;   /* Valid UTF string */
+
+    /* Invalid UTF string. Adjust the offset to be an absolute offset in the
+    whole string. If we are handling invalid UTF strings, set end_subject to
+    stop before the bad code unit, and set the options to "not end of line".
+    Otherwise return the error. */
+
+    match_data->startchar += mb->check_subject - subject;
+    if (!allow_invalid || match_data->rc > 0) return match_data->rc;
+    end_subject = subject + match_data->startchar;
+
+    /* If the end precedes start_match, it means there is invalid UTF in the
+    extra code units we reversed over because of a lookbehind. Advance past the
+    first bad code unit, and then skip invalid character starting code units in
+    8-bit and 16-bit modes, and try again. */
+
+    if (end_subject < start_match)
+      {
+      mb->check_subject = end_subject + 1;
+#if PCRE2_CODE_UNIT_WIDTH != 32
+      while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
+        mb->check_subject++;
 #endif
+      }
+
+    /* Otherwise, set the not end of line option, and do the match. */
+
+    else
+      {
+      fragment_options = PCRE2_NOTEOL;
+      break;
+      }
+    }
+  }
+#endif  /* SUPPORT_UNICODE */
 
-/* Carry on with non-JIT matching. A NULL match context means "use a default
-context", but we take the memory control functions from the pattern. */
+/* A NULL match context means "use a default context", but we take the memory
+control functions from the pattern. */
 
 if (mcontext == NULL)
   {
@@ -6224,8 +6440,8 @@ else mb->memctl = mcontext->memctl;
 anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
 firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
 startline = (re->flags & PCRE2_STARTLINE) != 0;
-bumpalong_limit =  (mcontext->offset_limit == PCRE2_UNSET)?
-  end_subject : subject + mcontext->offset_limit;
+bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
+  true_end_subject : subject + mcontext->offset_limit;
 
 /* Initialize and set up the fixed fields in the callout block, with a pointer
 in the match block. */
@@ -6236,7 +6452,8 @@ cb.subject = subject;
 cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
 cb.callout_flags = 0;
 
-/* Fill in the remaining fields in the match block. */
+/* Fill in the remaining fields in the match block, except for moptions, which
+gets set later. */
 
 mb->callout = mcontext->callout;
 mb->callout_data = mcontext->callout_data;
@@ -6245,13 +6462,11 @@ mb->start_subject = subject;
 mb->start_offset = start_offset;
 mb->end_subject = end_subject;
 mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
-
-mb->moptions = options;                 /* Match options */
-mb->poptions = re->overall_options;     /* Pattern options */
-
+mb->allowemptypartial = (re->max_lookbehind > 0) ||
+    (re->flags & PCRE2_MATCH_EMPTY) != 0;
+mb->poptions = re->overall_options;          /* Pattern options */
 mb->ignore_skip_arg = 0;
-mb->mark = mb->nomatch_mark = NULL;     /* In case never set */
-mb->hitend = FALSE;
+mb->mark = mb->nomatch_mark = NULL;          /* In case never set */
 
 /* The name table is needed for finding all the numbers associated with a
 given name, for condition testing. The code follows the name table. */
@@ -6404,6 +6619,13 @@ if ((re->flags & PCRE2_LASTSET) != 0)
 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
 the loop runs just once. */
 
+#ifdef SUPPORT_UNICODE
+FRAGMENT_RESTART:
+#endif
+
+start_partial = match_partial = NULL;
+mb->hitend = FALSE;
+
 for(;;)
   {
   PCRE2_SPTR new_start_match;
@@ -6473,7 +6695,10 @@ for(;;)
     /* Not anchored. Advance to a unique first code unit if there is one. In
     8-bit mode, the use of memchr() gives a big speed up, even though we have
     to call it twice in caseless mode, in order to find the earliest occurrence
-    of the character in either of its cases. */
+    of the character in either of its cases. If a call to memchr() that
+    searches the rest of the subject fails to find one case, remember that in
+    order not to keep on repeating the search. This can make a huge difference
+    when the strings are very long and only one case is present. */
 
     else
       {
@@ -6487,11 +6712,29 @@ for(;;)
                 (smc = UCHAR21TEST(start_match)) != first_cu &&
                   smc != first_cu2)
             start_match++;
+
 #else  /* 8-bit code units */
-          PCRE2_SPTR pp1 =
-            memchr(start_match, first_cu, end_subject-start_match);
-          PCRE2_SPTR pp2 =
-            memchr(start_match, first_cu2, end_subject-start_match);
+          PCRE2_SPTR pp1 = NULL;
+          PCRE2_SPTR pp2 = NULL;
+          PCRE2_SIZE cu2size = end_subject - start_match;
+
+          if (!memchr_not_found_first_cu)
+            {
+            pp1 = memchr(start_match, first_cu, end_subject - start_match);
+            if (pp1 == NULL) memchr_not_found_first_cu = TRUE;
+              else cu2size = pp1 - start_match;
+            }
+
+          /* If pp1 is not NULL, we have arranged to search only as far as pp1,
+          to see if the other case is earlier, so we can set "not found" only
+          when both searches have returned NULL. */
+
+          if (!memchr_not_found_first_cu2)
+            {
+            pp2 = memchr(start_match, first_cu2, cu2size);
+            memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL);
+            }
+
           if (pp1 == NULL)
             start_match = (pp2 == NULL)? end_subject : pp2;
           else
@@ -6523,7 +6766,7 @@ for(;;)
         we also let the cycle run, because the matching string is legitimately
         allowed to start with the first code unit of a newline. */
 
-        if (!mb->partial && start_match >= mb->end_subject)
+        if (mb->partial == 0 && start_match >= mb->end_subject)
           {
           rc = MATCH_NOMATCH;
           break;
@@ -6582,7 +6825,7 @@ for(;;)
 
         /* See comment above in first_cu checking about the next few lines. */
 
-        if (!mb->partial && start_match >= mb->end_subject)
+        if (mb->partial == 0 && start_match >= mb->end_subject)
           {
           rc = MATCH_NOMATCH;
           break;
@@ -6596,8 +6839,10 @@ for(;;)
 
     /* The following two optimizations must be disabled for partial matching. */
 
-    if (!mb->partial)
+    if (mb->partial == 0)
       {
+      PCRE2_SPTR p;
+
       /* The minimum matching length is a lower bound; no string of that length
       may actually match the pattern. Although the value is, strictly, in
       characters, we treat it as code units to avoid spending too much time in
@@ -6621,60 +6866,57 @@ for(;;)
       memchr() twice in the caseless case because we only need to check for the
       presence of the character in either case, not find the first occurrence.
 
+      The search can be skipped if the code unit was found later than the
+      current starting point in a previous iteration of the bumpalong loop.
+
       HOWEVER: when the subject string is very, very long, searching to its end
       can take a long time, and give bad performance on quite ordinary
-      patterns. This showed up when somebody was matching something like
-      /^\d+C/ on a 32-megabyte string... so we don't do this when the string is
-      sufficiently long. */
+      anchored patterns. This showed up when somebody was matching something
+      like /^\d+C/ on a 32-megabyte string... so we don't do this when the
+      string is sufficiently long, but it's worth searching a lot more for
+      unanchored patterns. */
 
-      if (has_req_cu && end_subject - start_match < REQ_CU_MAX)
+      p = start_match + (has_first_cu? 1:0);
+      if (has_req_cu && p > req_cu_ptr)
         {
-        PCRE2_SPTR p = start_match + (has_first_cu? 1:0);
-
-        /* We don't need to repeat the search if we haven't yet reached the
-        place we found it last time round the bumpalong loop. */
+        PCRE2_SIZE check_length = end_subject - start_match;
 
-        if (p > req_cu_ptr)
+        if (check_length < REQ_CU_MAX ||
+              (!anchored && check_length < REQ_CU_MAX * 1000))
           {
-          if (p < end_subject)
+          if (req_cu != req_cu2)  /* Caseless */
             {
-            if (req_cu != req_cu2)  /* Caseless */
-              {
 #if PCRE2_CODE_UNIT_WIDTH != 8
-              do
-                {
-                uint32_t pp = UCHAR21INCTEST(p);
-                if (pp == req_cu || pp == req_cu2) { p--; break; }
-                }
-              while (p < end_subject);
-
+            while (p < end_subject)
+              {
+              uint32_t pp = UCHAR21INCTEST(p);
+              if (pp == req_cu || pp == req_cu2) { p--; break; }
+              }
 #else  /* 8-bit code units */
-              PCRE2_SPTR pp = p;
-              p = memchr(pp, req_cu, end_subject - pp);
-              if (p == NULL)
-                {
-                p = memchr(pp, req_cu2, end_subject - pp);
-                if (p == NULL) p = end_subject;
-                }
-#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
+            PCRE2_SPTR pp = p;
+            p = memchr(pp, req_cu, end_subject - pp);
+            if (p == NULL)
+              {
+              p = memchr(pp, req_cu2, end_subject - pp);
+              if (p == NULL) p = end_subject;
               }
+#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
+            }
 
-            /* The caseful case */
+          /* The caseful case */
 
-            else
-              {
+          else
+            {
 #if PCRE2_CODE_UNIT_WIDTH != 8
-              do
-                {
-                if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
-                }
-              while (p < end_subject);
+            while (p < end_subject)
+              {
+              if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
+              }
 
 #else  /* 8-bit code units */
-              p = memchr(p, req_cu, end_subject - p);
-              if (p == NULL) p = end_subject;
+            p = memchr(p, req_cu, end_subject - p);
+            if (p == NULL) p = end_subject;
 #endif
-              }
             }
 
           /* If we can't find the required code unit, break the bumpalong loop,
@@ -6714,6 +6956,11 @@ for(;;)
 
   mb->start_used_ptr = start_match;
   mb->last_used_ptr = start_match;
+#ifdef SUPPORT_UNICODE
+  mb->moptions = options | fragment_options;
+#else
+  mb->moptions = options;
+#endif
   mb->match_call_count = 0;
   mb->end_offset_top = 0;
   mb->skip_arg_count = 0;
@@ -6839,6 +7086,68 @@ for(;;)
 
 ENDLOOP:
 
+/* If end_subject != true_end_subject, it means we are handling invalid UTF,
+and have just processed a non-terminal fragment. If this resulted in no match
+or a partial match we must carry on to the next fragment (a partial match is
+returned to the caller only at the very end of the subject). A loop is used to
+avoid trying to match against empty fragments; if the pattern can match an
+empty string it would have done so already. */
+
+#ifdef SUPPORT_UNICODE
+if (utf && end_subject != true_end_subject &&
+    (rc == MATCH_NOMATCH || rc == PCRE2_ERROR_PARTIAL))
+  {
+  for (;;)
+    {
+    /* Advance past the first bad code unit, and then skip invalid character
+    starting code units in 8-bit and 16-bit modes. */
+
+    start_match = end_subject + 1;
+#if PCRE2_CODE_UNIT_WIDTH != 32
+    while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
+      start_match++;
+#endif
+
+    /* If we have hit the end of the subject, there isn't another non-empty
+    fragment, so give up. */
+
+    if (start_match >= true_end_subject)
+      {
+      rc = MATCH_NOMATCH;  /* In case it was partial */
+      break;
+      }
+
+    /* Check the rest of the subject */
+
+    mb->check_subject = start_match;
+    rc = PRIV(valid_utf)(start_match, length - (start_match - subject),
+      &(match_data->startchar));
+
+    /* The rest of the subject is valid UTF. */
+
+    if (rc == 0)
+      {
+      mb->end_subject = end_subject = true_end_subject;
+      fragment_options = PCRE2_NOTBOL;
+      goto FRAGMENT_RESTART;
+      }
+
+    /* A subsequent UTF error has been found; if the next fragment is
+    non-empty, set up to process it. Otherwise, let the loop advance. */
+
+    else if (rc < 0)
+      {
+      mb->end_subject = end_subject = start_match + match_data->startchar;
+      if (end_subject > start_match)
+        {
+        fragment_options = PCRE2_NOTBOL|PCRE2_NOTEOL;
+        goto FRAGMENT_RESTART;
+        }
+      }
+    }
+  }
+#endif  /* SUPPORT_UNICODE */
+
 /* Release an enlarged frame vector that is on the heap. */
 
 if (mb->match_frames != mb->stack_frames)

+ 14 - 1
thirdparty/pcre2/src/pcre2_match_data.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -150,4 +150,17 @@ pcre2_get_startchar(pcre2_match_data *match_data)
 return match_data->startchar;
 }
 
+
+
+/*************************************************
+*         Get size of match data block           *
+*************************************************/
+
+PCRE2_EXP_DEFN PCRE2_SIZE PCRE2_CALL_CONVENTION
+pcre2_get_match_data_size(pcre2_match_data *match_data)
+{
+return offsetof(pcre2_match_data, ovector) +
+  2 * (match_data->oveccount) * sizeof(PCRE2_SIZE);
+}
+
 /* End of pcre2_match_data.c */

+ 235 - 71
thirdparty/pcre2/src/pcre2_study.c

@@ -88,11 +88,13 @@ Arguments:
   countptr        pointer to call count (to catch over complexity)
   backref_cache   vector for caching back references.
 
+This function is no longer called when the pattern contains (*ACCEPT); however,
+the old code for returning -1 is retained, just in case.
+
 Returns:   the minimum length
            -1 \C in UTF-8 mode
               or (*ACCEPT)
               or pattern too complicated
-              or back reference to duplicate name/number
            -2 internal error (missing capturing bracket)
            -3 internal error (opcode not listed)
 */
@@ -103,6 +105,7 @@ find_minlength(const pcre2_real_code *re, PCRE2_SPTR code,
   int *backref_cache)
 {
 int length = -1;
+int branchlength = 0;
 int prev_cap_recno = -1;
 int prev_cap_d = 0;
 int prev_recurse_recno = -1;
@@ -110,9 +113,9 @@ int prev_recurse_d = 0;
 uint32_t once_fudge = 0;
 BOOL had_recurse = FALSE;
 BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
-recurse_check this_recurse;
-int branchlength = 0;
+PCRE2_SPTR nextbranch = code + GET(code, 1);
 PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
+recurse_check this_recurse;
 
 /* If this is a "could be empty" group, its minimum length is 0. */
 
@@ -128,16 +131,20 @@ if ((*countptr)++ > 1000) return -1;
 
 /* Scan along the opcodes for this branch. If we get to the end of the branch,
 check the length against that of the other branches. If the accumulated length
-passes 16-bits, stop. */
+passes 16-bits, reset to that value and skip the rest of the branch. */
 
 for (;;)
   {
   int d, min, recno;
-  PCRE2_UCHAR *cs, *ce;
-  PCRE2_UCHAR op = *cc;
+  PCRE2_UCHAR op, *cs, *ce;
 
-  if (branchlength >= UINT16_MAX) return UINT16_MAX;
+  if (branchlength >= UINT16_MAX)
+    {
+    branchlength = UINT16_MAX;
+    cc = (PCRE2_UCHAR *)nextbranch;
+    }
 
+  op = *cc;
   switch (op)
     {
     case OP_COND:
@@ -206,7 +213,9 @@ for (;;)
     cc += 1 + LINK_SIZE;
     break;
 
-    /* ACCEPT makes things far too complicated; we have to give up. */
+    /* ACCEPT makes things far too complicated; we have to give up. In fact,
+    from 10.34 onwards, if a pattern contains (*ACCEPT), this function is not
+    used. However, leave the code in place, just in case. */
 
     case OP_ACCEPT:
     case OP_ASSERT_ACCEPT:
@@ -214,9 +223,9 @@ for (;;)
 
     /* Reached end of a branch; if it's a ket it is the end of a nested
     call. If it's ALT it is an alternation in a nested call. If it is END it's
-    the end of the outer call. All can be handled by the same code. If an
-    ACCEPT was previously encountered, use the length that was in force at that
-    time, and pass back the shortest ACCEPT length. */
+    the end of the outer call. All can be handled by the same code. If the
+    length of any branch is zero, there is no need to scan any subsequent
+    branches. */
 
     case OP_ALT:
     case OP_KET:
@@ -226,7 +235,8 @@ for (;;)
     case OP_END:
     if (length < 0 || (!had_recurse && branchlength < length))
       length = branchlength;
-    if (op != OP_ALT) return length;
+    if (op != OP_ALT || length == 0) return length;
+    nextbranch = cc + GET(cc, 1);
     cc += 1 + LINK_SIZE;
     branchlength = 0;
     had_recurse = FALSE;
@@ -238,6 +248,8 @@ for (;;)
     case OP_ASSERT_NOT:
     case OP_ASSERTBACK:
     case OP_ASSERTBACK_NOT:
+    case OP_ASSERT_NA:
+    case OP_ASSERTBACK_NA:
     do cc += GET(cc, 1); while (*cc == OP_ALT);
     /* Fall through */
 
@@ -451,15 +463,17 @@ for (;;)
 
     If PCRE2_MATCH_UNSET_BACKREF is set, a backreference to an unset bracket
     matches an empty string (by default it causes a matching failure), so in
-    that case we must set the minimum length to zero. */
+    that case we must set the minimum length to zero.
+
+    For backreferenes, if duplicate numbers are present in the pattern we check
+    for a reference to a duplicate. If it is, we don't know which version will
+    be referenced, so we have to set the minimum length to zero. */
 
-    /* Duplicate named pattern back reference. We cannot reliably find a length
-    for this if duplicate numbers are present in the pattern. */
+    /* Duplicate named pattern back reference. */
 
     case OP_DNREF:
     case OP_DNREFI:
-    if (dupcapused) return -1;
-    if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
+    if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
       {
       int count = GET2(cc, 1+IMM2_SIZE);
       PCRE2_UCHAR *slot =
@@ -482,28 +496,32 @@ for (;;)
           ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
           if (cs == NULL) return -2;
           do ce += GET(ce, 1); while (*ce == OP_ALT);
-          if (cc > cs && cc < ce)    /* Simple recursion */
-            {
-            dd = 0;
-            had_recurse = TRUE;
-            }
-          else
+
+          dd = 0;
+          if (!dupcapused ||
+              (PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
             {
-            recurse_check *r = recurses;
-            for (r = recurses; r != NULL; r = r->prev)
-              if (r->group == cs) break;
-            if (r != NULL)           /* Mutual recursion */
+            if (cc > cs && cc < ce)    /* Simple recursion */
               {
-              dd = 0;
               had_recurse = TRUE;
               }
             else
               {
-              this_recurse.prev = recurses;
-              this_recurse.group = cs;
-              dd = find_minlength(re, cs, startcode, utf, &this_recurse,
-                countptr, backref_cache);
-              if (dd < 0) return dd;
+              recurse_check *r = recurses;
+              for (r = recurses; r != NULL; r = r->prev)
+                if (r->group == cs) break;
+              if (r != NULL)           /* Mutual recursion */
+                {
+                had_recurse = TRUE;
+                }
+              else
+                {
+                this_recurse.prev = recurses;  /* No recursion */
+                this_recurse.group = cs;
+                dd = find_minlength(re, cs, startcode, utf, &this_recurse,
+                  countptr, backref_cache);
+                if (dd < 0) return dd;
+                }
               }
             }
 
@@ -521,48 +539,51 @@ for (;;)
     cc += 1 + 2*IMM2_SIZE;
     goto REPEAT_BACK_REFERENCE;
 
-    /* Single back reference. We cannot find a length for this if duplicate
-    numbers are present in the pattern. */
+    /* Single back reference by number. References by name are converted to by
+    number when there is no duplication. */
 
     case OP_REF:
     case OP_REFI:
-    if (dupcapused) return -1;
     recno = GET2(cc, 1);
     if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
       d = backref_cache[recno];
     else
       {
       int i;
+      d = 0;
+
       if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
         {
         ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
         if (cs == NULL) return -2;
         do ce += GET(ce, 1); while (*ce == OP_ALT);
-        if (cc > cs && cc < ce)    /* Simple recursion */
-          {
-          d = 0;
-          had_recurse = TRUE;
-          }
-        else
+
+        if (!dupcapused ||
+            (PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
           {
-          recurse_check *r = recurses;
-          for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
-          if (r != NULL)           /* Mutual recursion */
+          if (cc > cs && cc < ce)    /* Simple recursion */
             {
-            d = 0;
             had_recurse = TRUE;
             }
           else
             {
-            this_recurse.prev = recurses;
-            this_recurse.group = cs;
-            d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr,
-              backref_cache);
-            if (d < 0) return d;
+            recurse_check *r = recurses;
+            for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
+            if (r != NULL)           /* Mutual recursion */
+              {
+              had_recurse = TRUE;
+              }
+            else                     /* No recursion */
+              {
+              this_recurse.prev = recurses;
+              this_recurse.group = cs;
+              d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr,
+                backref_cache);
+              if (d < 0) return d;
+              }
             }
           }
         }
-      else d = 0;
 
       backref_cache[recno] = d;
       for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
@@ -888,7 +909,7 @@ if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
 
 
 /*************************************************
-*          Create bitmap of starting bytes       *
+*      Create bitmap of starting code units      *
 *************************************************/
 
 /* This function scans a compiled unanchored expression recursively and
@@ -938,6 +959,9 @@ do
     {
     int rc;
     uint8_t *classmap = NULL;
+#ifdef SUPPORT_WIDE_CHARS
+    PCRE2_UCHAR xclassflags;
+#endif
 
     switch(*tcode)
       {
@@ -1078,6 +1102,7 @@ do
       case OP_ONCE:
       case OP_SCRIPT_RUN:
       case OP_ASSERT:
+      case OP_ASSERT_NA:
       rc = set_start_bits(re, tcode, utf);
       if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
       if (rc == SSB_DONE) try_next = FALSE; else
@@ -1120,6 +1145,7 @@ do
       case OP_ASSERT_NOT:
       case OP_ASSERTBACK:
       case OP_ASSERTBACK_NOT:
+      case OP_ASSERTBACK_NA:
       do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
       tcode += 1 + LINK_SIZE;
       break;
@@ -1444,20 +1470,59 @@ do
       negative XCLASS without a map, give up. If there are no property checks,
       there must be wide characters on the XCLASS list, because otherwise an
       XCLASS would not have been created. This means that code points >= 255
-      are always potential starters. */
+      are potential starters. In the UTF-8 case we can scan them and set bits
+      for the relevant leading bytes. */
 
 #ifdef SUPPORT_WIDE_CHARS
       case OP_XCLASS:
-      if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0 ||
-          (tcode[1 + LINK_SIZE] & (XCL_MAP|XCL_NOT)) == XCL_NOT)
+      xclassflags = tcode[1 + LINK_SIZE];
+      if ((xclassflags & XCL_HASPROP) != 0 ||
+          (xclassflags & (XCL_MAP|XCL_NOT)) == XCL_NOT)
         return SSB_FAIL;
 
       /* We have a positive XCLASS or a negative one without a map. Set up the
       map pointer if there is one, and fall through. */
 
-      classmap = ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0)? NULL :
+      classmap = ((xclassflags & XCL_MAP) == 0)? NULL :
         (uint8_t *)(tcode + 1 + LINK_SIZE + 1);
-#endif
+
+      /* In UTF-8 mode, scan the character list and set bits for leading bytes,
+      then jump to handle the map. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+      if (utf && (xclassflags & XCL_NOT) == 0)
+        {
+        PCRE2_UCHAR b, e;
+        PCRE2_SPTR p = tcode + 1 + LINK_SIZE + 1 + ((classmap == NULL)? 0:32);
+        tcode += GET(tcode, 1);
+
+        for (;;) switch (*p++)
+          {
+          case XCL_SINGLE:
+          b = *p++;
+          while ((*p & 0xc0) == 0x80) p++;
+          re->start_bitmap[b/8] |= (1u << (b&7));
+          break;
+
+          case XCL_RANGE:
+          b = *p++;
+          while ((*p & 0xc0) == 0x80) p++;
+          e = *p++;
+          while ((*p & 0xc0) == 0x80) p++;
+          for (; b <= e; b++)
+            re->start_bitmap[b/8] |= (1u << (b&7));
+          break;
+
+          case XCL_END:
+          goto HANDLE_CLASSMAP;
+
+          default:
+          return SSB_UNKNOWN;   /* Internal error, should not occur */
+          }
+        }
+#endif  /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
+#endif  /* SUPPORT_WIDE_CHARS */
+
       /* It seems that the fall through comment must be outside the #ifdef if
       it is to avoid the gcc compiler warning. */
 
@@ -1499,6 +1564,9 @@ do
       greater than 127. In fact, there are only two possible starting bytes for
       characters in the range 128 - 255. */
 
+#if defined SUPPORT_WIDE_CHARS && PCRE2_CODE_UNIT_WIDTH == 8
+      HANDLE_CLASSMAP:
+#endif
       if (classmap != NULL)
         {
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
@@ -1569,7 +1637,9 @@ return yield;
 /* This function is handed a compiled expression that it must study to produce
 information that will speed up the matching.
 
-Argument:  points to the compiled expression
+Argument:
+  re       points to the compiled expression
+
 Returns:   0 normally; non-zero should never normally occur
            1 unknown opcode in set_start_bits
            2 missing capturing bracket
@@ -1579,7 +1649,6 @@ Returns:   0 normally; non-zero should never normally occur
 int
 PRIV(study)(pcre2_real_code *re)
 {
-int min;
 int count = 0;
 PCRE2_UCHAR *code;
 BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
@@ -1597,25 +1666,121 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
   {
   int rc = set_start_bits(re, code, utf);
   if (rc == SSB_UNKNOWN) return 1;
-  if (rc == SSB_DONE) re->flags |= PCRE2_FIRSTMAPSET;
+
+  /* If a list of starting code units was set up, scan the list to see if only
+  one or two were listed. Having only one listed is rare because usually a
+  single starting code unit will have been recognized and PCRE2_FIRSTSET set.
+  If two are listed, see if they are caseless versions of the same character;
+  if so we can replace the list with a caseless first code unit. This gives
+  better performance and is plausibly worth doing for patterns such as [Ww]ord
+  or (word|WORD). */
+
+  if (rc == SSB_DONE)
+    {
+    int i;
+    int a = -1;
+    int b = -1;
+    uint8_t *p = re->start_bitmap;
+    uint32_t flags = PCRE2_FIRSTMAPSET;
+
+    for (i = 0; i < 256; p++, i += 8)
+      {
+      uint8_t x = *p;
+      if (x != 0)
+        {
+        int c;
+        uint8_t y = x & (~x + 1);   /* Least significant bit */
+        if (y != x) goto DONE;      /* More than one bit set */
+
+        /* In the 16-bit and 32-bit libraries, the bit for 0xff means "0xff and
+        all wide characters", so we cannot use it here. */
+
+#if PCRE2_CODE_UNIT_WIDTH != 8
+        if (i == 248 && x == 0x80) goto DONE;
+#endif
+
+        /* Compute the character value */
+
+        c = i;
+        switch (x)
+          {
+          case 1:   break;
+          case 2:   c += 1; break;  case 4:  c += 2; break;
+          case 8:   c += 3; break;  case 16: c += 4; break;
+          case 32:  c += 5; break;  case 64: c += 6; break;
+          case 128: c += 7; break;
+          }
+
+        /* c contains the code unit value, in the range 0-255. In 8-bit UTF
+        mode, only values < 128 can be used. */
+
+#if PCRE2_CODE_UNIT_WIDTH == 8
+        if (c > 127) goto DONE;
+#endif
+        if (a < 0) a = c;   /* First one found */
+        else if (b < 0)     /* Second one found */
+          {
+          int d = TABLE_GET((unsigned int)c, re->tables + fcc_offset, c);
+
+#ifdef SUPPORT_UNICODE
+#if PCRE2_CODE_UNIT_WIDTH == 8
+          if (utf && UCD_CASESET(c) != 0) goto DONE;   /* Multiple case set */
+#else   /* 16-bit or 32-bit */
+          if (UCD_CASESET(c) != 0) goto DONE;     /* Multiple case set */
+          if (utf && c > 127) d = UCD_OTHERCASE(c);
+#endif  /* Code width */
+#endif  /* SUPPORT_UNICODE */
+
+          if (d != a) goto DONE;   /* Not other case of a */
+          b = c;
+          }
+        else goto DONE;   /* More than two characters found */
+        }
+      }
+
+    /* Replace the start code unit bits with a first code unit, but only if it
+    is not the same as a required later code unit. This is because a search for
+    a required code unit starts after an explicit first code unit, but at a
+    code unit found from the bitmap. Patterns such as /a*a/ don't work
+    if both the start unit and required unit are the same. */
+
+    if (a >= 0 &&
+        (
+        (re->flags & PCRE2_LASTSET) == 0 ||
+          (
+          re->last_codeunit != (uint32_t)a &&
+          (b < 0 || re->last_codeunit != (uint32_t)b)
+          )
+        ))
+      {
+      re->first_codeunit = a;
+      flags = PCRE2_FIRSTSET;
+      if (b >= 0) flags |= PCRE2_FIRSTCASELESS;
+      }
+
+    DONE:
+    re->flags |= flags;
+    }
   }
 
 /* Find the minimum length of subject string. If the pattern can match an empty
-string, the minimum length is already known. If there are more back references
-than the size of the vector we are going to cache them in, do nothing. A
-pattern that complicated will probably take a long time to analyze and may in
-any case turn out to be too complicated. Note that back reference minima are
-held as 16-bit numbers. */
-
-if ((re->flags & PCRE2_MATCH_EMPTY) == 0 &&
+string, the minimum length is already known. If the pattern contains (*ACCEPT)
+all bets are off, and we don't even try to find a minimum length. If there are
+more back references than the size of the vector we are going to cache them in,
+do nothing. A pattern that complicated will probably take a long time to
+analyze and may in any case turn out to be too complicated. Note that back
+reference minima are held as 16-bit numbers. */
+
+if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 &&
      re->top_backref <= MAX_CACHE_BACKREF)
   {
+  int min;
   int backref_cache[MAX_CACHE_BACKREF+1];
   backref_cache[0] = 0;    /* Highest one that is set */
   min = find_minlength(re, code, code, utf, NULL, &count, backref_cache);
   switch(min)
     {
-    case -1:  /* \C in UTF mode or (*ACCEPT) or over-complex regex */
+    case -1:  /* \C in UTF mode or over-complex regex */
     break;    /* Leave minlength unchanged (will be zero) */
 
     case -2:
@@ -1625,8 +1790,7 @@ if ((re->flags & PCRE2_MATCH_EMPTY) == 0 &&
     return 3; /* unrecognized opcode */
 
     default:
-    if (min > UINT16_MAX) min = UINT16_MAX;
-    re->minlength = min;
+    re->minlength = (min > UINT16_MAX)? UINT16_MAX : min;
     break;
     }
   }

+ 164 - 152
thirdparty/pcre2/src/pcre2_tables.c

@@ -279,6 +279,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
 #define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
 #define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
+#define STRING_Elymaic0 STR_E STR_l STR_y STR_m STR_a STR_i STR_c "\0"
 #define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
 #define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
 #define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
@@ -348,6 +349,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
 #define STRING_N0 STR_N "\0"
 #define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
+#define STRING_Nandinagari0 STR_N STR_a STR_n STR_d STR_i STR_n STR_a STR_g STR_a STR_r STR_i "\0"
 #define STRING_Nd0 STR_N STR_d "\0"
 #define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
 #define STRING_Newa0 STR_N STR_e STR_w STR_a "\0"
@@ -355,6 +357,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Nl0 STR_N STR_l "\0"
 #define STRING_No0 STR_N STR_o "\0"
 #define STRING_Nushu0 STR_N STR_u STR_s STR_h STR_u "\0"
+#define STRING_Nyiakeng_Puachue_Hmong0 STR_N STR_y STR_i STR_a STR_k STR_e STR_n STR_g STR_UNDERSCORE STR_P STR_u STR_a STR_c STR_h STR_u STR_e STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
 #define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
 #define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
 #define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0"
@@ -419,6 +422,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
 #define STRING_Unknown0 STR_U STR_n STR_k STR_n STR_o STR_w STR_n "\0"
 #define STRING_Vai0 STR_V STR_a STR_i "\0"
+#define STRING_Wancho0 STR_W STR_a STR_n STR_c STR_h STR_o "\0"
 #define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
 #define STRING_Xan0 STR_X STR_a STR_n "\0"
 #define STRING_Xps0 STR_X STR_p STR_s "\0"
@@ -474,6 +478,7 @@ const char PRIV(utt_names)[] =
   STRING_Duployan0
   STRING_Egyptian_Hieroglyphs0
   STRING_Elbasan0
+  STRING_Elymaic0
   STRING_Ethiopic0
   STRING_Georgian0
   STRING_Glagolitic0
@@ -543,6 +548,7 @@ const char PRIV(utt_names)[] =
   STRING_Myanmar0
   STRING_N0
   STRING_Nabataean0
+  STRING_Nandinagari0
   STRING_Nd0
   STRING_New_Tai_Lue0
   STRING_Newa0
@@ -550,6 +556,7 @@ const char PRIV(utt_names)[] =
   STRING_Nl0
   STRING_No0
   STRING_Nushu0
+  STRING_Nyiakeng_Puachue_Hmong0
   STRING_Ogham0
   STRING_Ol_Chiki0
   STRING_Old_Hungarian0
@@ -614,6 +621,7 @@ const char PRIV(utt_names)[] =
   STRING_Ugaritic0
   STRING_Unknown0
   STRING_Vai0
+  STRING_Wancho0
   STRING_Warang_Citi0
   STRING_Xan0
   STRING_Xps0
@@ -669,158 +677,162 @@ const ucp_type_table PRIV(utt)[] = {
   { 299, PT_SC, ucp_Duployan },
   { 308, PT_SC, ucp_Egyptian_Hieroglyphs },
   { 329, PT_SC, ucp_Elbasan },
-  { 337, PT_SC, ucp_Ethiopic },
-  { 346, PT_SC, ucp_Georgian },
-  { 355, PT_SC, ucp_Glagolitic },
-  { 366, PT_SC, ucp_Gothic },
-  { 373, PT_SC, ucp_Grantha },
-  { 381, PT_SC, ucp_Greek },
-  { 387, PT_SC, ucp_Gujarati },
-  { 396, PT_SC, ucp_Gunjala_Gondi },
-  { 410, PT_SC, ucp_Gurmukhi },
-  { 419, PT_SC, ucp_Han },
-  { 423, PT_SC, ucp_Hangul },
-  { 430, PT_SC, ucp_Hanifi_Rohingya },
-  { 446, PT_SC, ucp_Hanunoo },
-  { 454, PT_SC, ucp_Hatran },
-  { 461, PT_SC, ucp_Hebrew },
-  { 468, PT_SC, ucp_Hiragana },
-  { 477, PT_SC, ucp_Imperial_Aramaic },
-  { 494, PT_SC, ucp_Inherited },
-  { 504, PT_SC, ucp_Inscriptional_Pahlavi },
-  { 526, PT_SC, ucp_Inscriptional_Parthian },
-  { 549, PT_SC, ucp_Javanese },
-  { 558, PT_SC, ucp_Kaithi },
-  { 565, PT_SC, ucp_Kannada },
-  { 573, PT_SC, ucp_Katakana },
-  { 582, PT_SC, ucp_Kayah_Li },
-  { 591, PT_SC, ucp_Kharoshthi },
-  { 602, PT_SC, ucp_Khmer },
-  { 608, PT_SC, ucp_Khojki },
-  { 615, PT_SC, ucp_Khudawadi },
-  { 625, PT_GC, ucp_L },
-  { 627, PT_LAMP, 0 },
-  { 630, PT_SC, ucp_Lao },
-  { 634, PT_SC, ucp_Latin },
-  { 640, PT_SC, ucp_Lepcha },
-  { 647, PT_SC, ucp_Limbu },
-  { 653, PT_SC, ucp_Linear_A },
-  { 662, PT_SC, ucp_Linear_B },
-  { 671, PT_SC, ucp_Lisu },
-  { 676, PT_PC, ucp_Ll },
-  { 679, PT_PC, ucp_Lm },
-  { 682, PT_PC, ucp_Lo },
-  { 685, PT_PC, ucp_Lt },
-  { 688, PT_PC, ucp_Lu },
-  { 691, PT_SC, ucp_Lycian },
-  { 698, PT_SC, ucp_Lydian },
-  { 705, PT_GC, ucp_M },
-  { 707, PT_SC, ucp_Mahajani },
-  { 716, PT_SC, ucp_Makasar },
-  { 724, PT_SC, ucp_Malayalam },
-  { 734, PT_SC, ucp_Mandaic },
-  { 742, PT_SC, ucp_Manichaean },
-  { 753, PT_SC, ucp_Marchen },
-  { 761, PT_SC, ucp_Masaram_Gondi },
-  { 775, PT_PC, ucp_Mc },
-  { 778, PT_PC, ucp_Me },
-  { 781, PT_SC, ucp_Medefaidrin },
-  { 793, PT_SC, ucp_Meetei_Mayek },
-  { 806, PT_SC, ucp_Mende_Kikakui },
-  { 820, PT_SC, ucp_Meroitic_Cursive },
-  { 837, PT_SC, ucp_Meroitic_Hieroglyphs },
-  { 858, PT_SC, ucp_Miao },
-  { 863, PT_PC, ucp_Mn },
-  { 866, PT_SC, ucp_Modi },
-  { 871, PT_SC, ucp_Mongolian },
-  { 881, PT_SC, ucp_Mro },
-  { 885, PT_SC, ucp_Multani },
-  { 893, PT_SC, ucp_Myanmar },
-  { 901, PT_GC, ucp_N },
-  { 903, PT_SC, ucp_Nabataean },
-  { 913, PT_PC, ucp_Nd },
-  { 916, PT_SC, ucp_New_Tai_Lue },
-  { 928, PT_SC, ucp_Newa },
-  { 933, PT_SC, ucp_Nko },
-  { 937, PT_PC, ucp_Nl },
-  { 940, PT_PC, ucp_No },
-  { 943, PT_SC, ucp_Nushu },
-  { 949, PT_SC, ucp_Ogham },
-  { 955, PT_SC, ucp_Ol_Chiki },
-  { 964, PT_SC, ucp_Old_Hungarian },
-  { 978, PT_SC, ucp_Old_Italic },
-  { 989, PT_SC, ucp_Old_North_Arabian },
-  { 1007, PT_SC, ucp_Old_Permic },
-  { 1018, PT_SC, ucp_Old_Persian },
-  { 1030, PT_SC, ucp_Old_Sogdian },
-  { 1042, PT_SC, ucp_Old_South_Arabian },
-  { 1060, PT_SC, ucp_Old_Turkic },
-  { 1071, PT_SC, ucp_Oriya },
-  { 1077, PT_SC, ucp_Osage },
-  { 1083, PT_SC, ucp_Osmanya },
-  { 1091, PT_GC, ucp_P },
-  { 1093, PT_SC, ucp_Pahawh_Hmong },
-  { 1106, PT_SC, ucp_Palmyrene },
-  { 1116, PT_SC, ucp_Pau_Cin_Hau },
-  { 1128, PT_PC, ucp_Pc },
-  { 1131, PT_PC, ucp_Pd },
-  { 1134, PT_PC, ucp_Pe },
-  { 1137, PT_PC, ucp_Pf },
-  { 1140, PT_SC, ucp_Phags_Pa },
-  { 1149, PT_SC, ucp_Phoenician },
-  { 1160, PT_PC, ucp_Pi },
-  { 1163, PT_PC, ucp_Po },
-  { 1166, PT_PC, ucp_Ps },
-  { 1169, PT_SC, ucp_Psalter_Pahlavi },
-  { 1185, PT_SC, ucp_Rejang },
-  { 1192, PT_SC, ucp_Runic },
-  { 1198, PT_GC, ucp_S },
-  { 1200, PT_SC, ucp_Samaritan },
-  { 1210, PT_SC, ucp_Saurashtra },
-  { 1221, PT_PC, ucp_Sc },
-  { 1224, PT_SC, ucp_Sharada },
-  { 1232, PT_SC, ucp_Shavian },
-  { 1240, PT_SC, ucp_Siddham },
-  { 1248, PT_SC, ucp_SignWriting },
-  { 1260, PT_SC, ucp_Sinhala },
-  { 1268, PT_PC, ucp_Sk },
-  { 1271, PT_PC, ucp_Sm },
-  { 1274, PT_PC, ucp_So },
-  { 1277, PT_SC, ucp_Sogdian },
-  { 1285, PT_SC, ucp_Sora_Sompeng },
-  { 1298, PT_SC, ucp_Soyombo },
-  { 1306, PT_SC, ucp_Sundanese },
-  { 1316, PT_SC, ucp_Syloti_Nagri },
-  { 1329, PT_SC, ucp_Syriac },
-  { 1336, PT_SC, ucp_Tagalog },
-  { 1344, PT_SC, ucp_Tagbanwa },
-  { 1353, PT_SC, ucp_Tai_Le },
-  { 1360, PT_SC, ucp_Tai_Tham },
-  { 1369, PT_SC, ucp_Tai_Viet },
-  { 1378, PT_SC, ucp_Takri },
-  { 1384, PT_SC, ucp_Tamil },
-  { 1390, PT_SC, ucp_Tangut },
-  { 1397, PT_SC, ucp_Telugu },
-  { 1404, PT_SC, ucp_Thaana },
-  { 1411, PT_SC, ucp_Thai },
-  { 1416, PT_SC, ucp_Tibetan },
-  { 1424, PT_SC, ucp_Tifinagh },
-  { 1433, PT_SC, ucp_Tirhuta },
-  { 1441, PT_SC, ucp_Ugaritic },
-  { 1450, PT_SC, ucp_Unknown },
-  { 1458, PT_SC, ucp_Vai },
-  { 1462, PT_SC, ucp_Warang_Citi },
-  { 1474, PT_ALNUM, 0 },
-  { 1478, PT_PXSPACE, 0 },
-  { 1482, PT_SPACE, 0 },
-  { 1486, PT_UCNC, 0 },
-  { 1490, PT_WORD, 0 },
-  { 1494, PT_SC, ucp_Yi },
-  { 1497, PT_GC, ucp_Z },
-  { 1499, PT_SC, ucp_Zanabazar_Square },
-  { 1516, PT_PC, ucp_Zl },
-  { 1519, PT_PC, ucp_Zp },
-  { 1522, PT_PC, ucp_Zs }
+  { 337, PT_SC, ucp_Elymaic },
+  { 345, PT_SC, ucp_Ethiopic },
+  { 354, PT_SC, ucp_Georgian },
+  { 363, PT_SC, ucp_Glagolitic },
+  { 374, PT_SC, ucp_Gothic },
+  { 381, PT_SC, ucp_Grantha },
+  { 389, PT_SC, ucp_Greek },
+  { 395, PT_SC, ucp_Gujarati },
+  { 404, PT_SC, ucp_Gunjala_Gondi },
+  { 418, PT_SC, ucp_Gurmukhi },
+  { 427, PT_SC, ucp_Han },
+  { 431, PT_SC, ucp_Hangul },
+  { 438, PT_SC, ucp_Hanifi_Rohingya },
+  { 454, PT_SC, ucp_Hanunoo },
+  { 462, PT_SC, ucp_Hatran },
+  { 469, PT_SC, ucp_Hebrew },
+  { 476, PT_SC, ucp_Hiragana },
+  { 485, PT_SC, ucp_Imperial_Aramaic },
+  { 502, PT_SC, ucp_Inherited },
+  { 512, PT_SC, ucp_Inscriptional_Pahlavi },
+  { 534, PT_SC, ucp_Inscriptional_Parthian },
+  { 557, PT_SC, ucp_Javanese },
+  { 566, PT_SC, ucp_Kaithi },
+  { 573, PT_SC, ucp_Kannada },
+  { 581, PT_SC, ucp_Katakana },
+  { 590, PT_SC, ucp_Kayah_Li },
+  { 599, PT_SC, ucp_Kharoshthi },
+  { 610, PT_SC, ucp_Khmer },
+  { 616, PT_SC, ucp_Khojki },
+  { 623, PT_SC, ucp_Khudawadi },
+  { 633, PT_GC, ucp_L },
+  { 635, PT_LAMP, 0 },
+  { 638, PT_SC, ucp_Lao },
+  { 642, PT_SC, ucp_Latin },
+  { 648, PT_SC, ucp_Lepcha },
+  { 655, PT_SC, ucp_Limbu },
+  { 661, PT_SC, ucp_Linear_A },
+  { 670, PT_SC, ucp_Linear_B },
+  { 679, PT_SC, ucp_Lisu },
+  { 684, PT_PC, ucp_Ll },
+  { 687, PT_PC, ucp_Lm },
+  { 690, PT_PC, ucp_Lo },
+  { 693, PT_PC, ucp_Lt },
+  { 696, PT_PC, ucp_Lu },
+  { 699, PT_SC, ucp_Lycian },
+  { 706, PT_SC, ucp_Lydian },
+  { 713, PT_GC, ucp_M },
+  { 715, PT_SC, ucp_Mahajani },
+  { 724, PT_SC, ucp_Makasar },
+  { 732, PT_SC, ucp_Malayalam },
+  { 742, PT_SC, ucp_Mandaic },
+  { 750, PT_SC, ucp_Manichaean },
+  { 761, PT_SC, ucp_Marchen },
+  { 769, PT_SC, ucp_Masaram_Gondi },
+  { 783, PT_PC, ucp_Mc },
+  { 786, PT_PC, ucp_Me },
+  { 789, PT_SC, ucp_Medefaidrin },
+  { 801, PT_SC, ucp_Meetei_Mayek },
+  { 814, PT_SC, ucp_Mende_Kikakui },
+  { 828, PT_SC, ucp_Meroitic_Cursive },
+  { 845, PT_SC, ucp_Meroitic_Hieroglyphs },
+  { 866, PT_SC, ucp_Miao },
+  { 871, PT_PC, ucp_Mn },
+  { 874, PT_SC, ucp_Modi },
+  { 879, PT_SC, ucp_Mongolian },
+  { 889, PT_SC, ucp_Mro },
+  { 893, PT_SC, ucp_Multani },
+  { 901, PT_SC, ucp_Myanmar },
+  { 909, PT_GC, ucp_N },
+  { 911, PT_SC, ucp_Nabataean },
+  { 921, PT_SC, ucp_Nandinagari },
+  { 933, PT_PC, ucp_Nd },
+  { 936, PT_SC, ucp_New_Tai_Lue },
+  { 948, PT_SC, ucp_Newa },
+  { 953, PT_SC, ucp_Nko },
+  { 957, PT_PC, ucp_Nl },
+  { 960, PT_PC, ucp_No },
+  { 963, PT_SC, ucp_Nushu },
+  { 969, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
+  { 992, PT_SC, ucp_Ogham },
+  { 998, PT_SC, ucp_Ol_Chiki },
+  { 1007, PT_SC, ucp_Old_Hungarian },
+  { 1021, PT_SC, ucp_Old_Italic },
+  { 1032, PT_SC, ucp_Old_North_Arabian },
+  { 1050, PT_SC, ucp_Old_Permic },
+  { 1061, PT_SC, ucp_Old_Persian },
+  { 1073, PT_SC, ucp_Old_Sogdian },
+  { 1085, PT_SC, ucp_Old_South_Arabian },
+  { 1103, PT_SC, ucp_Old_Turkic },
+  { 1114, PT_SC, ucp_Oriya },
+  { 1120, PT_SC, ucp_Osage },
+  { 1126, PT_SC, ucp_Osmanya },
+  { 1134, PT_GC, ucp_P },
+  { 1136, PT_SC, ucp_Pahawh_Hmong },
+  { 1149, PT_SC, ucp_Palmyrene },
+  { 1159, PT_SC, ucp_Pau_Cin_Hau },
+  { 1171, PT_PC, ucp_Pc },
+  { 1174, PT_PC, ucp_Pd },
+  { 1177, PT_PC, ucp_Pe },
+  { 1180, PT_PC, ucp_Pf },
+  { 1183, PT_SC, ucp_Phags_Pa },
+  { 1192, PT_SC, ucp_Phoenician },
+  { 1203, PT_PC, ucp_Pi },
+  { 1206, PT_PC, ucp_Po },
+  { 1209, PT_PC, ucp_Ps },
+  { 1212, PT_SC, ucp_Psalter_Pahlavi },
+  { 1228, PT_SC, ucp_Rejang },
+  { 1235, PT_SC, ucp_Runic },
+  { 1241, PT_GC, ucp_S },
+  { 1243, PT_SC, ucp_Samaritan },
+  { 1253, PT_SC, ucp_Saurashtra },
+  { 1264, PT_PC, ucp_Sc },
+  { 1267, PT_SC, ucp_Sharada },
+  { 1275, PT_SC, ucp_Shavian },
+  { 1283, PT_SC, ucp_Siddham },
+  { 1291, PT_SC, ucp_SignWriting },
+  { 1303, PT_SC, ucp_Sinhala },
+  { 1311, PT_PC, ucp_Sk },
+  { 1314, PT_PC, ucp_Sm },
+  { 1317, PT_PC, ucp_So },
+  { 1320, PT_SC, ucp_Sogdian },
+  { 1328, PT_SC, ucp_Sora_Sompeng },
+  { 1341, PT_SC, ucp_Soyombo },
+  { 1349, PT_SC, ucp_Sundanese },
+  { 1359, PT_SC, ucp_Syloti_Nagri },
+  { 1372, PT_SC, ucp_Syriac },
+  { 1379, PT_SC, ucp_Tagalog },
+  { 1387, PT_SC, ucp_Tagbanwa },
+  { 1396, PT_SC, ucp_Tai_Le },
+  { 1403, PT_SC, ucp_Tai_Tham },
+  { 1412, PT_SC, ucp_Tai_Viet },
+  { 1421, PT_SC, ucp_Takri },
+  { 1427, PT_SC, ucp_Tamil },
+  { 1433, PT_SC, ucp_Tangut },
+  { 1440, PT_SC, ucp_Telugu },
+  { 1447, PT_SC, ucp_Thaana },
+  { 1454, PT_SC, ucp_Thai },
+  { 1459, PT_SC, ucp_Tibetan },
+  { 1467, PT_SC, ucp_Tifinagh },
+  { 1476, PT_SC, ucp_Tirhuta },
+  { 1484, PT_SC, ucp_Ugaritic },
+  { 1493, PT_SC, ucp_Unknown },
+  { 1501, PT_SC, ucp_Vai },
+  { 1505, PT_SC, ucp_Wancho },
+  { 1512, PT_SC, ucp_Warang_Citi },
+  { 1524, PT_ALNUM, 0 },
+  { 1528, PT_PXSPACE, 0 },
+  { 1532, PT_SPACE, 0 },
+  { 1536, PT_UCNC, 0 },
+  { 1540, PT_WORD, 0 },
+  { 1544, PT_SC, ucp_Yi },
+  { 1547, PT_GC, ucp_Z },
+  { 1549, PT_SC, ucp_Zanabazar_Square },
+  { 1566, PT_PC, ucp_Zl },
+  { 1569, PT_PC, ucp_Zp },
+  { 1572, PT_PC, ucp_Zs }
 };
 
 const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

File diff suppressed because it is too large
+ 898 - 863
thirdparty/pcre2/src/pcre2_ucd.c


+ 6 - 1
thirdparty/pcre2/src/pcre2_ucp.h

@@ -281,7 +281,12 @@ enum {
   ucp_Makasar,
   ucp_Medefaidrin,
   ucp_Old_Sogdian,
-  ucp_Sogdian
+  ucp_Sogdian,
+  /* New for Unicode 12.0.0 */
+  ucp_Elymaic,
+  ucp_Nandinagari,
+  ucp_Nyiakeng_Puachue_Hmong,
+  ucp_Wancho
 };
 
 #endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */

+ 4 - 0
thirdparty/pcre2/src/sljit/sljitConfigInternal.h

@@ -214,6 +214,10 @@
 #define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len)
 #endif
 
+#ifndef SLJIT_MEMMOVE
+#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len)
+#endif
+
 #ifndef SLJIT_ZEROMEM
 #define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len)
 #endif

+ 24 - 4
thirdparty/pcre2/src/sljit/sljitExecAllocator.c

@@ -118,10 +118,20 @@ static SLJIT_INLINE int get_map_jit_flag()
 	if (map_jit_flag == -1) {
 		struct utsname name;
 
+		map_jit_flag = 0;
 		uname(&name);
 
 		/* Kernel version for 10.14.0 (Mojave) */
-		map_jit_flag = (atoi(name.release) >= 18) ? MAP_JIT : 0;
+		if (atoi(name.release) >= 18) {
+			/* Only use MAP_JIT if a hardened runtime is used, because MAP_JIT is incompatible with fork(). */
+			void *ptr = mmap(NULL, getpagesize(), PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+
+			if (ptr == MAP_FAILED) {
+				map_jit_flag = MAP_JIT;
+			} else {
+				munmap(ptr, getpagesize());
+			}
+		}
 	}
 
 	return map_jit_flag;
@@ -137,6 +147,7 @@ static SLJIT_INLINE int get_map_jit_flag()
 static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
 {
 	void *retval;
+	const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
 
 #ifdef MAP_ANON
 
@@ -146,16 +157,25 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
 	flags |= get_map_jit_flag();
 #endif
 
-	retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
+	retval = mmap(NULL, size, prot, flags, -1, 0);
 #else /* !MAP_ANON */
 	if (dev_zero < 0) {
 		if (open_dev_zero())
 			return NULL;
 	}
-	retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, dev_zero, 0);
+	retval = mmap(NULL, size, prot, MAP_PRIVATE, dev_zero, 0);
 #endif /* MAP_ANON */
 
-	return (retval != MAP_FAILED) ? retval : NULL;
+	if (retval == MAP_FAILED)
+		retval = NULL;
+	else {
+		if (mprotect(retval, size, prot) < 0) {
+			munmap(retval, size);
+			retval = NULL;
+		}
+	}
+
+	return retval;
 }
 
 static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)

+ 68 - 1
thirdparty/pcre2/src/sljit/sljitLir.c

@@ -144,6 +144,7 @@
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 #	define PATCH_MD		0x10
 #endif
+#	define TYPE_SHIFT	13
 #endif
 
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
@@ -521,6 +522,12 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw
 	}
 }
 
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label)
+{
+	if (SLJIT_LIKELY(!!put_label))
+		put_label->label = label;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags)
 {
 	SLJIT_UNUSED_ARG(compiler);
@@ -620,6 +627,30 @@ static SLJIT_INLINE sljit_s32 get_arg_count(sljit_s32 arg_types)
 	return arg_count;
 }
 
+#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+
+static SLJIT_INLINE sljit_uw compute_next_addr(struct sljit_label *label, struct sljit_jump *jump,
+	struct sljit_const *const_, struct sljit_put_label *put_label)
+{
+	sljit_uw result = ~(sljit_uw)0;
+
+	if (label)
+		result = label->size;
+
+	if (jump && jump->addr < result)
+		result = jump->addr;
+
+	if (const_ && const_->addr < result)
+		result = const_->addr;
+
+	if (put_label && put_label->addr < result)
+		result = put_label->addr;
+
+	return result;
+}
+
+#endif /* !SLJIT_CONFIG_X86 */
+
 static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler,
 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
@@ -687,6 +718,19 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp
 	compiler->last_const = const_;
 }
 
+static SLJIT_INLINE void set_put_label(struct sljit_put_label *put_label, struct sljit_compiler *compiler, sljit_uw offset)
+{
+	put_label->next = NULL;
+	put_label->label = NULL;
+	put_label->addr = compiler->size - offset;
+	put_label->flags = 0;
+	if (compiler->last_put_label)
+		compiler->last_put_label->next = put_label;
+	else
+		compiler->put_labels = put_label;
+	compiler->last_put_label = put_label;
+}
+
 #define ADDRESSING_DEPENDS_ON(exp, reg) \
 	(((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg))
 
@@ -1905,6 +1949,21 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil
 	CHECK_RETURN_OK;
 }
 
+static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	FUNCTION_CHECK_DST(dst, dstw, 0);
+#endif
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
+	if (SLJIT_UNLIKELY(!!compiler->verbose)) {
+		fprintf(compiler->verbose, "  put_label ");
+		sljit_verbose_param(compiler, dst, dstw);
+		fprintf(compiler->verbose, "\n");
+	}
+#endif
+	CHECK_RETURN_OK;
+}
+
 #endif /* SLJIT_ARGUMENT_CHECKS || SLJIT_VERBOSE */
 
 #define SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw) \
@@ -2581,6 +2640,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
 	return NULL;
 }
 
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+	SLJIT_UNUSED_ARG(compiler);
+	SLJIT_UNUSED_ARG(dst);
+	SLJIT_UNUSED_ARG(dstw);
+	return NULL;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
 	SLJIT_UNUSED_ARG(addr);
@@ -2597,4 +2664,4 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
 	SLJIT_UNREACHABLE();
 }
 
-#endif
+#endif /* !SLJIT_CONFIG_UNSUPPORTED */

+ 19 - 3
thirdparty/pcre2/src/sljit/sljitLir.h

@@ -348,13 +348,20 @@ struct sljit_label {
 struct sljit_jump {
 	struct sljit_jump *next;
 	sljit_uw addr;
-	sljit_sw flags;
+	sljit_uw flags;
 	union {
 		sljit_uw target;
-		struct sljit_label* label;
+		struct sljit_label *label;
 	} u;
 };
 
+struct sljit_put_label {
+	struct sljit_put_label *next;
+	struct sljit_label *label;
+	sljit_uw addr;
+	sljit_uw flags;
+};
+
 struct sljit_const {
 	struct sljit_const *next;
 	sljit_uw addr;
@@ -366,10 +373,12 @@ struct sljit_compiler {
 
 	struct sljit_label *labels;
 	struct sljit_jump *jumps;
+	struct sljit_put_label *put_labels;
 	struct sljit_const *consts;
 	struct sljit_label *last_label;
 	struct sljit_jump *last_jump;
 	struct sljit_const *last_const;
+	struct sljit_put_label *last_put_label;
 
 	void *allocator_data;
 	struct sljit_memory_fragment *buf;
@@ -1314,10 +1323,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
    Flags: - (may destroy flags) */
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset);
 
-/* The constant can be changed runtime (see: sljit_set_const)
+/* Store a value that can be changed runtime (see: sljit_get_const_addr / sljit_set_const)
    Flags: - (does not modify flags) */
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value);
 
+/* Store the value of a label (see: sljit_set_put_label)
+   Flags: - (does not modify flags) */
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw);
+
+/* Set the value stored by put_label to this label. */
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_put_label(struct sljit_put_label *put_label, struct sljit_label *label);
+
 /* After the code generation the address for label, jump and const instructions
    are computed. Since these structures are freed by sljit_free_compiler, the
    addresses must be preserved by the user program elsewere. */

+ 92 - 34
thirdparty/pcre2/src/sljit/sljitNativeARM_32.c

@@ -583,8 +583,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	sljit_uw *buf_end;
 	sljit_uw size;
 	sljit_uw word_count;
+	sljit_uw next_addr;
 	sljit_sw executable_offset;
-	sljit_sw jump_addr;
+	sljit_sw addr;
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
 	sljit_uw cpool_size;
 	sljit_uw cpool_skip_alignment;
@@ -597,6 +598,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	struct sljit_label *label;
 	struct sljit_jump *jump;
 	struct sljit_const *const_;
+	struct sljit_put_label *put_label;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_generate_code(compiler));
@@ -625,11 +627,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 
 	code_ptr = code;
 	word_count = 0;
+	next_addr = 1;
 	executable_offset = SLJIT_EXEC_OFFSET(code);
 
 	label = compiler->labels;
 	jump = compiler->jumps;
 	const_ = compiler->consts;
+	put_label = compiler->put_labels;
 
 	if (label && label->size == 0) {
 		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
@@ -669,35 +673,45 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 			else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
 #endif
 				*code_ptr = *buf_ptr++;
+				if (next_addr == word_count) {
+					SLJIT_ASSERT(!label || label->size >= word_count);
+					SLJIT_ASSERT(!jump || jump->addr >= word_count);
+					SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+					SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
+
 				/* These structures are ordered by their address. */
-				SLJIT_ASSERT(!label || label->size >= word_count);
-				SLJIT_ASSERT(!jump || jump->addr >= word_count);
-				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
-				if (jump && jump->addr == word_count) {
+					if (jump && jump->addr == word_count) {
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
-					if (detect_jump_type(jump, code_ptr, code, executable_offset))
-						code_ptr--;
-					jump->addr = (sljit_uw)code_ptr;
+						if (detect_jump_type(jump, code_ptr, code, executable_offset))
+							code_ptr--;
+						jump->addr = (sljit_uw)code_ptr;
 #else
-					jump->addr = (sljit_uw)(code_ptr - 2);
-					if (detect_jump_type(jump, code_ptr, code, executable_offset))
-						code_ptr -= 2;
+						jump->addr = (sljit_uw)(code_ptr - 2);
+						if (detect_jump_type(jump, code_ptr, code, executable_offset))
+							code_ptr -= 2;
 #endif
-					jump = jump->next;
-				}
-				if (label && label->size == word_count) {
-					/* code_ptr can be affected above. */
-					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
-					label->size = (code_ptr + 1) - code;
-					label = label->next;
-				}
-				if (const_ && const_->addr == word_count) {
+						jump = jump->next;
+					}
+					if (label && label->size == word_count) {
+						/* code_ptr can be affected above. */
+						label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
+						label->size = (code_ptr + 1) - code;
+						label = label->next;
+					}
+					if (const_ && const_->addr == word_count) {
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
-					const_->addr = (sljit_uw)code_ptr;
+						const_->addr = (sljit_uw)code_ptr;
 #else
-					const_->addr = (sljit_uw)(code_ptr - 1);
+						const_->addr = (sljit_uw)(code_ptr - 1);
 #endif
-					const_ = const_->next;
+						const_ = const_->next;
+					}
+					if (put_label && put_label->addr == word_count) {
+						SLJIT_ASSERT(put_label->label);
+						put_label->addr = (sljit_uw)code_ptr;
+						put_label = put_label->next;
+					}
+					next_addr = compute_next_addr(label, jump, const_, put_label);
 				}
 				code_ptr++;
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
@@ -725,6 +739,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	SLJIT_ASSERT(!label);
 	SLJIT_ASSERT(!jump);
 	SLJIT_ASSERT(!const_);
+	SLJIT_ASSERT(!put_label);
 
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
 	SLJIT_ASSERT(cpool_size == 0);
@@ -755,15 +770,15 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 		buf_ptr = (sljit_uw *)jump->addr;
 
 		if (jump->flags & PATCH_B) {
-			jump_addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
+			addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
 			if (!(jump->flags & JUMP_ADDR)) {
 				SLJIT_ASSERT(jump->flags & JUMP_LABEL);
-				SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - jump_addr) >= -0x02000000);
-				*buf_ptr |= (((sljit_sw)jump->u.label->addr - jump_addr) >> 2) & 0x00ffffff;
+				SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - addr) >= -0x02000000);
+				*buf_ptr |= (((sljit_sw)jump->u.label->addr - addr) >> 2) & 0x00ffffff;
 			}
 			else {
-				SLJIT_ASSERT(((sljit_sw)jump->u.target - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - jump_addr) >= -0x02000000);
-				*buf_ptr |= (((sljit_sw)jump->u.target - jump_addr) >> 2) & 0x00ffffff;
+				SLJIT_ASSERT(((sljit_sw)jump->u.target - addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - addr) >= -0x02000000);
+				*buf_ptr |= (((sljit_sw)jump->u.target - addr) >> 2) & 0x00ffffff;
 			}
 		}
 		else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
@@ -813,6 +828,22 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	}
 #endif
 
+	put_label = compiler->put_labels;
+	while (put_label) {
+		addr = put_label->label->addr;
+		buf_ptr = (sljit_uw*)put_label->addr;
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+		SLJIT_ASSERT((buf_ptr[0] & 0xffff0000) == 0xe59f0000);
+		buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr;
+#else
+		SLJIT_ASSERT((buf_ptr[-1] & 0xfff00000) == MOVW && (buf_ptr[0] & 0xfff00000) == MOVT);
+		buf_ptr[-1] |= ((addr << 4) & 0xf0000) | (addr & 0xfff);
+		buf_ptr[0] |= ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff);
+#endif
+		put_label = put_label->next;
+	}
+
 	SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size);
 
 	compiler->error = SLJIT_ERR_COMPILED;
@@ -2639,28 +2670,55 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 {
 	struct sljit_const *const_;
-	sljit_s32 reg;
+	sljit_s32 dst_r;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
 	ADJUST_LOCAL_OFFSET(dst, dstw);
 
+	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+
+#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
+	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), init_value));
+	compiler->patches++;
+#else
+	PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value));
+#endif
+
 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
 	PTR_FAIL_IF(!const_);
+	set_const(const_, compiler);
 
-	reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
+	if (dst & SLJIT_MEM)
+		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
+	return const_;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+	struct sljit_put_label *put_label;
+	sljit_s32 dst_r;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG2;
 
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
-	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), init_value));
+	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0));
 	compiler->patches++;
 #else
-	PTR_FAIL_IF(emit_imm(compiler, reg, init_value));
+	PTR_FAIL_IF(emit_imm(compiler, dst_r, 0));
 #endif
-	set_const(const_, compiler);
+
+	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
+	PTR_FAIL_IF(!put_label);
+	set_put_label(put_label, compiler, 0);
 
 	if (dst & SLJIT_MEM)
 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
-	return const_;
+	return put_label;
 }
 
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)

+ 91 - 20
thirdparty/pcre2/src/sljit/sljitNativeARM_64.c

@@ -161,7 +161,7 @@ static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm)
 	inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21);
 }
 
-static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
+static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
 {
 	sljit_sw diff;
 	sljit_uw target_addr;
@@ -196,14 +196,14 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in
 		return 4;
 	}
 
-	if (target_addr <= 0xffffffffl) {
+	if (target_addr < 0x100000000l) {
 		if (jump->flags & IS_COND)
 			code_ptr[-5] -= (2 << 5);
 		code_ptr[-2] = code_ptr[0];
 		return 2;
 	}
 
-	if (target_addr <= 0xffffffffffffl) {
+	if (target_addr < 0x1000000000000l) {
 		if (jump->flags & IS_COND)
 			code_ptr[-5] -= (1 << 5);
 		jump->flags |= PATCH_ABS48;
@@ -215,6 +215,22 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in
 	return 0;
 }
 
+static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label)
+{
+	if (max_label < 0x100000000l) {
+		put_label->flags = 0;
+		return 2;
+	}
+
+	if (max_label < 0x1000000000000l) {
+		put_label->flags = 1;
+		return 1;
+	}
+
+	put_label->flags = 2;
+	return 0;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
 {
 	struct sljit_memory_fragment *buf;
@@ -223,6 +239,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	sljit_ins *buf_ptr;
 	sljit_ins *buf_end;
 	sljit_uw word_count;
+	sljit_uw next_addr;
 	sljit_sw executable_offset;
 	sljit_uw addr;
 	sljit_s32 dst;
@@ -230,6 +247,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	struct sljit_label *label;
 	struct sljit_jump *jump;
 	struct sljit_const *const_;
+	struct sljit_put_label *put_label;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_generate_code(compiler));
@@ -241,34 +259,47 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 
 	code_ptr = code;
 	word_count = 0;
+	next_addr = 0;
 	executable_offset = SLJIT_EXEC_OFFSET(code);
 
 	label = compiler->labels;
 	jump = compiler->jumps;
 	const_ = compiler->consts;
+	put_label = compiler->put_labels;
 
 	do {
 		buf_ptr = (sljit_ins*)buf->memory;
 		buf_end = buf_ptr + (buf->used_size >> 2);
 		do {
 			*code_ptr = *buf_ptr++;
-			/* These structures are ordered by their address. */
-			SLJIT_ASSERT(!label || label->size >= word_count);
-			SLJIT_ASSERT(!jump || jump->addr >= word_count);
-			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
-			if (label && label->size == word_count) {
-				label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
-				label->size = code_ptr - code;
-				label = label->next;
-			}
-			if (jump && jump->addr == word_count) {
-					jump->addr = (sljit_uw)(code_ptr - 4);
-					code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
-					jump = jump->next;
-			}
-			if (const_ && const_->addr == word_count) {
-				const_->addr = (sljit_uw)code_ptr;
-				const_ = const_->next;
+			if (next_addr == word_count) {
+				SLJIT_ASSERT(!label || label->size >= word_count);
+				SLJIT_ASSERT(!jump || jump->addr >= word_count);
+				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+				SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
+
+				/* These structures are ordered by their address. */
+				if (label && label->size == word_count) {
+					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+					label->size = code_ptr - code;
+					label = label->next;
+				}
+				if (jump && jump->addr == word_count) {
+						jump->addr = (sljit_uw)(code_ptr - 4);
+						code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
+						jump = jump->next;
+				}
+				if (const_ && const_->addr == word_count) {
+					const_->addr = (sljit_uw)code_ptr;
+					const_ = const_->next;
+				}
+				if (put_label && put_label->addr == word_count) {
+					SLJIT_ASSERT(put_label->label);
+					put_label->addr = (sljit_uw)(code_ptr - 3);
+					code_ptr -= put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
+					put_label = put_label->next;
+				}
+				next_addr = compute_next_addr(label, jump, const_, put_label);
 			}
 			code_ptr ++;
 			word_count ++;
@@ -286,6 +317,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	SLJIT_ASSERT(!label);
 	SLJIT_ASSERT(!jump);
 	SLJIT_ASSERT(!const_);
+	SLJIT_ASSERT(!put_label);
 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
 
 	jump = compiler->jumps;
@@ -323,6 +355,23 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 		jump = jump->next;
 	}
 
+	put_label = compiler->put_labels;
+	while (put_label) {
+		addr = put_label->label->addr;
+		buf_ptr = (sljit_ins *)put_label->addr;
+
+		buf_ptr[0] |= (addr & 0xffff) << 5;
+		buf_ptr[1] |= ((addr >> 16) & 0xffff) << 5;
+
+		if (put_label->flags >= 1)
+			buf_ptr[2] |= ((addr >> 32) & 0xffff) << 5;
+
+		if (put_label->flags >= 2)
+			buf_ptr[3] |= ((addr >> 48) & 0xffff) << 5;
+
+		put_label = put_label->next;
+	}
+
 	compiler->error = SLJIT_ERR_COMPILED;
 	compiler->executable_offset = executable_offset;
 	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
@@ -1947,6 +1996,28 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
 	return const_;
 }
 
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+	struct sljit_put_label *put_label;
+	sljit_s32 dst_r;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+	PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, 0));
+
+	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
+	PTR_FAIL_IF(!put_label);
+	set_put_label(put_label, compiler, 1);
+
+	if (dst & SLJIT_MEM)
+		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
+
+	return put_label;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
 	sljit_ins* inst = (sljit_ins*)addr;

+ 59 - 17
thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c

@@ -365,11 +365,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	sljit_u16 *buf_ptr;
 	sljit_u16 *buf_end;
 	sljit_uw half_count;
+	sljit_uw next_addr;
 	sljit_sw executable_offset;
 
 	struct sljit_label *label;
 	struct sljit_jump *jump;
 	struct sljit_const *const_;
+	struct sljit_put_label *put_label;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_generate_code(compiler));
@@ -381,34 +383,46 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 
 	code_ptr = code;
 	half_count = 0;
+	next_addr = 0;
 	executable_offset = SLJIT_EXEC_OFFSET(code);
 
 	label = compiler->labels;
 	jump = compiler->jumps;
 	const_ = compiler->consts;
+	put_label = compiler->put_labels;
 
 	do {
 		buf_ptr = (sljit_u16*)buf->memory;
 		buf_end = buf_ptr + (buf->used_size >> 1);
 		do {
 			*code_ptr = *buf_ptr++;
-			/* These structures are ordered by their address. */
-			SLJIT_ASSERT(!label || label->size >= half_count);
-			SLJIT_ASSERT(!jump || jump->addr >= half_count);
-			SLJIT_ASSERT(!const_ || const_->addr >= half_count);
-			if (label && label->size == half_count) {
-				label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
-				label->size = code_ptr - code;
-				label = label->next;
-			}
-			if (jump && jump->addr == half_count) {
-					jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
-					code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
-					jump = jump->next;
-			}
-			if (const_ && const_->addr == half_count) {
-				const_->addr = (sljit_uw)code_ptr;
-				const_ = const_->next;
+			if (next_addr == half_count) {
+				SLJIT_ASSERT(!label || label->size >= half_count);
+				SLJIT_ASSERT(!jump || jump->addr >= half_count);
+				SLJIT_ASSERT(!const_ || const_->addr >= half_count);
+				SLJIT_ASSERT(!put_label || put_label->addr >= half_count);
+
+				/* These structures are ordered by their address. */
+				if (label && label->size == half_count) {
+					label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
+					label->size = code_ptr - code;
+					label = label->next;
+				}
+				if (jump && jump->addr == half_count) {
+						jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
+						code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
+						jump = jump->next;
+				}
+				if (const_ && const_->addr == half_count) {
+					const_->addr = (sljit_uw)code_ptr;
+					const_ = const_->next;
+				}
+				if (put_label && put_label->addr == half_count) {
+					SLJIT_ASSERT(put_label->label);
+					put_label->addr = (sljit_uw)code_ptr;
+					put_label = put_label->next;
+				}
+				next_addr = compute_next_addr(label, jump, const_, put_label);
 			}
 			code_ptr ++;
 			half_count ++;
@@ -426,6 +440,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	SLJIT_ASSERT(!label);
 	SLJIT_ASSERT(!jump);
 	SLJIT_ASSERT(!const_);
+	SLJIT_ASSERT(!put_label);
 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
 
 	jump = compiler->jumps;
@@ -434,6 +449,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 		jump = jump->next;
 	}
 
+	put_label = compiler->put_labels;
+	while (put_label) {
+		modify_imm32_const((sljit_u16 *)put_label->addr, put_label->label->addr);
+		put_label = put_label->next;
+	}
+
 	compiler->error = SLJIT_ERR_COMPILED;
 	compiler->executable_offset = executable_offset;
 	compiler->executable_size = (code_ptr - code) * sizeof(sljit_u16);
@@ -2311,6 +2332,27 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
 	return const_;
 }
 
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+	struct sljit_put_label *put_label;
+	sljit_s32 dst_r;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
+	PTR_FAIL_IF(!put_label);
+	set_put_label(put_label, compiler, 0);
+
+	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+	PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, 0));
+
+	if (dst & SLJIT_MEM)
+		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
+	return put_label;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
 	sljit_u16 *inst = (sljit_u16*)addr;

+ 2 - 0
thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c

@@ -425,6 +425,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
 {
 	sljit_ins *inst = (sljit_ins *)addr;
 
+	SLJIT_ASSERT((inst[0] & 0xffe00000) == LUI && (inst[1] & 0xfc000000) == ORI);
 	inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff);
 	inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff);
 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
@@ -435,6 +436,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
 {
 	sljit_ins *inst = (sljit_ins *)addr;
 
+	SLJIT_ASSERT((inst[0] & 0xffe00000) == LUI && (inst[1] & 0xfc000000) == ORI);
 	inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
 	inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff);
 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);

+ 132 - 23
thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c

@@ -449,6 +449,55 @@ static __attribute__ ((noinline)) void sljit_cache_flush(void* code, void* code_
 }
 #endif
 
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+
+static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label)
+{
+	if (max_label < 0x80000000l) {
+		put_label->flags = 0;
+		return 1;
+	}
+
+	if (max_label < 0x800000000000l) {
+		put_label->flags = 1;
+		return 3;
+	}
+
+	put_label->flags = 2;
+	return 5;
+}
+
+static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label)
+{
+	sljit_uw addr = put_label->label->addr;
+	sljit_ins *inst = (sljit_ins *)put_label->addr;
+	sljit_s32 reg = *inst;
+
+	if (put_label->flags == 0) {
+		SLJIT_ASSERT(addr < 0x80000000l);
+		inst[0] = LUI | T(reg) | IMM(addr >> 16);
+	}
+	else if (put_label->flags == 1) {
+		SLJIT_ASSERT(addr < 0x800000000000l);
+		inst[0] = LUI | T(reg) | IMM(addr >> 32);
+		inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff);
+		inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16);
+		inst += 2;
+	}
+	else {
+		inst[0] = LUI | T(reg) | IMM(addr >> 48);
+		inst[1] = ORI | S(reg) | T(reg) | IMM((addr >> 32) & 0xffff);
+		inst[2] = DSLL | T(reg) | D(reg) | SH_IMM(16);
+		inst[3] = ORI | S(reg) | T(reg) | IMM((addr >> 16) & 0xffff);
+		inst[4] = DSLL | T(reg) | D(reg) | SH_IMM(16);
+		inst += 4;
+	}
+
+	inst[1] = ORI | S(reg) | T(reg) | IMM(addr & 0xffff);
+}
+
+#endif
+
 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
 {
 	struct sljit_memory_fragment *buf;
@@ -457,12 +506,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	sljit_ins *buf_ptr;
 	sljit_ins *buf_end;
 	sljit_uw word_count;
+	sljit_uw next_addr;
 	sljit_sw executable_offset;
 	sljit_uw addr;
 
 	struct sljit_label *label;
 	struct sljit_jump *jump;
 	struct sljit_const *const_;
+	struct sljit_put_label *put_label;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_generate_code(compiler));
@@ -474,39 +525,54 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 
 	code_ptr = code;
 	word_count = 0;
+	next_addr = 0;
 	executable_offset = SLJIT_EXEC_OFFSET(code);
 
 	label = compiler->labels;
 	jump = compiler->jumps;
 	const_ = compiler->consts;
+	put_label = compiler->put_labels;
 
 	do {
 		buf_ptr = (sljit_ins*)buf->memory;
 		buf_end = buf_ptr + (buf->used_size >> 2);
 		do {
 			*code_ptr = *buf_ptr++;
-			SLJIT_ASSERT(!label || label->size >= word_count);
-			SLJIT_ASSERT(!jump || jump->addr >= word_count);
-			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
-			/* These structures are ordered by their address. */
-			if (label && label->size == word_count) {
-				label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
-				label->size = code_ptr - code;
-				label = label->next;
-			}
-			if (jump && jump->addr == word_count) {
+			if (next_addr == word_count) {
+				SLJIT_ASSERT(!label || label->size >= word_count);
+				SLJIT_ASSERT(!jump || jump->addr >= word_count);
+				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+				SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
+
+				/* These structures are ordered by their address. */
+				if (label && label->size == word_count) {
+					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+					label->size = code_ptr - code;
+					label = label->next;
+				}
+				if (jump && jump->addr == word_count) {
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-				jump->addr = (sljit_uw)(code_ptr - 3);
+					jump->addr = (sljit_uw)(code_ptr - 3);
 #else
-				jump->addr = (sljit_uw)(code_ptr - 7);
+					jump->addr = (sljit_uw)(code_ptr - 7);
 #endif
-				code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
-				jump = jump->next;
-			}
-			if (const_ && const_->addr == word_count) {
-				/* Just recording the address. */
-				const_->addr = (sljit_uw)code_ptr;
-				const_ = const_->next;
+					code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
+					jump = jump->next;
+				}
+				if (const_ && const_->addr == word_count) {
+					const_->addr = (sljit_uw)code_ptr;
+					const_ = const_->next;
+				}
+				if (put_label && put_label->addr == word_count) {
+					SLJIT_ASSERT(put_label->label);
+					put_label->addr = (sljit_uw)code_ptr;
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+					code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
+					word_count += 5;
+#endif
+					put_label = put_label->next;
+				}
+				next_addr = compute_next_addr(label, jump, const_, put_label);
 			}
 			code_ptr ++;
 			word_count ++;
@@ -524,6 +590,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	SLJIT_ASSERT(!label);
 	SLJIT_ASSERT(!jump);
 	SLJIT_ASSERT(!const_);
+	SLJIT_ASSERT(!put_label);
 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
 
 	jump = compiler->jumps;
@@ -571,6 +638,21 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 		jump = jump->next;
 	}
 
+	put_label = compiler->put_labels;
+	while (put_label) {
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+		addr = put_label->label->addr;
+		buf_ptr = (sljit_ins *)put_label->addr;
+
+		SLJIT_ASSERT((buf_ptr[0] & 0xffe00000) == LUI && (buf_ptr[1] & 0xfc000000) == ORI);
+		buf_ptr[0] |= (addr >> 16) & 0xffff;
+		buf_ptr[1] |= addr & 0xffff;
+#else
+		put_label_set(put_label);
+#endif
+		put_label = put_label->next;
+	}
+
 	compiler->error = SLJIT_ERR_COMPILED;
 	compiler->executable_offset = executable_offset;
 	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
@@ -2157,7 +2239,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 {
 	struct sljit_const *const_;
-	sljit_s32 reg;
+	sljit_s32 dst_r;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
@@ -2167,11 +2249,38 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
 	PTR_FAIL_IF(!const_);
 	set_const(const_, compiler);
 
-	reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
-
-	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
+	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+	PTR_FAIL_IF(emit_const(compiler, dst_r, init_value));
 
 	if (dst & SLJIT_MEM)
 		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
+
 	return const_;
 }
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+	struct sljit_put_label *put_label;
+	sljit_s32 dst_r;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
+	PTR_FAIL_IF(!put_label);
+	set_put_label(put_label, compiler, 0);
+
+	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+	PTR_FAIL_IF(emit_const(compiler, dst_r, 0));
+#else
+	PTR_FAIL_IF(push_inst(compiler, dst_r, UNMOVABLE_INS));
+	compiler->size += 5;
+#endif
+
+	if (dst & SLJIT_MEM)
+		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
+
+	return put_label;
+}

+ 2 - 0
thirdparty/pcre2/src/sljit/sljitNativePPC_32.c

@@ -259,6 +259,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
 {
 	sljit_ins *inst = (sljit_ins *)addr;
 
+	SLJIT_ASSERT((inst[0] & 0xfc1f0000) == ADDIS && (inst[1] & 0xfc000000) == ORI);
 	inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff);
 	inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff);
 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
@@ -269,6 +270,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
 {
 	sljit_ins *inst = (sljit_ins *)addr;
 
+	SLJIT_ASSERT((inst[0] & 0xfc1f0000) == ADDIS && (inst[1] & 0xfc000000) == ORI);
 	inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff);
 	inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff);
 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);

+ 0 - 3
thirdparty/pcre2/src/sljit/sljitNativePPC_64.c

@@ -35,9 +35,6 @@
 #error "Must implement count leading zeroes"
 #endif
 
-#define RLDI(dst, src, sh, mb, type) \
-	(HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20))
-
 #define PUSH_RLDICR(reg, shift) \
 	push_inst(compiler, RLDI(reg, reg, 63 - shift, shift, 1))
 

+ 164 - 50
thirdparty/pcre2/src/sljit/sljitNativePPC_common.c

@@ -231,6 +231,9 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define SIMM_MIN	(-0x8000)
 #define UIMM_MAX	(0xffff)
 
+#define RLDI(dst, src, sh, mb, type) \
+	(HI(30) | S(src) | A(dst) | ((type) << 2) | (((sh) & 0x1f) << 11) | (((sh) & 0x20) >> 4) | (((mb) & 0x1f) << 6) | ((mb) & 0x20))
+
 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
 {
@@ -324,6 +327,55 @@ keep_address:
 	return 0;
 }
 
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+
+static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label)
+{
+	if (max_label < 0x100000000l) {
+		put_label->flags = 0;
+		return 1;
+	}
+
+	if (max_label < 0x1000000000000l) {
+		put_label->flags = 1;
+		return 3;
+	}
+
+	put_label->flags = 2;
+	return 4;
+}
+
+static SLJIT_INLINE void put_label_set(struct sljit_put_label *put_label)
+{
+	sljit_uw addr = put_label->label->addr;
+	sljit_ins *inst = (sljit_ins *)put_label->addr;
+	sljit_s32 reg = *inst;
+
+	if (put_label->flags == 0) {
+		SLJIT_ASSERT(addr < 0x100000000l);
+		inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 16);
+	}
+	else {
+		if (put_label->flags == 1) {
+			SLJIT_ASSERT(addr < 0x1000000000000l);
+			inst[0] = ORI | S(TMP_ZERO) | A(reg) | IMM(addr >> 32);
+		}
+		else {
+			inst[0] = ORIS | S(TMP_ZERO) | A(reg) | IMM(addr >> 48);
+			inst[1] = ORI | S(reg) | A(reg) | IMM((addr >> 32) & 0xffff);
+			inst ++;
+		}
+
+		inst[1] = RLDI(reg, reg, 32, 31, 1);
+		inst[2] = ORIS | S(reg) | A(reg) | IMM((addr >> 16) & 0xffff);
+		inst += 2;
+	}
+
+	inst[1] = ORI | S(reg) | A(reg) | IMM(addr & 0xffff);
+}
+
+#endif
+
 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
 {
 	struct sljit_memory_fragment *buf;
@@ -332,12 +384,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	sljit_ins *buf_ptr;
 	sljit_ins *buf_end;
 	sljit_uw word_count;
+	sljit_uw next_addr;
 	sljit_sw executable_offset;
 	sljit_uw addr;
 
 	struct sljit_label *label;
 	struct sljit_jump *jump;
 	struct sljit_const *const_;
+	struct sljit_put_label *put_label;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_generate_code(compiler));
@@ -356,71 +410,87 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 
 	code_ptr = code;
 	word_count = 0;
+	next_addr = 0;
 	executable_offset = SLJIT_EXEC_OFFSET(code);
 
 	label = compiler->labels;
 	jump = compiler->jumps;
 	const_ = compiler->consts;
+	put_label = compiler->put_labels;
 
 	do {
 		buf_ptr = (sljit_ins*)buf->memory;
 		buf_end = buf_ptr + (buf->used_size >> 2);
 		do {
 			*code_ptr = *buf_ptr++;
-			SLJIT_ASSERT(!label || label->size >= word_count);
-			SLJIT_ASSERT(!jump || jump->addr >= word_count);
-			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
-			/* These structures are ordered by their address. */
-			if (label && label->size == word_count) {
-				/* Just recording the address. */
-				label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
-				label->size = code_ptr - code;
-				label = label->next;
-			}
-			if (jump && jump->addr == word_count) {
+			if (next_addr == word_count) {
+				SLJIT_ASSERT(!label || label->size >= word_count);
+				SLJIT_ASSERT(!jump || jump->addr >= word_count);
+				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+				SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
+
+				/* These structures are ordered by their address. */
+				if (label && label->size == word_count) {
+					/* Just recording the address. */
+					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+					label->size = code_ptr - code;
+					label = label->next;
+				}
+				if (jump && jump->addr == word_count) {
 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
-				jump->addr = (sljit_uw)(code_ptr - 3);
+					jump->addr = (sljit_uw)(code_ptr - 3);
 #else
-				jump->addr = (sljit_uw)(code_ptr - 6);
+					jump->addr = (sljit_uw)(code_ptr - 6);
 #endif
-				if (detect_jump_type(jump, code_ptr, code, executable_offset)) {
+					if (detect_jump_type(jump, code_ptr, code, executable_offset)) {
 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
-					code_ptr[-3] = code_ptr[0];
-					code_ptr -= 3;
-#else
-					if (jump->flags & PATCH_ABS32) {
+						code_ptr[-3] = code_ptr[0];
 						code_ptr -= 3;
-						code_ptr[-1] = code_ptr[2];
-						code_ptr[0] = code_ptr[3];
-					}
-					else if (jump->flags & PATCH_ABS48) {
-						code_ptr--;
-						code_ptr[-1] = code_ptr[0];
-						code_ptr[0] = code_ptr[1];
-						/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
-						SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
-						code_ptr[-3] ^= 0x8422;
-						/* oris -> ori */
-						code_ptr[-2] ^= 0x4000000;
-					}
-					else {
-						code_ptr[-6] = code_ptr[0];
-						code_ptr -= 6;
-					}
+#else
+						if (jump->flags & PATCH_ABS32) {
+							code_ptr -= 3;
+							code_ptr[-1] = code_ptr[2];
+							code_ptr[0] = code_ptr[3];
+						}
+						else if (jump->flags & PATCH_ABS48) {
+							code_ptr--;
+							code_ptr[-1] = code_ptr[0];
+							code_ptr[0] = code_ptr[1];
+							/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
+							SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
+							code_ptr[-3] ^= 0x8422;
+							/* oris -> ori */
+							code_ptr[-2] ^= 0x4000000;
+						}
+						else {
+							code_ptr[-6] = code_ptr[0];
+							code_ptr -= 6;
+						}
 #endif
-					if (jump->flags & REMOVE_COND) {
-						code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
-						code_ptr++;
-						jump->addr += sizeof(sljit_ins);
-						code_ptr[0] = Bx;
-						jump->flags -= IS_COND;
+						if (jump->flags & REMOVE_COND) {
+							code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
+							code_ptr++;
+							jump->addr += sizeof(sljit_ins);
+							code_ptr[0] = Bx;
+							jump->flags -= IS_COND;
+						}
 					}
+					jump = jump->next;
 				}
-				jump = jump->next;
-			}
-			if (const_ && const_->addr == word_count) {
-				const_->addr = (sljit_uw)code_ptr;
-				const_ = const_->next;
+				if (const_ && const_->addr == word_count) {
+					const_->addr = (sljit_uw)code_ptr;
+					const_ = const_->next;
+				}
+				if (put_label && put_label->addr == word_count) {
+					SLJIT_ASSERT(put_label->label);
+					put_label->addr = (sljit_uw)code_ptr;
+#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
+					code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
+					word_count += 4;
+#endif
+					put_label = put_label->next;
+				}
+				next_addr = compute_next_addr(label, jump, const_, put_label);
 			}
 			code_ptr ++;
 			word_count ++;
@@ -438,6 +508,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	SLJIT_ASSERT(!label);
 	SLJIT_ASSERT(!jump);
 	SLJIT_ASSERT(!const_);
+	SLJIT_ASSERT(!put_label);
+
 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
 #else
@@ -503,6 +575,21 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 		jump = jump->next;
 	}
 
+	put_label = compiler->put_labels;
+	while (put_label) {
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+		addr = put_label->label->addr;
+		buf_ptr = (sljit_ins *)put_label->addr;
+
+		SLJIT_ASSERT((buf_ptr[0] & 0xfc1f0000) == ADDIS && (buf_ptr[1] & 0xfc000000) == ORI);
+		buf_ptr[0] |= (addr >> 16) & 0xffff;
+		buf_ptr[1] |= addr & 0xffff;
+#else
+		put_label_set(put_label);
+#endif
+		put_label = put_label->next;
+	}
+
 	compiler->error = SLJIT_ERR_COMPILED;
 	compiler->executable_offset = executable_offset;
 	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
@@ -2261,7 +2348,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 {
 	struct sljit_const *const_;
-	sljit_s32 reg;
+	sljit_s32 dst_r;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
@@ -2271,11 +2358,38 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
 	PTR_FAIL_IF(!const_);
 	set_const(const_, compiler);
 
-	reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
-
-	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
+	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+	PTR_FAIL_IF(emit_const(compiler, dst_r, init_value));
 
 	if (dst & SLJIT_MEM)
 		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
+
 	return const_;
 }
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+	struct sljit_put_label *put_label;
+	sljit_s32 dst_r;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
+	PTR_FAIL_IF(!put_label);
+	set_put_label(put_label, compiler, 0);
+
+	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
+	PTR_FAIL_IF(emit_const(compiler, dst_r, 0));
+#else
+	PTR_FAIL_IF(push_inst(compiler, dst_r));
+	compiler->size += 4;
+#endif
+
+	if (dst & SLJIT_MEM)
+		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
+
+	return put_label;
+}

+ 2 - 0
thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c

@@ -267,6 +267,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
 {
 	sljit_ins *inst = (sljit_ins *)addr;
 
+	SLJIT_ASSERT(((inst[0] & 0xc1c00000) == 0x01000000) && ((inst[1] & 0xc1f82000) == 0x80102000));
 	inst[0] = (inst[0] & 0xffc00000) | ((new_target >> 10) & 0x3fffff);
 	inst[1] = (inst[1] & 0xfffffc00) | (new_target & 0x3ff);
 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
@@ -277,6 +278,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
 {
 	sljit_ins *inst = (sljit_ins *)addr;
 
+	SLJIT_ASSERT(((inst[0] & 0xc1c00000) == 0x01000000) && ((inst[1] & 0xc1f82000) == 0x80102000));
 	inst[0] = (inst[0] & 0xffc00000) | ((new_constant >> 10) & 0x3fffff);
 	inst[1] = (inst[1] & 0xfffffc00) | (new_constant & 0x3ff);
 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);

+ 76 - 26
thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c

@@ -298,12 +298,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	sljit_ins *buf_ptr;
 	sljit_ins *buf_end;
 	sljit_uw word_count;
+	sljit_uw next_addr;
 	sljit_sw executable_offset;
 	sljit_uw addr;
 
 	struct sljit_label *label;
 	struct sljit_jump *jump;
 	struct sljit_const *const_;
+	struct sljit_put_label *put_label;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_generate_code(compiler));
@@ -315,40 +317,52 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 
 	code_ptr = code;
 	word_count = 0;
+	next_addr = 0;
 	executable_offset = SLJIT_EXEC_OFFSET(code);
 
 	label = compiler->labels;
 	jump = compiler->jumps;
 	const_ = compiler->consts;
+	put_label = compiler->put_labels;
 
 	do {
 		buf_ptr = (sljit_ins*)buf->memory;
 		buf_end = buf_ptr + (buf->used_size >> 2);
 		do {
 			*code_ptr = *buf_ptr++;
-			SLJIT_ASSERT(!label || label->size >= word_count);
-			SLJIT_ASSERT(!jump || jump->addr >= word_count);
-			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
-			/* These structures are ordered by their address. */
-			if (label && label->size == word_count) {
-				/* Just recording the address. */
-				label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
-				label->size = code_ptr - code;
-				label = label->next;
-			}
-			if (jump && jump->addr == word_count) {
+			if (next_addr == word_count) {
+				SLJIT_ASSERT(!label || label->size >= word_count);
+				SLJIT_ASSERT(!jump || jump->addr >= word_count);
+				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
+				SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
+
+				/* These structures are ordered by their address. */
+				if (label && label->size == word_count) {
+					/* Just recording the address. */
+					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+					label->size = code_ptr - code;
+					label = label->next;
+				}
+				if (jump && jump->addr == word_count) {
 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
-				jump->addr = (sljit_uw)(code_ptr - 3);
+					jump->addr = (sljit_uw)(code_ptr - 3);
 #else
-				jump->addr = (sljit_uw)(code_ptr - 6);
+					jump->addr = (sljit_uw)(code_ptr - 6);
 #endif
-				code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
-				jump = jump->next;
-			}
-			if (const_ && const_->addr == word_count) {
-				/* Just recording the address. */
-				const_->addr = (sljit_uw)code_ptr;
-				const_ = const_->next;
+					code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
+					jump = jump->next;
+				}
+				if (const_ && const_->addr == word_count) {
+					/* Just recording the address. */
+					const_->addr = (sljit_uw)code_ptr;
+					const_ = const_->next;
+				}
+				if (put_label && put_label->addr == word_count) {
+					SLJIT_ASSERT(put_label->label);
+					put_label->addr = (sljit_uw)code_ptr;
+					put_label = put_label->next;
+				}
+				next_addr = compute_next_addr(label, jump, const_, put_label);
 			}
 			code_ptr ++;
 			word_count ++;
@@ -366,6 +380,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	SLJIT_ASSERT(!label);
 	SLJIT_ASSERT(!jump);
 	SLJIT_ASSERT(!const_);
+	SLJIT_ASSERT(!put_label);
 	SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size);
 
 	jump = compiler->jumps;
@@ -389,8 +404,9 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 
 			/* Set the fields of immediate loads. */
 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
-			buf_ptr[0] = (buf_ptr[0] & 0xffc00000) | ((addr >> 10) & 0x3fffff);
-			buf_ptr[1] = (buf_ptr[1] & 0xfffffc00) | (addr & 0x3ff);
+			SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000));
+			buf_ptr[0] |= (addr >> 10) & 0x3fffff;
+			buf_ptr[1] |= addr & 0x3ff;
 #else
 #error "Implementation required"
 #endif
@@ -398,6 +414,20 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 		jump = jump->next;
 	}
 
+	put_label = compiler->put_labels;
+	while (put_label) {
+		addr = put_label->label->addr;
+		buf_ptr = (sljit_ins *)put_label->addr;
+
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+		SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000));
+		buf_ptr[0] |= (addr >> 10) & 0x3fffff;
+		buf_ptr[1] |= addr & 0x3ff;
+#else
+#error "Implementation required"
+#endif
+		put_label = put_label->next;
+	}
 
 	compiler->error = SLJIT_ERR_COMPILED;
 	compiler->executable_offset = executable_offset;
@@ -1465,8 +1495,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
 
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
 {
-	sljit_s32 reg;
 	struct sljit_const *const_;
+	sljit_s32 dst_r;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
@@ -1476,11 +1506,31 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
 	PTR_FAIL_IF(!const_);
 	set_const(const_, compiler);
 
-	reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
-
-	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
+	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+	PTR_FAIL_IF(emit_const(compiler, dst_r, init_value));
 
 	if (dst & SLJIT_MEM)
 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
 	return const_;
 }
+
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+	struct sljit_put_label *put_label;
+	sljit_s32 dst_r;
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
+	PTR_FAIL_IF(!put_label);
+	set_put_label(put_label, compiler, 0);
+
+	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
+	PTR_FAIL_IF(emit_const(compiler, dst_r, 0));
+
+	if (dst & SLJIT_MEM)
+		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
+	return put_label;
+}

+ 3 - 1
thirdparty/pcre2/src/sljit/sljitNativeX86_32.c

@@ -38,8 +38,10 @@ static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, s
 	return SLJIT_SUCCESS;
 }
 
-static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset)
+static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
 {
+	sljit_s32 type = jump->flags >> TYPE_SHIFT;
+
 	if (type == SLJIT_JUMP) {
 		*code_ptr++ = JMP_i32;
 		jump->addr++;

+ 53 - 1
thirdparty/pcre2/src/sljit/sljitNativeX86_64.c

@@ -39,8 +39,10 @@ static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg,
 	return SLJIT_SUCCESS;
 }
 
-static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
+static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr)
 {
+	sljit_s32 type = jump->flags >> TYPE_SHIFT;
+
 	int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);
 
 	/* The relative jump below specialized for this case. */
@@ -72,6 +74,56 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_
 	return code_ptr;
 }
 
+static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label)
+{
+	if (max_label > HALFWORD_MAX) {
+		put_label->addr -= put_label->flags;
+		put_label->flags = PATCH_MD;
+		return code_ptr;
+	}
+
+	if (put_label->flags == 0) {
+		/* Destination is register. */
+		code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw);
+
+		SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
+		SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);
+
+		if ((code_ptr[0] & 0x07) != 0) {
+			code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x08);
+			code_ptr += 2 + sizeof(sljit_s32);
+		}
+		else {
+			code_ptr[0] = code_ptr[1];
+			code_ptr += 1 + sizeof(sljit_s32);
+		}
+
+		put_label->addr = (sljit_uw)code_ptr;
+		return code_ptr;
+	}
+
+	code_ptr -= put_label->flags + (2 + sizeof(sljit_uw));
+	SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags);
+
+	SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
+
+	if ((code_ptr[1] & 0xf8) == MOV_r_i32) {
+		code_ptr += 2 + sizeof(sljit_uw);
+		SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
+	}
+
+	SLJIT_ASSERT(code_ptr[1] == MOV_rm_r);
+
+	code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x4);
+	code_ptr[1] = MOV_rm_i32;
+	code_ptr[2] = (sljit_u8)(code_ptr[2] & ~(0x7 << 3));
+
+	code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32));
+	put_label->addr = (sljit_uw)code_ptr;
+	put_label->flags = 0;
+	return code_ptr;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)

+ 103 - 22
thirdparty/pcre2/src/sljit/sljitNativeX86_common.c

@@ -428,13 +428,15 @@ static sljit_u8 get_jump_code(sljit_s32 type)
 }
 
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
+static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
 #else
-static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
+static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr);
+static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label);
 #endif
 
-static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
+static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
 {
+	sljit_s32 type = jump->flags >> TYPE_SHIFT;
 	sljit_s32 short_jump;
 	sljit_uw label_addr;
 
@@ -447,7 +449,7 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code
 
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
-		return generate_far_jump_code(jump, code_ptr, type);
+		return generate_far_jump_code(jump, code_ptr);
 #endif
 
 	if (type == SLJIT_JUMP) {
@@ -497,6 +499,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	struct sljit_label *label;
 	struct sljit_jump *jump;
 	struct sljit_const *const_;
+	struct sljit_put_label *put_label;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_generate_code(compiler));
@@ -511,6 +514,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	label = compiler->labels;
 	jump = compiler->jumps;
 	const_ = compiler->consts;
+	put_label = compiler->put_labels;
 	executable_offset = SLJIT_EXEC_OFFSET(code);
 
 	do {
@@ -525,27 +529,38 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 				buf_ptr += len;
 			}
 			else {
-				if (*buf_ptr >= 2) {
+				switch (*buf_ptr) {
+				case 0:
+					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+					label->size = code_ptr - code;
+					label = label->next;
+					break;
+				case 1:
 					jump->addr = (sljit_uw)code_ptr;
 					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
-						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
+						code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset);
 					else {
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
+						code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset);
 #else
-						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
+						code_ptr = generate_far_jump_code(jump, code_ptr);
 #endif
 					}
 					jump = jump->next;
-				}
-				else if (*buf_ptr == 0) {
-					label->addr = ((sljit_uw)code_ptr) + executable_offset;
-					label->size = code_ptr - code;
-					label = label->next;
-				}
-				else { /* *buf_ptr is 1 */
+					break;
+				case 2:
 					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
 					const_ = const_->next;
+					break;
+				default:
+					SLJIT_ASSERT(*buf_ptr == 3);
+					SLJIT_ASSERT(put_label->label);
+					put_label->addr = (sljit_uw)code_ptr;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+					code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
+#endif
+					put_label = put_label->next;
+					break;
 				}
 				buf_ptr++;
 			}
@@ -557,6 +572,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	SLJIT_ASSERT(!label);
 	SLJIT_ASSERT(!jump);
 	SLJIT_ASSERT(!const_);
+	SLJIT_ASSERT(!put_label);
+	SLJIT_ASSERT(code_ptr <= code + compiler->size);
 
 	jump = compiler->jumps;
 	while (jump) {
@@ -591,8 +608,24 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 		jump = jump->next;
 	}
 
-	/* Some space may be wasted because of short jumps. */
-	SLJIT_ASSERT(code_ptr <= code + compiler->size);
+	put_label = compiler->put_labels;
+	while (put_label) {
+#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
+		sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
+#else
+		if (put_label->flags & PATCH_MD) {
+			SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX);
+			sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
+		}
+		else {
+			SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX);
+			sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr);
+		}
+#endif
+
+		put_label = put_label->next;
+	}
+
 	compiler->error = SLJIT_ERR_COMPILED;
 	compiler->executable_offset = executable_offset;
 	compiler->executable_size = code_ptr - code;
@@ -2481,7 +2514,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
 
 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 	PTR_FAIL_IF_NULL(jump);
-	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
+	set_jump(jump, compiler, (type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT));
 	type &= 0xff;
 
 	/* Worst case size. */
@@ -2495,7 +2528,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
 	PTR_FAIL_IF_NULL(inst);
 
 	*inst++ = 0;
-	*inst++ = type + 2;
+	*inst++ = 1;
 	return jump;
 }
 
@@ -2513,7 +2546,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
 	if (src == SLJIT_IMM) {
 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 		FAIL_IF_NULL(jump);
-		set_jump(jump, compiler, JUMP_ADDR);
+		set_jump(jump, compiler, JUMP_ADDR | (type << TYPE_SHIFT));
 		jump->u.target = srcw;
 
 		/* Worst case size. */
@@ -2527,7 +2560,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
 		FAIL_IF_NULL(inst);
 
 		*inst++ = 0;
-		*inst++ = type + 2;
+		*inst++ = 1;
 	}
 	else {
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
@@ -2831,7 +2864,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
 	PTR_FAIL_IF(!inst);
 
 	*inst++ = 0;
-	*inst++ = 1;
+	*inst++ = 2;
 
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 	if (dst & SLJIT_MEM)
@@ -2842,6 +2875,54 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
 	return const_;
 }
 
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+{
+	struct sljit_put_label *put_label;
+	sljit_u8 *inst;
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+	sljit_s32 reg;
+	sljit_uw start_size;
+#endif
+
+	CHECK_ERROR_PTR();
+	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
+	ADJUST_LOCAL_OFFSET(dst, dstw);
+
+	CHECK_EXTRA_REGS(dst, dstw, (void)0);
+
+	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
+	PTR_FAIL_IF(!put_label);
+	set_put_label(put_label, compiler, 0);
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+	compiler->mode32 = 0;
+	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+	if (emit_load_imm64(compiler, reg, 0))
+		return NULL;
+#else
+	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0))
+		return NULL;
+#endif
+
+#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
+	if (dst & SLJIT_MEM) {
+		start_size = compiler->size;
+		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
+			return NULL;
+		put_label->flags = compiler->size - start_size;
+	}
+#endif
+
+	inst = (sljit_u8*)ensure_buf(compiler, 2);
+	PTR_FAIL_IF(!inst);
+
+	*inst++ = 0;
+	*inst++ = 3;
+
+	return put_label;
+}
+
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
 {
 	SLJIT_UNUSED_ARG(executable_offset);

+ 6 - 0
thirdparty/pcre2/src/sljit/sljitUtils.c

@@ -154,7 +154,13 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_release_lock(void)
 #include "windows.h"
 #else
 /* Provides mmap function. */
+#include <sys/types.h>
 #include <sys/mman.h>
+#ifndef MAP_ANON
+#ifdef MAP_ANONYMOUS
+#define MAP_ANON MAP_ANONYMOUS
+#endif
+#endif
 /* For detecting the page size. */
 #include <unistd.h>
 

Some files were not shown because too many files changed in this diff