ソースを参照

pcre2: Update to upstream version 10.33

Rémi Verschelde 6 年 前
コミット
518e23424e
37 ファイル変更3898 行追加1584 行削除
  1. 2 2
      COPYRIGHT.txt
  2. 1 0
      modules/regex/SCsub
  3. 2 2
      thirdparty/README.md
  4. 3 3
      thirdparty/pcre2/AUTHORS
  5. 3 3
      thirdparty/pcre2/LICENCE
  6. 12 3
      thirdparty/pcre2/src/config.h
  7. 56 52
      thirdparty/pcre2/src/pcre2.h
  8. 11 3
      thirdparty/pcre2/src/pcre2_auto_possess.c
  9. 9 9
      thirdparty/pcre2/src/pcre2_chartables.c
  10. 423 157
      thirdparty/pcre2/src/pcre2_compile.c
  11. 17 5
      thirdparty/pcre2/src/pcre2_context.c
  12. 1 1
      thirdparty/pcre2/src/pcre2_convert.c
  13. 54 17
      thirdparty/pcre2/src/pcre2_dfa_match.c
  14. 11 8
      thirdparty/pcre2/src/pcre2_error.c
  15. 2 2
      thirdparty/pcre2/src/pcre2_extuni.c
  16. 146 111
      thirdparty/pcre2/src/pcre2_internal.h
  17. 4 1
      thirdparty/pcre2/src/pcre2_intmodedep.h
  18. 1265 199
      thirdparty/pcre2/src/pcre2_jit_compile.c
  19. 2 4
      thirdparty/pcre2/src/pcre2_jit_match.c
  20. 13 13
      thirdparty/pcre2/src/pcre2_maketables.c
  21. 70 18
      thirdparty/pcre2/src/pcre2_match.c
  22. 7 1
      thirdparty/pcre2/src/pcre2_match_data.c
  23. 441 0
      thirdparty/pcre2/src/pcre2_script_run.c
  24. 9 7
      thirdparty/pcre2/src/pcre2_study.c
  25. 54 13
      thirdparty/pcre2/src/pcre2_substitute.c
  26. 37 34
      thirdparty/pcre2/src/pcre2_tables.c
  27. 1019 864
      thirdparty/pcre2/src/pcre2_ucd.c
  28. 1 0
      thirdparty/pcre2/src/pcre2_ucp.h
  29. 3 3
      thirdparty/pcre2/src/pcre2_xclass.c
  30. 1 1
      thirdparty/pcre2/src/sljit/sljitConfigInternal.h
  31. 43 3
      thirdparty/pcre2/src/sljit/sljitExecAllocator.c
  32. 6 5
      thirdparty/pcre2/src/sljit/sljitLir.c
  33. 10 10
      thirdparty/pcre2/src/sljit/sljitNativeARM_64.c
  34. 8 3
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
  35. 10 3
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c
  36. 141 23
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
  37. 1 1
      thirdparty/pcre2/src/sljit/sljitNativePPC_common.c

+ 2 - 2
COPYRIGHT.txt

@@ -330,8 +330,8 @@ License: BSD-3-clause
 
 Files: ./thirdparty/pcre2/
 Comment: PCRE2
-Copyright: 1997-2018, University of Cambridge,
- 2009-2018, Zoltan Herczeg
+Copyright: 1997-2019, University of Cambridge,
+ 2009-2019, Zoltan Herczeg
 License: BSD-3-clause
 
 Files: ./thirdparty/pvrtccompressor/

+ 1 - 0
modules/regex/SCsub

@@ -33,6 +33,7 @@ if env['builtin_pcre2']:
         "pcre2_newline.c",
         "pcre2_ord2utf.c",
         "pcre2_pattern_info.c",
+        "pcre2_script_run.c",
         "pcre2_serialize.c",
         "pcre2_string_utils.c",
         "pcre2_study.c",

+ 2 - 2
thirdparty/README.md

@@ -426,8 +426,8 @@ Files extracted from upstream source:
 
 ## pcre2
 
-- Upstream: http://www.pcre.org/
-- Version: 10.32
+- Upstream: http://www.pcre.org
+- Version: 10.33
 - License: BSD-3-Clause
 
 Files extracted from upstream source:

+ 3 - 3
thirdparty/pcre2/AUTHORS

@@ -8,7 +8,7 @@ Email domain:     cam.ac.uk
 University of Cambridge Computing Service,
 Cambridge, England.
 
-Copyright (c) 1997-2018 University of Cambridge
+Copyright (c) 1997-2019 University of Cambridge
 All rights reserved
 
 
@@ -19,7 +19,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2010-2018 Zoltan Herczeg
+Copyright(c) 2010-2019 Zoltan Herczeg
 All rights reserved.
 
 
@@ -30,7 +30,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2009-2018 Zoltan Herczeg
+Copyright(c) 2009-2019 Zoltan Herczeg
 All rights reserved.
 
 ####

+ 3 - 3
thirdparty/pcre2/LICENCE

@@ -26,7 +26,7 @@ Email domain:     cam.ac.uk
 University of Cambridge Computing Service,
 Cambridge, England.
 
-Copyright (c) 1997-2018 University of Cambridge
+Copyright (c) 1997-2019 University of Cambridge
 All rights reserved.
 
 
@@ -37,7 +37,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2010-2018 Zoltan Herczeg
+Copyright(c) 2010-2019 Zoltan Herczeg
 All rights reserved.
 
 
@@ -48,7 +48,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2009-2018 Zoltan Herczeg
+Copyright(c) 2009-2019 Zoltan Herczeg
 All rights reserved.
 
 

+ 12 - 3
thirdparty/pcre2/src/config.h

@@ -35,6 +35,10 @@ sure both macros are undefined; an emulation function will then be used. */
    */
 /* #undef BSR_ANYCRLF */
 
+/* Define to any value to disable the use of the z and t modifiers in
+   formatting settings such as %zu or %td (this is rarely needed). */
+/* #undef DISABLE_PERCENT_ZT */
+
 /* If you are compiling for a system that uses EBCDIC instead of ASCII
    character codes, define this macro to any value. When EBCDIC is set, PCRE2
    assumes that all input strings are in EBCDIC. If you do not define this
@@ -214,7 +218,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_NAME "PCRE2"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 10.32"
+#define PACKAGE_STRING "PCRE2 10.33"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "pcre2"
@@ -223,7 +227,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_URL ""
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "10.32"
+#define PACKAGE_VERSION "10.33"
 
 /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
    parentheses (of any kind) in a pattern. This limits the amount of system
@@ -299,6 +303,11 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Define to any value to enable callout script support in pcre2grep. */
 /* #undef SUPPORT_PCRE2GREP_CALLOUT */
 
+/* Define to any value to enable fork support in pcre2grep callout scripts.
+   This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also defined.
+   */
+/* #undef SUPPORT_PCRE2GREP_CALLOUT_FORK */
+
 /* Define to any value to enable JIT support in pcre2grep. Note that this will
    have no effect unless SUPPORT_JIT is also defined. */
 /* #undef SUPPORT_PCRE2GREP_JIT */
@@ -343,7 +352,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #endif
 
 /* Version number of package */
-#define VERSION "10.32"
+#define VERSION "10.33"
 
 /* Define to 1 if on MINIX. */
 /* #undef _MINIX */

+ 56 - 52
thirdparty/pcre2/src/pcre2.h

@@ -42,15 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */
 
 #define PCRE2_MAJOR           10
-#define PCRE2_MINOR           32
+#define PCRE2_MINOR           33
 #define PCRE2_PRERELEASE      
-#define PCRE2_DATE            2018-09-10
-
-/* For the benefit of systems without stdint.h, an alternative is to use
-inttypes.h. The existence of these headers is checked by configure or CMake. */
-
-#define PCRE2_HAVE_STDINT_H   1
-#define PCRE2_HAVE_INTTYPES_H 1
+#define PCRE2_DATE            2019-04-16
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE2, the appropriate
@@ -87,18 +81,15 @@ set, we ensure here that it has no effect. */
 #define PCRE2_CALL_CONVENTION
 #endif
 
-/* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure
-that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither
-header, the relevant values must be provided by some other means. */
+/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
+uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
+not have stdint.h, which is why we use inttypes.h, which according to the C
+standard is a superset of stdint.h. If none of these headers are available,
+the relevant values must be provided by some other means. */
 
 #include <limits.h>
 #include <stdlib.h>
-
-#if PCRE2_HAVE_STDINT_H
-#include <stdint.h>
-#elif PCRE2_HAVE_INTTYPES_H
 #include <inttypes.h>
-#endif
 
 /* Allow for C++ users compiling this directly. */
 
@@ -158,43 +149,37 @@ D   is inspected during pcre2_dfa_match() execution
 #define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL    0x00000002u  /* C */
 #define PCRE2_EXTRA_MATCH_WORD               0x00000004u  /* C */
 #define PCRE2_EXTRA_MATCH_LINE               0x00000008u  /* C */
+#define PCRE2_EXTRA_ESCAPED_CR_IS_LF         0x00000010u  /* C */
+#define PCRE2_EXTRA_ALT_BSUX                 0x00000020u  /* C */
 
 /* These are for pcre2_jit_compile(). */
 
 #define PCRE2_JIT_COMPLETE        0x00000001u  /* For full matching */
 #define PCRE2_JIT_PARTIAL_SOFT    0x00000002u
 #define PCRE2_JIT_PARTIAL_HARD    0x00000004u
-
-/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note
-that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these
-functions (though pcre2_jit_match() ignores the latter since it bypasses all
-sanity checks). */
-
-#define PCRE2_NOTBOL              0x00000001u
-#define PCRE2_NOTEOL              0x00000002u
-#define PCRE2_NOTEMPTY            0x00000004u  /* ) These two must be kept */
-#define PCRE2_NOTEMPTY_ATSTART    0x00000008u  /* ) adjacent to each other. */
-#define PCRE2_PARTIAL_SOFT        0x00000010u
-#define PCRE2_PARTIAL_HARD        0x00000020u
-
-/* These are additional options for pcre2_dfa_match(). */
-
-#define PCRE2_DFA_RESTART         0x00000040u
-#define PCRE2_DFA_SHORTEST        0x00000080u
-
-/* These are additional options for pcre2_substitute(), which passes any others
-through to pcre2_match(). */
-
-#define PCRE2_SUBSTITUTE_GLOBAL           0x00000100u
-#define PCRE2_SUBSTITUTE_EXTENDED         0x00000200u
-#define PCRE2_SUBSTITUTE_UNSET_EMPTY      0x00000400u
-#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET    0x00000800u
-#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  0x00001000u
-
-/* A further option for pcre2_match(), not allowed for pcre2_dfa_match(),
-ignored for pcre2_jit_match(). */
-
-#define PCRE2_NO_JIT              0x00002000u
+#define PCRE2_JIT_INVALID_UTF     0x00000100u
+
+/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
+pcre2_substitute(). Some are allowed only for one of the functions, and in
+these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
+PCRE2_NO_UTF_CHECK can also be passed to these functions (though
+pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
+
+#define PCRE2_NOTBOL                      0x00000001u
+#define PCRE2_NOTEOL                      0x00000002u
+#define PCRE2_NOTEMPTY                    0x00000004u  /* ) These two must be kept */
+#define PCRE2_NOTEMPTY_ATSTART            0x00000008u  /* ) adjacent to each other. */
+#define PCRE2_PARTIAL_SOFT                0x00000010u
+#define PCRE2_PARTIAL_HARD                0x00000020u
+#define PCRE2_DFA_RESTART                 0x00000040u  /* pcre2_dfa_match() only */
+#define PCRE2_DFA_SHORTEST                0x00000080u  /* pcre2_dfa_match() only */
+#define PCRE2_SUBSTITUTE_GLOBAL           0x00000100u  /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_EXTENDED         0x00000200u  /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_UNSET_EMPTY      0x00000400u  /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET    0x00000800u  /* pcre2_substitute() only */
+#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  0x00001000u  /* pcre2_substitute() only */
+#define PCRE2_NO_JIT                      0x00002000u  /* Not for pcre2_dfa_match() */
+#define PCRE2_COPY_MATCHED_SUBJECT        0x00004000u
 
 /* Options for pcre2_pattern_convert(). */
 
@@ -318,6 +303,8 @@ pcre2_pattern_convert(). */
 #define PCRE2_ERROR_BAD_LITERAL_OPTIONS            192
 #define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE      193
 #define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS      194
+#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN        195
+#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE       196
 
 
 /* "Expected" matching error codes: no match and partial match. */
@@ -504,10 +491,10 @@ typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
 typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
 
 
-/* The structure for passing out data via the pcre_callout_function. We use a
-structure so that new fields can be added on the end in future versions,
-without changing the API of the function, thereby allowing old clients to work
-without modification. Define the generic version in a macro; the width-specific
+/* The structures for passing out data via callout functions. We use structures
+so that new fields can be added on the end in future versions, without changing
+the API of the function, thereby allowing old clients to work without
+modification. Define the generic versions in a macro; the width-specific
 versions are generated from this macro below. */
 
 /* Flags for the callout_flags field. These are cleared after a callout. */
@@ -549,7 +536,19 @@ typedef struct pcre2_callout_enumerate_block { \
   PCRE2_SIZE    callout_string_length; /* Length of string compiled into pattern */ \
   PCRE2_SPTR    callout_string;    /* String compiled into pattern */ \
   /* ------------------------------------------------------------------ */ \
-} pcre2_callout_enumerate_block;
+} pcre2_callout_enumerate_block; \
+\
+typedef struct pcre2_substitute_callout_block { \
+  uint32_t      version;           /* Identifies version of block */ \
+  /* ------------------------ Version 0 ------------------------------- */ \
+  PCRE2_SPTR    input;             /* Pointer to input subject string */ \
+  PCRE2_SPTR    output;            /* Pointer to output buffer */ \
+  PCRE2_SIZE    output_offsets[2]; /* Changed portion of the output */ \
+  PCRE2_SIZE   *ovector;           /* Pointer to current ovector */ \
+  uint32_t      oveccount;         /* Count of pairs set in ovector */ \
+  uint32_t      subscount;         /* Substitution number */ \
+  /* ------------------------------------------------------------------ */ \
+} pcre2_substitute_callout_block;
 
 
 /* List the generic forms of all other functions in macros, which will be
@@ -604,6 +603,9 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_set_callout(pcre2_match_context *, \
     int (*)(pcre2_callout_block *, void *), void *); \
+PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
+  pcre2_set_substitute_callout(pcre2_match_context *, \
+    int (*)(pcre2_substitute_callout_block *, void *), void *); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
   pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
 PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@@ -807,6 +809,7 @@ pcre2_compile are called by application code. */
 
 #define pcre2_callout_block            PCRE2_SUFFIX(pcre2_callout_block_)
 #define pcre2_callout_enumerate_block  PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
+#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
 #define pcre2_general_context          PCRE2_SUFFIX(pcre2_general_context_)
 #define pcre2_compile_context          PCRE2_SUFFIX(pcre2_compile_context_)
 #define pcre2_convert_context          PCRE2_SUFFIX(pcre2_convert_context_)
@@ -872,6 +875,7 @@ pcre2_compile are called by application code. */
 #define pcre2_set_newline                     PCRE2_SUFFIX(pcre2_set_newline_)
 #define pcre2_set_parens_nest_limit           PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
 #define pcre2_set_offset_limit                PCRE2_SUFFIX(pcre2_set_offset_limit_)
+#define pcre2_set_substitute_callout          PCRE2_SUFFIX(pcre2_set_substitute_callout_)
 #define pcre2_substitute                      PCRE2_SUFFIX(pcre2_substitute_)
 #define pcre2_substring_copy_byname           PCRE2_SUFFIX(pcre2_substring_copy_byname_)
 #define pcre2_substring_copy_bynumber         PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)

+ 11 - 3
thirdparty/pcre2/src/pcre2_auto_possess.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -605,6 +605,15 @@ for(;;)
       if (cb->had_recurse) return FALSE;
       break;
 
+      /* A script run might have to backtrack if the iterated item can match
+      characters from more than one script. So give up unless repeating an
+      explicit character. */
+
+      case OP_SCRIPT_RUN:
+      if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
+        return FALSE;
+      break;
+
       /* Atomic sub-patterns and assertions can always auto-possessify their
       last iterator. However, if the group was entered as a result of checking
       a previous iterator, this is not possible. */
@@ -614,7 +623,6 @@ for(;;)
       case OP_ASSERTBACK:
       case OP_ASSERTBACK_NOT:
       case OP_ONCE:
-
       return !entered_a_group;
       }
 
@@ -1043,7 +1051,7 @@ for(;;)
       if (chr > 255) break;
       class_bitset = (uint8_t *)
         ((list_ptr == list ? code : base_end) - list_ptr[2]);
-      if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE;
+      if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
       break;
 
 #ifdef SUPPORT_WIDE_CHARS

+ 9 - 9
thirdparty/pcre2/src/pcre2_chartables.c

@@ -157,8 +157,8 @@ graph print, punct, and cntrl. Other classes are built from combinations. */
 /* This table identifies various classes of character by individual bits:
   0x01   white space character
   0x02   letter
-  0x04   decimal digit
-  0x08   hexadecimal digit
+  0x04   lower case letter
+  0x08   decimal digit
   0x10   alphanumeric or '_'
 */
 
@@ -168,16 +168,16 @@ graph print, punct, and cntrl. Other classes are built from combinations. */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
   0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - '  */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ( - /  */
-  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
-  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00, /*  8 - ?  */
-  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
+  0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, /*  0 - 7  */
+  0x18,0x18,0x00,0x00,0x00,0x00,0x00,0x00, /*  8 - ?  */
+  0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  @ - G  */
   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
   0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
   0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /*  X - _  */
-  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
-  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
-  0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
-  0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x00, /*  x -127 */
+  0x00,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /*  ` - g  */
+  0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /*  h - o  */
+  0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /*  p - w  */
+  0x16,0x16,0x16,0x00,0x00,0x00,0x00,0x00, /*  x -127 */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
   0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */

ファイルの差分が大きいため隠しています
+ 423 - 157
thirdparty/pcre2/src/pcre2_compile.c


+ 17 - 5
thirdparty/pcre2/src/pcre2_context.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -163,11 +163,13 @@ when no context is supplied to a match function. */
 const pcre2_match_context PRIV(default_match_context) = {
   { default_malloc, default_free, NULL },
 #ifdef SUPPORT_JIT
-  NULL,
-  NULL,
+  NULL,          /* JIT callback */
+  NULL,          /* JIT callback data */
 #endif
-  NULL,
-  NULL,
+  NULL,          /* Callout function */
+  NULL,          /* Callout data */
+  NULL,          /* Substitute callout function */
+  NULL,          /* Substitute callout data */
   PCRE2_UNSET,   /* Offset limit */
   HEAP_LIMIT,
   MATCH_LIMIT,
@@ -403,6 +405,16 @@ mcontext->callout_data = callout_data;
 return 0;
 }
 
+PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
+pcre2_set_substitute_callout(pcre2_match_context *mcontext,
+  int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
+    void *substitute_callout_data)
+{
+mcontext->substitute_callout = substitute_callout;
+mcontext->substitute_callout_data = substitute_callout_data;
+return 0;
+}
+
 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
 {

+ 1 - 1
thirdparty/pcre2/src/pcre2_convert.c

@@ -276,7 +276,7 @@ while (plength > 0)
     break;
 
     case CHAR_BACKSLASH:
-    if (plength <= 0) return PCRE2_ERROR_END_BACKSLASH;
+    if (plength == 0) return PCRE2_ERROR_END_BACKSLASH;
     if (extended) nextisliteral = TRUE; else
       {
       if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)

+ 54 - 17
thirdparty/pcre2/src/pcre2_dfa_match.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -85,7 +85,8 @@ in others, so I abandoned this code. */
 #define PUBLIC_DFA_MATCH_OPTIONS \
   (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
    PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
-   PCRE2_PARTIAL_SOFT|PCRE2_DFA_SHORTEST|PCRE2_DFA_RESTART)
+   PCRE2_PARTIAL_SOFT|PCRE2_DFA_SHORTEST|PCRE2_DFA_RESTART| \
+   PCRE2_COPY_MATCHED_SUBJECT)
 
 
 /*************************************************
@@ -173,6 +174,7 @@ static const uint8_t coptable[] = {
   0,                             /* Assert behind                          */
   0,                             /* Assert behind not                      */
   0,                             /* ONCE                                   */
+  0,                             /* SCRIPT_RUN                             */
   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
   0, 0,                          /* CREF, DNCREF                           */
@@ -247,6 +249,7 @@ static const uint8_t poptable[] = {
   0,                             /* Assert behind                          */
   0,                             /* Assert behind not                      */
   0,                             /* ONCE                                   */
+  0,                             /* SCRIPT_RUN                             */
   0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
   0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
   0, 0,                          /* CREF, DNCREF                           */
@@ -316,8 +319,8 @@ finding the minimum heap requirement for a match. */
 
 typedef struct RWS_anchor {
   struct RWS_anchor *next;
-  unsigned int size;  /* Number of ints */
-  unsigned int free;  /* Number of ints */
+  uint32_t size;  /* Number of ints */
+  uint32_t free;  /* Number of ints */
 } RWS_anchor;
 
 #define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int))
@@ -413,20 +416,24 @@ if (rws->next != NULL)
   new = rws->next;
   }
 
-/* All sizes are in units of sizeof(int), except for mb->heaplimit, which is in
-kibibytes. */
+/* Sizes in the RWS_anchor blocks are in units of sizeof(int), but
+mb->heap_limit and mb->heap_used are in kibibytes. Play carefully, to avoid
+overflow. */
 
 else
   {
-  unsigned int newsize = rws->size * 2;
-  unsigned int heapleft = (unsigned int)
-    (((1024/sizeof(int))*mb->heap_limit - mb->heap_used));
-  if (newsize > heapleft) newsize = heapleft;
+  uint32_t newsize = (rws->size >= UINT32_MAX/2)? UINT32_MAX/2 : rws->size * 2;
+  uint32_t newsizeK = newsize/(1024/sizeof(int));
+
+  if (newsizeK + mb->heap_used > mb->heap_limit)
+    newsizeK = (uint32_t)(mb->heap_limit - mb->heap_used);
+  newsize = newsizeK*(1024/sizeof(int));
+
   if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE)
     return PCRE2_ERROR_HEAPLIMIT;
   new = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data);
   if (new == NULL) return PCRE2_ERROR_NOMEMORY;
-  mb->heap_used += newsize;
+  mb->heap_used += newsizeK;
   new->next = NULL;
   new->size = newsize;
   rws->next = new;
@@ -2560,7 +2567,7 @@ for (;;)
           if (clen > 0)
             {
             isinclass = (c > 255)? (codevalue == OP_NCLASS) :
-              ((((uint8_t *)(code + 1))[c/8] & (1 << (c&7))) != 0);
+              ((((uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0);
             }
           }
 
@@ -2753,7 +2760,7 @@ for (;;)
         /* There is also an always-true condition */
 
         else if (condcode == OP_TRUE)
-          { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
+          { ADD_ACTIVE(state_offset + LINK_SIZE + 2, 0); }
 
         /* The only supported version of OP_RREF is for the value RREF_ANY,
         which means "test if in any recursion". We can't test for specifically
@@ -3226,6 +3233,8 @@ pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
   pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount)
 {
 int rc;
+int was_zero_terminated = 0;
+
 const pcre2_real_code *re = (const pcre2_real_code *)code;
 
 PCRE2_SPTR start_match;
@@ -3265,7 +3274,11 @@ rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
 /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
 subject string. */
 
-if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
+if (length == PCRE2_ZERO_TERMINATED)
+  {
+  length = PRIV(strlen)(subject);
+  was_zero_terminated = 1;
+  }
 
 /* Plausibility checks */
 
@@ -3518,10 +3531,20 @@ if ((re->flags & PCRE2_LASTSET) != 0)
     }
   }
 
+/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
+free the memory that was obtained. */
+
+if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
+  {
+  match_data->memctl.free((void *)match_data->subject,
+    match_data->memctl.memory_data);
+  match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
+  }
+
 /* Fill in fields that are always returned in the match data. */
 
 match_data->code = re;
-match_data->subject = subject;
+match_data->subject = NULL;  /* Default for no match */
 match_data->mark = NULL;
 match_data->matchedby = PCRE2_MATCHEDBY_DFA_INTERPRETER;
 
@@ -3586,7 +3609,7 @@ for (;;)
 #if PCRE2_CODE_UNIT_WIDTH != 8
             if (c > 255) c = 255;
 #endif
-            ok = (start_bits[c/8] & (1 << (c&7))) != 0;
+            ok = (start_bits[c/8] & (1u << (c&7))) != 0;
             }
           }
         if (!ok) break;
@@ -3697,7 +3720,7 @@ for (;;)
 #if PCRE2_CODE_UNIT_WIDTH != 8
           if (c > 255) c = 255;
 #endif
-          if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
+          if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
           start_match++;
           }
 
@@ -3816,6 +3839,20 @@ for (;;)
     match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject);
     match_data->startchar = (PCRE2_SIZE)(start_match - subject);
     match_data->rc = rc;
+
+    if (rc >= 0 &&(options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
+      {
+      length = CU2BYTES(length + was_zero_terminated);
+      match_data->subject = match_data->memctl.malloc(length,
+        match_data->memctl.memory_data);
+      if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
+      memcpy((void *)match_data->subject, subject, length);
+      match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
+      }
+    else
+      {
+      if (rc >= 0 || rc == PCRE2_ERROR_PARTIAL) match_data->subject = subject;
+      }
     goto EXIT;
     }
 

+ 11 - 8
thirdparty/pcre2/src/pcre2_error.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -71,7 +71,7 @@ static const unsigned char compile_error_texts[] =
   /* 5 */
   "number too big in {} quantifier\0"
   "missing terminating ] for character class\0"
-  "invalid escape sequence in character class\0"
+  "escape sequence is invalid in character class\0"
   "range out of order in character class\0"
   "quantifier does not follow a repeatable item\0"
   /* 10 */
@@ -95,7 +95,7 @@ static const unsigned char compile_error_texts[] =
   /* 25 */
   "lookbehind assertion is not fixed length\0"
   "a relative value of zero is not allowed\0"
-  "conditional group contains more than two branches\0"
+  "conditional subpattern contains more than two branches\0"
   "assertion expected after (?( or (?(?C)\0"
   "digit expected after (?+ or (?-\0"
   /* 30 */
@@ -113,21 +113,21 @@ static const unsigned char compile_error_texts[] =
   /* 40 */
   "invalid escape sequence in (*VERB) name\0"
   "unrecognized character after (?P\0"
-  "syntax error in subpattern name (missing terminator)\0"
+  "syntax error in subpattern name (missing terminator?)\0"
   "two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
-  "group name must start with a non-digit\0"
+  "subpattern name must start with a non-digit\0"
   /* 45 */
   "this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
   "malformed \\P or \\p sequence\0"
   "unknown property name after \\P or \\p\0"
-  "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
+  "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0"
   "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
   /* 50 */
   "invalid range in character class\0"
   "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
   "internal error: overran compiling workspace\0"
   "internal error: previously-checked referenced subpattern not found\0"
-  "DEFINE group contains more than one branch\0"
+  "DEFINE subpattern contains more than one branch\0"
   /* 55 */
   "missing opening brace after \\o\0"
   "internal error: unknown newline setting\0"
@@ -137,7 +137,7 @@ static const unsigned char compile_error_texts[] =
   "obsolete error (should not occur)\0"  /* Was the above */
   /* 60 */
   "(*VERB) not recognized or malformed\0"
-  "group number is too big\0"
+  "subpattern number is too big\0"
   "subpattern name expected\0"
   "internal error: parsed pattern overflow\0"
   "non-octal character in \\o{} (closing brace missing?)\0"
@@ -181,6 +181,9 @@ static const unsigned char compile_error_texts[] =
   "invalid option bits with PCRE2_LITERAL\0"
   "\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
   "invalid hyphen in option setting\0"
+  /* 95 */
+  "(*alpha_assertion) not recognized\0"
+  "script runs require Unicode support, which this version of PCRE2 does not have\0"
   ;
 
 /* Match-time and UTF error texts are in the same format. */

+ 2 - 2
thirdparty/pcre2/src/pcre2_extuni.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -100,7 +100,7 @@ while (eptr < end_subject)
   int len = 1;
   if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
   rgb = UCD_GRAPHBREAK(c);
-  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
+  if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
 
   /* Not breaking between Regional Indicators is allowed only if there
   are an even number of preceding RIs. */

+ 146 - 111
thirdparty/pcre2/src/pcre2_internal.h

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -148,16 +148,7 @@ pcre2_match() because of the way it backtracks. */
 /* When checking for integer overflow in pcre2_compile(), we need to handle
 large integers. If a 64-bit integer type is available, we can use that.
 Otherwise we have to cast to double, which of course requires floating point
-arithmetic. Handle this by defining a macro for the appropriate type. If
-stdint.h is available, include it; it may define INT64_MAX. Systems that do not
-have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
-by "configure". */
-
-#if defined HAVE_STDINT_H
-#include <stdint.h>
-#elif defined HAVE_INTTYPES_H
-#include <inttypes.h>
-#endif
+arithmetic. Handle this by defining a macro for the appropriate type. */
 
 #if defined INT64_MAX || defined int64_t
 #define INT64_OR_DOUBLE int64_t
@@ -535,6 +526,10 @@ enum { PCRE2_MATCHEDBY_INTERPRETER,     /* pcre2_match() */
        PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */
        PCRE2_MATCHEDBY_JIT };           /* pcre2_jit_match() */
 
+/* Values for the flags field in a match data block. */
+
+#define PCRE2_MD_COPIED_SUBJECT  0x01u
+
 /* Magic number to provide a small check against being handed junk. */
 
 #define MAGIC_NUMBER  0x50435245UL   /* 'PCRE' */
@@ -569,11 +564,11 @@ these tables. */
 without checking pcre2_jit_compile.c, which has an assertion to ensure that
 ctype_word has the value 16. */
 
-#define ctype_space   0x01
-#define ctype_letter  0x02
-#define ctype_digit   0x04
-#define ctype_xdigit  0x08    /* not actually used any more */
-#define ctype_word    0x10    /* alphanumeric or '_' */
+#define ctype_space    0x01
+#define ctype_letter   0x02
+#define ctype_lcletter 0x04
+#define ctype_digit    0x08
+#define ctype_word     0x10    /* alphanumeric or '_' */
 
 /* Offsets of the various tables from the base tables pointer, and
 total length of the tables. */
@@ -874,34 +869,48 @@ a positive value. */
 #define STR_RIGHT_CURLY_BRACKET     "}"
 #define STR_TILDE                   "~"
 
-#define STRING_ACCEPT0              "ACCEPT\0"
-#define STRING_COMMIT0              "COMMIT\0"
-#define STRING_F0                   "F\0"
-#define STRING_FAIL0                "FAIL\0"
-#define STRING_MARK0                "MARK\0"
-#define STRING_PRUNE0               "PRUNE\0"
-#define STRING_SKIP0                "SKIP\0"
-#define STRING_THEN                 "THEN"
-
-#define STRING_alpha0               "alpha\0"
-#define STRING_lower0               "lower\0"
-#define STRING_upper0               "upper\0"
-#define STRING_alnum0               "alnum\0"
-#define STRING_ascii0               "ascii\0"
-#define STRING_blank0               "blank\0"
-#define STRING_cntrl0               "cntrl\0"
-#define STRING_digit0               "digit\0"
-#define STRING_graph0               "graph\0"
-#define STRING_print0               "print\0"
-#define STRING_punct0               "punct\0"
-#define STRING_space0               "space\0"
-#define STRING_word0                "word\0"
-#define STRING_xdigit               "xdigit"
-
-#define STRING_DEFINE               "DEFINE"
-#define STRING_VERSION              "VERSION"
-#define STRING_WEIRD_STARTWORD      "[:<:]]"
-#define STRING_WEIRD_ENDWORD        "[:>:]]"
+#define STRING_ACCEPT0               "ACCEPT\0"
+#define STRING_COMMIT0               "COMMIT\0"
+#define STRING_F0                    "F\0"
+#define STRING_FAIL0                 "FAIL\0"
+#define STRING_MARK0                 "MARK\0"
+#define STRING_PRUNE0                "PRUNE\0"
+#define STRING_SKIP0                 "SKIP\0"
+#define STRING_THEN                  "THEN"
+
+#define STRING_atomic0               "atomic\0"
+#define STRING_pla0                  "pla\0"
+#define STRING_plb0                  "plb\0"
+#define STRING_nla0                  "nla\0"
+#define STRING_nlb0                  "nlb\0"
+#define STRING_sr0                   "sr\0"
+#define STRING_asr0                  "asr\0"
+#define STRING_positive_lookahead0   "positive_lookahead\0"
+#define STRING_positive_lookbehind0  "positive_lookbehind\0"
+#define STRING_negative_lookahead0   "negative_lookahead\0"
+#define STRING_negative_lookbehind0  "negative_lookbehind\0"
+#define STRING_script_run0           "script_run\0"
+#define STRING_atomic_script_run     "atomic_script_run"
+
+#define STRING_alpha0                "alpha\0"
+#define STRING_lower0                "lower\0"
+#define STRING_upper0                "upper\0"
+#define STRING_alnum0                "alnum\0"
+#define STRING_ascii0                "ascii\0"
+#define STRING_blank0                "blank\0"
+#define STRING_cntrl0                "cntrl\0"
+#define STRING_digit0                "digit\0"
+#define STRING_graph0                "graph\0"
+#define STRING_print0                "print\0"
+#define STRING_punct0                "punct\0"
+#define STRING_space0                "space\0"
+#define STRING_word0                 "word\0"
+#define STRING_xdigit                "xdigit"
+
+#define STRING_DEFINE                "DEFINE"
+#define STRING_VERSION               "VERSION"
+#define STRING_WEIRD_STARTWORD       "[:<:]]"
+#define STRING_WEIRD_ENDWORD         "[:>:]]"
 
 #define STRING_CR_RIGHTPAR                "CR)"
 #define STRING_LF_RIGHTPAR                "LF)"
@@ -1150,34 +1159,48 @@ only. */
 #define STR_RIGHT_CURLY_BRACKET     "\175"
 #define STR_TILDE                   "\176"
 
-#define STRING_ACCEPT0              STR_A STR_C STR_C STR_E STR_P STR_T "\0"
-#define STRING_COMMIT0              STR_C STR_O STR_M STR_M STR_I STR_T "\0"
-#define STRING_F0                   STR_F "\0"
-#define STRING_FAIL0                STR_F STR_A STR_I STR_L "\0"
-#define STRING_MARK0                STR_M STR_A STR_R STR_K "\0"
-#define STRING_PRUNE0               STR_P STR_R STR_U STR_N STR_E "\0"
-#define STRING_SKIP0                STR_S STR_K STR_I STR_P "\0"
-#define STRING_THEN                 STR_T STR_H STR_E STR_N
-
-#define STRING_alpha0               STR_a STR_l STR_p STR_h STR_a "\0"
-#define STRING_lower0               STR_l STR_o STR_w STR_e STR_r "\0"
-#define STRING_upper0               STR_u STR_p STR_p STR_e STR_r "\0"
-#define STRING_alnum0               STR_a STR_l STR_n STR_u STR_m "\0"
-#define STRING_ascii0               STR_a STR_s STR_c STR_i STR_i "\0"
-#define STRING_blank0               STR_b STR_l STR_a STR_n STR_k "\0"
-#define STRING_cntrl0               STR_c STR_n STR_t STR_r STR_l "\0"
-#define STRING_digit0               STR_d STR_i STR_g STR_i STR_t "\0"
-#define STRING_graph0               STR_g STR_r STR_a STR_p STR_h "\0"
-#define STRING_print0               STR_p STR_r STR_i STR_n STR_t "\0"
-#define STRING_punct0               STR_p STR_u STR_n STR_c STR_t "\0"
-#define STRING_space0               STR_s STR_p STR_a STR_c STR_e "\0"
-#define STRING_word0                STR_w STR_o STR_r STR_d       "\0"
-#define STRING_xdigit               STR_x STR_d STR_i STR_g STR_i STR_t
-
-#define STRING_DEFINE               STR_D STR_E STR_F STR_I STR_N STR_E
-#define STRING_VERSION              STR_V STR_E STR_R STR_S STR_I STR_O STR_N
-#define STRING_WEIRD_STARTWORD      STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
-#define STRING_WEIRD_ENDWORD        STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
+#define STRING_ACCEPT0               STR_A STR_C STR_C STR_E STR_P STR_T "\0"
+#define STRING_COMMIT0               STR_C STR_O STR_M STR_M STR_I STR_T "\0"
+#define STRING_F0                    STR_F "\0"
+#define STRING_FAIL0                 STR_F STR_A STR_I STR_L "\0"
+#define STRING_MARK0                 STR_M STR_A STR_R STR_K "\0"
+#define STRING_PRUNE0                STR_P STR_R STR_U STR_N STR_E "\0"
+#define STRING_SKIP0                 STR_S STR_K STR_I STR_P "\0"
+#define STRING_THEN                  STR_T STR_H STR_E STR_N
+
+#define STRING_atomic0               STR_a STR_t STR_o STR_m STR_i STR_c "\0"
+#define STRING_pla0                  STR_p STR_l STR_a "\0"
+#define STRING_plb0                  STR_p STR_l STR_b "\0"
+#define STRING_nla0                  STR_n STR_l STR_a "\0"
+#define STRING_nlb0                  STR_n STR_l STR_b "\0"
+#define STRING_sr0                   STR_s STR_r "\0"
+#define STRING_asr0                  STR_a STR_s STR_r "\0"
+#define STRING_positive_lookahead0   STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
+#define STRING_positive_lookbehind0  STR_p STR_o STR_s STR_i STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
+#define STRING_negative_lookahead0   STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_a STR_h STR_e STR_a STR_d "\0"
+#define STRING_negative_lookbehind0  STR_n STR_e STR_g STR_a STR_t STR_i STR_v STR_e STR_UNDERSCORE STR_l STR_o STR_o STR_k STR_b STR_e STR_h STR_i STR_n STR_d "\0"
+#define STRING_script_run0           STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n "\0"
+#define STRING_atomic_script_run     STR_a STR_t STR_o STR_m STR_i STR_c STR_UNDERSCORE STR_s STR_c STR_r STR_i STR_p STR_t STR_UNDERSCORE STR_r STR_u STR_n
+
+#define STRING_alpha0                STR_a STR_l STR_p STR_h STR_a "\0"
+#define STRING_lower0                STR_l STR_o STR_w STR_e STR_r "\0"
+#define STRING_upper0                STR_u STR_p STR_p STR_e STR_r "\0"
+#define STRING_alnum0                STR_a STR_l STR_n STR_u STR_m "\0"
+#define STRING_ascii0                STR_a STR_s STR_c STR_i STR_i "\0"
+#define STRING_blank0                STR_b STR_l STR_a STR_n STR_k "\0"
+#define STRING_cntrl0                STR_c STR_n STR_t STR_r STR_l "\0"
+#define STRING_digit0                STR_d STR_i STR_g STR_i STR_t "\0"
+#define STRING_graph0                STR_g STR_r STR_a STR_p STR_h "\0"
+#define STRING_print0                STR_p STR_r STR_i STR_n STR_t "\0"
+#define STRING_punct0                STR_p STR_u STR_n STR_c STR_t "\0"
+#define STRING_space0                STR_s STR_p STR_a STR_c STR_e "\0"
+#define STRING_word0                 STR_w STR_o STR_r STR_d       "\0"
+#define STRING_xdigit                STR_x STR_d STR_i STR_g STR_i STR_t
+
+#define STRING_DEFINE                STR_D STR_E STR_F STR_I STR_N STR_E
+#define STRING_VERSION               STR_V STR_E STR_R STR_S STR_I STR_O STR_N
+#define STRING_WEIRD_STARTWORD       STR_LEFT_SQUARE_BRACKET STR_COLON STR_LESS_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
+#define STRING_WEIRD_ENDWORD         STR_LEFT_SQUARE_BRACKET STR_COLON STR_GREATER_THAN_SIGN STR_COLON STR_RIGHT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
 
 #define STRING_CR_RIGHTPAR                STR_C STR_R STR_RIGHT_PARENTHESIS
 #define STRING_LF_RIGHTPAR                STR_L STR_F STR_RIGHT_PARENTHESIS
@@ -1485,70 +1508,71 @@ enum {
   OP_ASSERTBACK,     /* 128 Positive lookbehind */
   OP_ASSERTBACK_NOT, /* 129 Negative lookbehind */
 
-  /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately after the
-  assertions, with ONCE first, as there's a test for >= ONCE for a subpattern
-  that isn't an assertion. The POS versions must immediately follow the non-POS
-  versions in each case. */
+  /* ONCE, SCRIPT_RUN, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come
+  immediately after the assertions, with ONCE first, as there's a test for >=
+  ONCE for a subpattern that isn't an assertion. The POS versions must
+  immediately follow the non-POS versions in each case. */
 
   OP_ONCE,           /* 130 Atomic group, contains captures */
-  OP_BRA,            /* 131 Start of non-capturing bracket */
-  OP_BRAPOS,         /* 132 Ditto, with unlimited, possessive repeat */
-  OP_CBRA,           /* 133 Start of capturing bracket */
-  OP_CBRAPOS,        /* 134 Ditto, with unlimited, possessive repeat */
-  OP_COND,           /* 135 Conditional group */
+  OP_SCRIPT_RUN,     /* 131 Non-capture, but check characters' scripts */
+  OP_BRA,            /* 132 Start of non-capturing bracket */
+  OP_BRAPOS,         /* 133 Ditto, with unlimited, possessive repeat */
+  OP_CBRA,           /* 134 Start of capturing bracket */
+  OP_CBRAPOS,        /* 135 Ditto, with unlimited, possessive repeat */
+  OP_COND,           /* 136 Conditional group */
 
   /* These five must follow the previous five, in the same order. There's a
   check for >= SBRA to distinguish the two sets. */
 
-  OP_SBRA,           /* 136 Start of non-capturing bracket, check empty  */
-  OP_SBRAPOS,        /* 137 Ditto, with unlimited, possessive repeat */
-  OP_SCBRA,          /* 138 Start of capturing bracket, check empty */
-  OP_SCBRAPOS,       /* 139 Ditto, with unlimited, possessive repeat */
-  OP_SCOND,          /* 140 Conditional group, check empty */
+  OP_SBRA,           /* 137 Start of non-capturing bracket, check empty  */
+  OP_SBRAPOS,        /* 138 Ditto, with unlimited, possessive repeat */
+  OP_SCBRA,          /* 139 Start of capturing bracket, check empty */
+  OP_SCBRAPOS,       /* 140 Ditto, with unlimited, possessive repeat */
+  OP_SCOND,          /* 141 Conditional group, check empty */
 
   /* The next two pairs must (respectively) be kept together. */
 
-  OP_CREF,           /* 141 Used to hold a capture number as condition */
-  OP_DNCREF,         /* 142 Used to point to duplicate names as a condition */
-  OP_RREF,           /* 143 Used to hold a recursion number as condition */
-  OP_DNRREF,         /* 144 Used to point to duplicate names as a condition */
-  OP_FALSE,          /* 145 Always false (used by DEFINE and VERSION) */
-  OP_TRUE,           /* 146 Always true (used by VERSION) */
+  OP_CREF,           /* 142 Used to hold a capture number as condition */
+  OP_DNCREF,         /* 143 Used to point to duplicate names as a condition */
+  OP_RREF,           /* 144 Used to hold a recursion number as condition */
+  OP_DNRREF,         /* 145 Used to point to duplicate names as a condition */
+  OP_FALSE,          /* 146 Always false (used by DEFINE and VERSION) */
+  OP_TRUE,           /* 147 Always true (used by VERSION) */
 
-  OP_BRAZERO,        /* 147 These two must remain together and in this */
-  OP_BRAMINZERO,     /* 148 order. */
-  OP_BRAPOSZERO,     /* 149 */
+  OP_BRAZERO,        /* 148 These two must remain together and in this */
+  OP_BRAMINZERO,     /* 149 order. */
+  OP_BRAPOSZERO,     /* 150 */
 
   /* These are backtracking control verbs */
 
-  OP_MARK,           /* 150 always has an argument */
-  OP_PRUNE,          /* 151 */
-  OP_PRUNE_ARG,      /* 152 same, but with argument */
-  OP_SKIP,           /* 153 */
-  OP_SKIP_ARG,       /* 154 same, but with argument */
-  OP_THEN,           /* 155 */
-  OP_THEN_ARG,       /* 156 same, but with argument */
-  OP_COMMIT,         /* 157 */
-  OP_COMMIT_ARG,     /* 158 same, but with argument */
+  OP_MARK,           /* 151 always has an argument */
+  OP_PRUNE,          /* 152 */
+  OP_PRUNE_ARG,      /* 153 same, but with argument */
+  OP_SKIP,           /* 154 */
+  OP_SKIP_ARG,       /* 155 same, but with argument */
+  OP_THEN,           /* 156 */
+  OP_THEN_ARG,       /* 157 same, but with argument */
+  OP_COMMIT,         /* 158 */
+  OP_COMMIT_ARG,     /* 159 same, but with argument */
 
   /* These are forced failure and success verbs. FAIL and ACCEPT do accept an
   argument, but these cases can be compiled as, for example, (*MARK:X)(*FAIL)
   without the need for a special opcode. */
 
-  OP_FAIL,           /* 159 */
-  OP_ACCEPT,         /* 160 */
-  OP_ASSERT_ACCEPT,  /* 161 Used inside assertions */
-  OP_CLOSE,          /* 162 Used before OP_ACCEPT to close open captures */
+  OP_FAIL,           /* 160 */
+  OP_ACCEPT,         /* 161 */
+  OP_ASSERT_ACCEPT,  /* 162 Used inside assertions */
+  OP_CLOSE,          /* 163 Used before OP_ACCEPT to close open captures */
 
   /* This is used to skip a subpattern with a {0} quantifier */
 
-  OP_SKIPZERO,       /* 163 */
+  OP_SKIPZERO,       /* 164 */
 
   /* This is used to identify a DEFINE group during compilation so that it can
   be checked for having only one branch. It is changed to OP_FALSE before
   compilation finishes. */
 
-  OP_DEFINE,         /* 164 */
+  OP_DEFINE,         /* 165 */
 
   /* This is not an opcode, but is used to check that tables indexed by opcode
   are the correct length, in order to catch updating errors - there have been
@@ -1596,6 +1620,7 @@ some cases doesn't actually use these names at all). */
   "Alt", "Ket", "KetRmax", "KetRmin", "KetRpos",                  \
   "Reverse", "Assert", "Assert not", "AssertB", "AssertB not",    \
   "Once",                                                         \
+  "Script run",                                                   \
   "Bra", "BraPos", "CBra", "CBraPos",                             \
   "Cond",                                                         \
   "SBra", "SBraPos", "SCBra", "SCBraPos",                         \
@@ -1679,6 +1704,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
   1+LINK_SIZE,                   /* Assert behind                          */ \
   1+LINK_SIZE,                   /* Assert behind not                      */ \
   1+LINK_SIZE,                   /* ONCE                                   */ \
+  1+LINK_SIZE,                   /* SCRIPT_RUN                             */ \
   1+LINK_SIZE,                   /* BRA                                    */ \
   1+LINK_SIZE,                   /* BRAPOS                                 */ \
   1+LINK_SIZE+IMM2_SIZE,         /* CBRA                                   */ \
@@ -1747,6 +1773,8 @@ typedef struct {
   uint8_t gbprop;     /* ucp_gbControl, etc. (grapheme break property) */
   uint8_t caseset;    /* offset to multichar other cases or zero */
   int32_t other_case; /* offset to other case, or zero if none */
+  int16_t scriptx;    /* script extension value */
+  int16_t dummy;      /* spare - to round to multiple of 4 bytes */
 } ucd_record;
 
 /* UCD access macros */
@@ -1769,6 +1797,7 @@ typedef struct {
 #define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop
 #define UCD_CASESET(ch)     GET_UCD(ch)->caseset
 #define UCD_OTHERCASE(ch)   ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
+#define UCD_SCRIPTX(ch)     GET_UCD(ch)->scriptx
 
 /* Header for serialized pcre2 codes. */
 
@@ -1826,6 +1855,8 @@ extern const uint8_t          PRIV(utf8_table4)[];
 #define _pcre2_hspace_list             PCRE2_SUFFIX(_pcre2_hspace_list_)
 #define _pcre2_vspace_list             PCRE2_SUFFIX(_pcre2_vspace_list_)
 #define _pcre2_ucd_caseless_sets       PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
+#define _pcre2_ucd_digit_sets          PCRE2_SUFFIX(_pcre2_ucd_digit_sets_)
+#define _pcre2_ucd_script_sets         PCRE2_SUFFIX(_pcre2_ucd_script_sets_)
 #define _pcre2_ucd_records             PCRE2_SUFFIX(_pcre2_ucd_records_)
 #define _pcre2_ucd_stage1              PCRE2_SUFFIX(_pcre2_ucd_stage1_)
 #define _pcre2_ucd_stage2              PCRE2_SUFFIX(_pcre2_ucd_stage2_)
@@ -1847,6 +1878,8 @@ extern const uint8_t                   PRIV(default_tables)[];
 extern const uint32_t                  PRIV(hspace_list)[];
 extern const uint32_t                  PRIV(vspace_list)[];
 extern const uint32_t                  PRIV(ucd_caseless_sets)[];
+extern const uint32_t                  PRIV(ucd_digit_sets)[];
+extern const uint8_t                   PRIV(ucd_script_sets)[];
 extern const ucd_record                PRIV(ucd_records)[];
 #if PCRE2_CODE_UNIT_WIDTH == 32
 extern const ucd_record                PRIV(dummy_ucd_record)[];
@@ -1894,6 +1927,7 @@ is available. */
 #define _pcre2_jit_get_target        PCRE2_SUFFIX(_pcre2_jit_get_target_)
 #define _pcre2_memctl_malloc         PCRE2_SUFFIX(_pcre2_memctl_malloc_)
 #define _pcre2_ord2utf               PCRE2_SUFFIX(_pcre2_ord2utf_)
+#define _pcre2_script_run            PCRE2_SUFFIX(_pcre2_script_run_)
 #define _pcre2_strcmp                PCRE2_SUFFIX(_pcre2_strcmp_)
 #define _pcre2_strcmp_c8             PCRE2_SUFFIX(_pcre2_strcmp_c8_)
 #define _pcre2_strcpy_c8             PCRE2_SUFFIX(_pcre2_strcpy_c8_)
@@ -1908,7 +1942,7 @@ is available. */
 extern int          _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
                       const compile_block *);
 extern int          _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
-                      int *, uint32_t, BOOL, compile_block *);
+                      int *, uint32_t, uint32_t, BOOL, compile_block *);
 extern PCRE2_SPTR   _pcre2_extuni(uint32_t, PCRE2_SPTR, PCRE2_SPTR, PCRE2_SPTR,
                       BOOL, int *);
 extern PCRE2_SPTR   _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
@@ -1920,6 +1954,7 @@ extern size_t       _pcre2_jit_get_size(void *);
 const char *        _pcre2_jit_get_target(void);
 extern void *       _pcre2_memctl_malloc(size_t, pcre2_memctl *);
 extern unsigned int _pcre2_ord2utf(uint32_t, PCRE2_UCHAR *);
+extern BOOL         _pcre2_script_run(PCRE2_SPTR, PCRE2_SPTR, BOOL);
 extern int          _pcre2_strcmp(PCRE2_SPTR, PCRE2_SPTR);
 extern int          _pcre2_strcmp_c8(PCRE2_SPTR, const char *);
 extern PCRE2_SIZE   _pcre2_strcpy_c8(PCRE2_UCHAR *, const char *);

+ 4 - 1
thirdparty/pcre2/src/pcre2_intmodedep.h

@@ -585,6 +585,8 @@ typedef struct pcre2_real_match_context {
 #endif
   int    (*callout)(pcre2_callout_block *, void *);
   void    *callout_data;
+  int    (*substitute_callout)(pcre2_substitute_callout_block *, void *);
+  void    *substitute_callout_data;
   PCRE2_SIZE offset_limit;
   uint32_t heap_limit;
   uint32_t match_limit;
@@ -656,7 +658,8 @@ typedef struct pcre2_real_match_data {
   PCRE2_SIZE       leftchar;      /* Offset to leftmost code unit */
   PCRE2_SIZE       rightchar;     /* Offset to rightmost code unit */
   PCRE2_SIZE       startchar;     /* Offset to starting code unit */
-  uint16_t         matchedby;     /* Type of match (normal, JIT, DFA) */
+  uint8_t          matchedby;     /* Type of match (normal, JIT, DFA) */
+  uint8_t          flags;         /* Various flags */
   uint16_t         oveccount;     /* Number of pairs */
   int              rc;            /* The return code from the match */
   PCRE2_SIZE       ovector[131072]; /* Must be last in the structure */

ファイルの差分が大きいため隠しています
+ 1265 - 199
thirdparty/pcre2/src/pcre2_jit_compile.c


+ 2 - 4
thirdparty/pcre2/src/pcre2_jit_match.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-         New API code Copyright (c) 2016 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -152,8 +152,6 @@ else
   jit_stack = NULL;
   }
 
-/* JIT only need two offsets for each ovector entry. Hence
-   the last 1/3 of the ovector will never be touched. */
 
 max_oveccount = functions->top_bracket;
 if (oveccount > max_oveccount)
@@ -173,7 +171,7 @@ else
 if (rc > (int)oveccount)
   rc = 0;
 match_data->code = re;
-match_data->subject = subject;
+match_data->subject = (rc >= 0 || rc == PCRE2_ERROR_PARTIAL)? subject : NULL;
 match_data->rc = rc;
 match_data->startchar = arguments.startchar_ptr - subject;
 match_data->leftchar = 0;

+ 13 - 13
thirdparty/pcre2/src/pcre2_maketables.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -114,17 +114,17 @@ test for alnum specially. */
 memset(p, 0, cbit_length);
 for (i = 0; i < 256; i++)
   {
-  if (isdigit(i)) p[cbit_digit  + i/8] |= 1 << (i&7);
-  if (isupper(i)) p[cbit_upper  + i/8] |= 1 << (i&7);
-  if (islower(i)) p[cbit_lower  + i/8] |= 1 << (i&7);
-  if (isalnum(i)) p[cbit_word   + i/8] |= 1 << (i&7);
-  if (i == '_')   p[cbit_word   + i/8] |= 1 << (i&7);
-  if (isspace(i)) p[cbit_space  + i/8] |= 1 << (i&7);
-  if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
-  if (isgraph(i)) p[cbit_graph  + i/8] |= 1 << (i&7);
-  if (isprint(i)) p[cbit_print  + i/8] |= 1 << (i&7);
-  if (ispunct(i)) p[cbit_punct  + i/8] |= 1 << (i&7);
-  if (iscntrl(i)) p[cbit_cntrl  + i/8] |= 1 << (i&7);
+  if (isdigit(i)) p[cbit_digit  + i/8] |= 1u << (i&7);
+  if (isupper(i)) p[cbit_upper  + i/8] |= 1u << (i&7);
+  if (islower(i)) p[cbit_lower  + i/8] |= 1u << (i&7);
+  if (isalnum(i)) p[cbit_word   + i/8] |= 1u << (i&7);
+  if (i == '_')   p[cbit_word   + i/8] |= 1u << (i&7);
+  if (isspace(i)) p[cbit_space  + i/8] |= 1u << (i&7);
+  if (isxdigit(i))p[cbit_xdigit + i/8] |= 1u << (i&7);
+  if (isgraph(i)) p[cbit_graph  + i/8] |= 1u << (i&7);
+  if (isprint(i)) p[cbit_print  + i/8] |= 1u << (i&7);
+  if (ispunct(i)) p[cbit_punct  + i/8] |= 1u << (i&7);
+  if (iscntrl(i)) p[cbit_cntrl  + i/8] |= 1u << (i&7);
   }
 p += cbit_length;
 
@@ -138,8 +138,8 @@ for (i = 0; i < 256; i++)
   int x = 0;
   if (isspace(i)) x += ctype_space;
   if (isalpha(i)) x += ctype_letter;
+  if (islower(i)) x += ctype_lcletter;
   if (isdigit(i)) x += ctype_digit;
-  if (isxdigit(i)) x += ctype_xdigit;
   if (isalnum(i) || i == '_') x += ctype_word;
   *p++ = x;
   }

+ 70 - 18
thirdparty/pcre2/src/pcre2_match.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2015-2018 University of Cambridge
+          New API code Copyright (c) 2015-2019 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -69,11 +69,12 @@ information, and fields within it. */
 #define PUBLIC_MATCH_OPTIONS \
   (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
    PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
-   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
+   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT)
 
 #define PUBLIC_JIT_MATCH_OPTIONS \
    (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
-    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD)
+    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD|\
+    PCRE2_COPY_MATCHED_SUBJECT)
 
 /* Non-error returns from and within the match() function. Error returns are
 externally defined PCRE2_ERROR_xxx codes, which are all negative. */
@@ -1848,7 +1849,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
             if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
             }
           else
-            if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
+            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
           }
         }
       else
@@ -1870,7 +1871,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
             }
           else
 #endif
-          if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
+          if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
           }
         }
 
@@ -1902,7 +1903,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               if (Fop == OP_CLASS) RRETURN(MATCH_NOMATCH);
               }
             else
-              if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
+              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
             }
           }
         else
@@ -1927,7 +1928,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               }
             else
 #endif
-            if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
+            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) RRETURN(MATCH_NOMATCH);
             }
           }
         /* Control never gets here */
@@ -1956,7 +1957,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               if (Fop == OP_CLASS) break;
               }
             else
-              if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) break;
+              if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
             Feptr += len;
             }
 
@@ -1993,7 +1994,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               }
             else
 #endif
-            if ((Lbyte_map[fc/8] & (1 << (fc&7))) == 0) break;
+            if ((Lbyte_map[fc/8] & (1u << (fc&7))) == 0) break;
             Feptr++;
             }
 
@@ -4084,7 +4085,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               GETCHAR(fc, fptr);
               }
             lgb = UCD_GRAPHBREAK(fc);
-            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
+            if ((PRIV(ucp_gbtable)[lgb] & (1u << rgb)) == 0) break;
             Feptr = fptr;
             rgb = lgb;
             }
@@ -5014,6 +5015,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
     must record a backtracking point and also set up a chained frame. */
 
     case OP_ONCE:
+    case OP_SCRIPT_RUN:
     case OP_SBRA:
     Lframe_type = GF_NOCAPTURE | Fop;
 
@@ -5526,6 +5528,14 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
       case OP_ASSERTBACK_NOT:
       RRETURN(MATCH_MATCH);
 
+      /* At the end of a script run, apply the script-checking rules. This code
+      will never be exercised if Unicode support is not compiled, because in
+      that environment script runs cause an error at compile time. */
+
+      case OP_SCRIPT_RUN:
+      if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
+      break;
+
       /* Whole-pattern recursion is coded as a recurse into group 0, so it
       won't be picked up here. Instead, we catch it when the OP_END is reached.
       Other recursion is handled here. */
@@ -6000,10 +6010,11 @@ pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
   pcre2_match_context *mcontext)
 {
 int rc;
+int was_zero_terminated = 0;
 const uint8_t *start_bits = NULL;
-
 const pcre2_real_code *re = (const pcre2_real_code *)code;
 
+
 BOOL anchored;
 BOOL firstline;
 BOOL has_first_cu = FALSE;
@@ -6043,7 +6054,11 @@ mb->stack_frames = (heapframe *)stack_frames_vector;
 /* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
 subject string. */
 
-if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
+if (length == PCRE2_ZERO_TERMINATED)
+  {
+  length = PRIV(strlen)(subject);
+  was_zero_terminated = 1;
+  }
 end_subject = subject + length;
 
 /* Plausibility checks */
@@ -6158,6 +6173,17 @@ if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET &&
      (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
   return PCRE2_ERROR_BADOFFSETLIMIT;
 
+/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
+free the memory that was obtained. Set the field to NULL for no match cases. */
+
+if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
+  {
+  match_data->memctl.free((void *)match_data->subject,
+    match_data->memctl.memory_data);
+  match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
+  }
+match_data->subject = NULL;
+
 /* If the pattern was successfully studied with JIT support, run the JIT
 executable instead of the rest of this function. Most options must be set at
 compile time for the JIT code to be usable. Fallback to the normal code path if
@@ -6169,7 +6195,19 @@ if (re->executable_jit != NULL && (options & ~PUBLIC_JIT_MATCH_OPTIONS) == 0)
   {
   rc = pcre2_jit_match(code, subject, length, start_offset, options,
     match_data, mcontext);
-  if (rc != PCRE2_ERROR_JIT_BADOPTION) return rc;
+  if (rc != PCRE2_ERROR_JIT_BADOPTION)
+    {
+    if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
+      {
+      length = CU2BYTES(length + was_zero_terminated);
+      match_data->subject = match_data->memctl.malloc(length,
+        match_data->memctl.memory_data);
+      if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
+      memcpy((void *)match_data->subject, subject, length);
+      match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
+      }
+    return rc;
+    }
   }
 #endif
 
@@ -6421,7 +6459,7 @@ for(;;)
 #if PCRE2_CODE_UNIT_WIDTH != 8
             if (c > 255) c = 255;
 #endif
-            ok = (start_bits[c/8] & (1 << (c&7))) != 0;
+            ok = (start_bits[c/8] & (1u << (c&7))) != 0;
             }
           }
         if (!ok)
@@ -6538,7 +6576,7 @@ for(;;)
 #if PCRE2_CODE_UNIT_WIDTH != 8
           if (c > 255) c = 255;
 #endif
-          if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
+          if ((start_bits[c/8] & (1u << (c&7))) != 0) break;
           start_match++;
           }
 
@@ -6809,13 +6847,13 @@ if (mb->match_frames != mb->stack_frames)
 /* Fill in fields that are always returned in the match data. */
 
 match_data->code = re;
-match_data->subject = subject;
 match_data->mark = mb->mark;
 match_data->matchedby = PCRE2_MATCHEDBY_INTERPRETER;
 
 /* Handle a fully successful match. Set the return code to the number of
 captured strings, or 0 if there were too many to fit into the ovector, and then
-set the remaining returned values before returning. */
+set the remaining returned values before returning. Make a copy of the subject
+string if requested. */
 
 if (rc == MATCH_MATCH)
   {
@@ -6825,6 +6863,16 @@ if (rc == MATCH_MATCH)
   match_data->leftchar = mb->start_used_ptr - subject;
   match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
     mb->last_used_ptr : mb->end_match_ptr) - subject;
+  if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
+    {
+    length = CU2BYTES(length + was_zero_terminated);
+    match_data->subject = match_data->memctl.malloc(length,
+      match_data->memctl.memory_data);
+    if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
+    memcpy((void *)match_data->subject, subject, length);
+    match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
+    }
+  else match_data->subject = subject;
   return match_data->rc;
   }
 
@@ -6838,10 +6886,14 @@ match_data->mark = mb->nomatch_mark;
 
 if (rc != MATCH_NOMATCH && rc != PCRE2_ERROR_PARTIAL) match_data->rc = rc;
 
-/* Handle a partial match. */
+/* Handle a partial match. If a "soft" partial match was requested, searching
+for a complete match will have continued, and the value of rc at this point
+will be MATCH_NOMATCH. For a "hard" partial match, it will already be
+PCRE2_ERROR_PARTIAL. */
 
 else if (match_partial != NULL)
   {
+  match_data->subject = subject;
   match_data->ovector[0] = match_partial - subject;
   match_data->ovector[1] = end_subject - subject;
   match_data->startchar = match_partial - subject;

+ 7 - 1
thirdparty/pcre2/src/pcre2_match_data.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2017 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -63,6 +63,7 @@ yield = PRIV(memctl_malloc)(
   (pcre2_memctl *)gcontext);
 if (yield == NULL) return NULL;
 yield->oveccount = oveccount;
+yield->flags = 0;
 return yield;
 }
 
@@ -93,7 +94,12 @@ PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
 pcre2_match_data_free(pcre2_match_data *match_data)
 {
 if (match_data != NULL)
+  {
+  if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
+    match_data->memctl.free((void *)match_data->subject,
+      match_data->memctl.memory_data);
   match_data->memctl.free(match_data, match_data->memctl.memory_data);
+  }
 }
 
 

+ 441 - 0
thirdparty/pcre2/src/pcre2_script_run.c

@@ -0,0 +1,441 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2018 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+/* This module contains the function for checking a script run. */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "pcre2_internal.h"
+
+
+/*************************************************
+*                Check script run                *
+*************************************************/
+
+/* A script run is conceptually a sequence of characters all in the same
+Unicode script. However, it isn't quite that simple. There are special rules
+for scripts that are commonly used together, and also special rules for digits.
+This function implements the appropriate checks, which is possible only when
+PCRE2 is compiled with Unicode support. The function returns TRUE if there is
+no Unicode support; however, it should never be called in that circumstance
+because an error is given by pcre2_compile() if a script run is called for in a
+version of PCRE2 compiled without Unicode support.
+
+Arguments:
+  ptr       point to the first character
+  endptr    point after the last character
+  utf       TRUE if in UTF mode
+
+Returns:    TRUE if this is a valid script run
+*/
+
+/* These dummy values must be less than the negation of the largest offset in
+the PRIV(ucd_script_sets) vector, which is held in a 16-bit field in UCD
+records (and is only likely to be a few hundred). */
+
+#define SCRIPT_UNSET        (-99999)
+#define SCRIPT_HANPENDING   (-99998)
+#define SCRIPT_HANHIRAKATA  (-99997)
+#define SCRIPT_HANBOPOMOFO  (-99996)
+#define SCRIPT_HANHANGUL    (-99995)
+#define SCRIPT_LIST         (-99994)
+
+#define INTERSECTION_LIST_SIZE 50
+
+BOOL
+PRIV(script_run)(PCRE2_SPTR ptr, PCRE2_SPTR endptr, BOOL utf)
+{
+#ifdef SUPPORT_UNICODE
+int require_script = SCRIPT_UNSET;
+uint8_t intersection_list[INTERSECTION_LIST_SIZE];
+const uint8_t *require_list = NULL;
+uint32_t require_digitset = 0;
+uint32_t c;
+
+#if PCRE2_CODE_UNIT_WIDTH == 32
+(void)utf;    /* Avoid compiler warning */
+#endif
+
+/* Any string containing fewer than 2 characters is a valid script run. */
+
+if (ptr >= endptr) return TRUE;
+GETCHARINCTEST(c, ptr);
+if (ptr >= endptr) return TRUE;
+
+/* Scan strings of two or more characters, checking the Unicode characteristics
+of each code point. We make use of the Script Extensions property. There is
+special code for scripts that can be combined with characters from the Han
+Chinese script. This may be used in conjunction with four other scripts in
+these combinations:
+
+. Han with Hiragana and Katakana is allowed (for Japanese).
+. Han with Bopomofo is allowed (for Taiwanese Mandarin).
+. Han with Hangul is allowed (for Korean).
+
+If the first significant character's script is one of the four, the required
+script type is immediately known. However, if the first significant
+character's script is Han, we have to keep checking for a non-Han character.
+Hence the SCRIPT_HANPENDING state. */
+
+for (;;)
+  {
+  const ucd_record *ucd = GET_UCD(c);
+  int32_t scriptx = ucd->scriptx;
+
+  /* If the script extension is Unknown, the string is not a valid script run.
+  Such characters can only form script runs of length one. */
+
+  if (scriptx == ucp_Unknown) return FALSE;
+
+  /* A character whose script extension is Inherited is always accepted with
+  any script, and plays no further part in this testing. A character whose
+  script is Common is always accepted, but must still be tested for a digit
+  below. The scriptx value at this point is non-zero, because zero is
+  ucp_Unknown, tested for above. */
+
+  if (scriptx != ucp_Inherited)
+    {
+    if (scriptx != ucp_Common)
+      {
+      /* If the script extension value is positive, the character is not a mark
+      that can be used with many scripts. In the simple case we either set or
+      compare with the required script. However, handling the scripts that can
+      combine with Han is more complicated, as is the case when the previous
+      characters have been many-script marks. */
+
+      if (scriptx > 0)
+        {
+        switch(require_script)
+          {
+          /* Either the first significant character (require_script unset) or
+          after only Han characters. */
+
+          case SCRIPT_UNSET:
+          case SCRIPT_HANPENDING:
+          switch(scriptx)
+            {
+            case ucp_Han:
+            require_script = SCRIPT_HANPENDING;
+            break;
+
+            case ucp_Hiragana:
+            case ucp_Katakana:
+            require_script = SCRIPT_HANHIRAKATA;
+            break;
+
+            case ucp_Bopomofo:
+            require_script = SCRIPT_HANBOPOMOFO;
+            break;
+
+            case ucp_Hangul:
+            require_script = SCRIPT_HANHANGUL;
+            break;
+
+            /* Not a Han-related script. If expecting one, fail. Otherwise set
+            the requirement to this script. */
+
+            default:
+            if (require_script == SCRIPT_HANPENDING) return FALSE;
+            require_script = scriptx;
+            break;
+            }
+          break;
+
+          /* Previously encountered one of the "with Han" scripts. Check that
+          this character is appropriate. */
+
+          case SCRIPT_HANHIRAKATA:
+          if (scriptx != ucp_Han && scriptx != ucp_Hiragana && 
+              scriptx != ucp_Katakana)
+            return FALSE;
+          break;
+
+          case SCRIPT_HANBOPOMOFO:
+          if (scriptx != ucp_Han && scriptx != ucp_Bopomofo) return FALSE;
+          break;
+
+          case SCRIPT_HANHANGUL:
+          if (scriptx != ucp_Han && scriptx != ucp_Hangul) return FALSE;
+          break;
+
+          /* We have a list of scripts to check that is derived from one or
+          more previous characters. This is either one of the lists in
+          ucd_script_sets[] (for one previous character) or the intersection of
+          several lists for multiple characters. */
+
+          case SCRIPT_LIST:
+            {
+            const uint8_t *list;
+            for (list = require_list; *list != 0; list++)
+              {
+              if (*list == scriptx) break;
+              }
+            if (*list == 0) return FALSE;
+            }
+
+          /* The rest of the string must be in this script, but we have to 
+          allow for the Han complications. */
+          
+          switch(scriptx)
+            {
+            case ucp_Han:
+            require_script = SCRIPT_HANPENDING;
+            break;
+
+            case ucp_Hiragana:
+            case ucp_Katakana:
+            require_script = SCRIPT_HANHIRAKATA;
+            break;
+
+            case ucp_Bopomofo:
+            require_script = SCRIPT_HANBOPOMOFO;
+            break;
+
+            case ucp_Hangul:
+            require_script = SCRIPT_HANHANGUL;
+            break;
+
+            default:
+            require_script = scriptx;
+            break;
+            }  
+          break;
+
+          /* This is the easy case when a single script is required. */
+
+          default:
+          if (scriptx != require_script) return FALSE;
+          break;
+          }
+        }  /* End of handling positive scriptx */
+
+      /* If scriptx is negative, this character is a mark-type character that
+      has a list of permitted scripts. */
+
+      else
+        {
+        uint32_t chspecial;
+        const uint8_t *clist, *rlist;
+        const uint8_t *list = PRIV(ucd_script_sets) - scriptx;
+        
+        switch(require_script)
+          {
+          case SCRIPT_UNSET:
+          require_list = PRIV(ucd_script_sets) - scriptx;
+          require_script = SCRIPT_LIST;
+          break;
+
+          /* An inspection of the Unicode 11.0.0 files shows that there are the
+          following types of Script Extension list that involve the Han,
+          Bopomofo, Hiragana, Katakana, and Hangul scripts:
+
+          . Bopomofo + Han
+          . Han + Hiragana + Katakana
+          . Hiragana + Katakana
+          . Bopomofo + Hangul + Han + Hiragana + Katakana
+
+          The following code tries to make sense of this. */
+
+#define FOUND_BOPOMOFO 1
+#define FOUND_HIRAGANA 2
+#define FOUND_KATAKANA 4
+#define FOUND_HANGUL   8
+
+          case SCRIPT_HANPENDING:
+          chspecial = 0;
+          for (; *list != 0; list++)
+            {
+            switch (*list)
+              {
+              case ucp_Bopomofo: chspecial |= FOUND_BOPOMOFO; break;
+              case ucp_Hiragana: chspecial |= FOUND_HIRAGANA; break;
+              case ucp_Katakana: chspecial |= FOUND_KATAKANA; break;
+              case ucp_Hangul:   chspecial |= FOUND_HANGUL; break;
+              default: break;
+              }
+            }
+
+           if (chspecial == 0) return FALSE;
+
+           if (chspecial == FOUND_BOPOMOFO)
+             {
+             require_script = SCRIPT_HANBOPOMOFO;
+             }
+           else if (chspecial == (FOUND_HIRAGANA|FOUND_KATAKANA))
+             {
+             require_script = SCRIPT_HANHIRAKATA;
+             }
+
+          /* Otherwise it must be allowed with all of them, so remain in
+          the pending state. */
+
+          break;
+
+          case SCRIPT_HANHIRAKATA:
+          for (; *list != 0; list++)
+            {
+            if (*list == ucp_Hiragana || *list == ucp_Katakana) break;
+            }
+          if (*list == 0) return FALSE;
+          break;
+
+          case SCRIPT_HANBOPOMOFO:
+          for (; *list != 0; list++)
+            {
+            if (*list == ucp_Bopomofo) break;
+            }
+          if (*list == 0) return FALSE;
+          break;
+
+          case SCRIPT_HANHANGUL:
+          for (; *list != 0; list++)
+            {
+            if (*list == ucp_Hangul) break;
+            }
+          if (*list == 0) return FALSE;
+          break;
+
+          /* Previously encountered one or more characters that are allowed
+          with a list of scripts. Build the intersection of the required list
+          with this character's list in intersection_list[]. This code is
+          written so that it still works OK if the required list is already in
+          that vector. */
+
+          case SCRIPT_LIST:
+            {
+            int i = 0;
+            for (rlist = require_list; *rlist != 0; rlist++)
+              {
+              for (clist = list; *clist != 0; clist++)
+                {
+                if (*rlist == *clist)
+                  {
+                  intersection_list[i++] = *rlist;
+                  break;
+                  }
+                }
+              }
+            if (i == 0) return FALSE;  /* No scripts in common */
+
+            /* If there's just one script in common, we can set it as the
+            unique required script. Otherwise, terminate the intersection list
+            and make it the required list. */
+
+            if (i == 1)
+              {
+              require_script = intersection_list[0];
+              }
+            else
+              {
+              intersection_list[i] = 0;
+              require_list = intersection_list;
+              }
+            }
+          break;
+
+          /* The previously set required script is a single script, not
+          Han-related. Check that it is in this character's list. */
+
+          default:
+          for (; *list != 0; list++)
+            {
+            if (*list == require_script) break;
+            }
+          if (*list == 0) return FALSE;
+          break;
+          }
+        }  /* End of handling negative scriptx */
+      }    /* End of checking non-Common character */
+
+    /* The character is in an acceptable script. We must now ensure that all
+    decimal digits in the string come from the same set. Some scripts (e.g.
+    Common, Arabic) have more than one set of decimal digits. This code does
+    not allow mixing sets, even within the same script. The vector called
+    PRIV(ucd_digit_sets)[] contains, in its first element, the number of
+    following elements, and then, in ascending order, the code points of the
+    '9' characters in every set of 10 digits. Each set is identified by the
+    offset in the vector of its '9' character. An initial check of the first
+    value picks up ASCII digits quickly. Otherwise, a binary chop is used. */
+
+    if (ucd->chartype == ucp_Nd)
+      {
+      uint32_t digitset;
+
+      if (c <= PRIV(ucd_digit_sets)[1]) digitset = 1; else
+        {
+        int mid;
+        int bot = 1;
+        int top = PRIV(ucd_digit_sets)[0];
+        for (;;)
+          {
+          if (top <= bot + 1)    /* <= rather than == is paranoia */
+            {
+            digitset = top;
+            break;
+            }
+          mid = (top + bot) / 2;
+          if (c <= PRIV(ucd_digit_sets)[mid]) top = mid; else bot = mid;
+          }
+        }
+
+      /* A required value of 0 means "unset". */
+
+      if (require_digitset == 0) require_digitset = digitset;
+        else if (digitset != require_digitset) return FALSE;
+      }   /* End digit handling */
+    }     /* End checking non-Inherited character */
+
+  /* If we haven't yet got to the end, pick up the next character. */
+
+  if (ptr >= endptr) return TRUE;
+  GETCHARINCTEST(c, ptr);
+  }  /* End checking loop */
+
+#else   /* NOT SUPPORT_UNICODE */
+(void)ptr;
+(void)endptr;
+(void)utf;
+return TRUE;
+#endif  /* SUPPORT_UNICODE */
+}
+
+/* End of pcre2_script_run.c */

+ 9 - 7
thirdparty/pcre2/src/pcre2_study.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -54,7 +54,7 @@ collecting data (e.g. minimum matching length). */
 
 /* Set a bit in the starting code unit bit map. */
 
-#define SET_BIT(c) re->start_bitmap[(c)/8] |= (1 << ((c)&7))
+#define SET_BIT(c) re->start_bitmap[(c)/8] |= (1u << ((c)&7))
 
 /* Returns from set_start_bits() */
 
@@ -171,6 +171,7 @@ for (;;)
     /* Fall through */
 
     case OP_ONCE:
+    case OP_SCRIPT_RUN:
     case OP_SBRA:
     case OP_BRAPOS:
     case OP_SBRAPOS:
@@ -842,7 +843,7 @@ for (c = 0; c < table_limit; c++)
 if (table_limit == 32) return;
 for (c = 128; c < 256; c++)
   {
-  if ((re->tables[cbits_offset + c/8] & (1 << (c&7))) != 0)
+  if ((re->tables[cbits_offset + c/8] & (1u << (c&7))) != 0)
     {
     PCRE2_UCHAR buff[6];
     (void)PRIV(ord2utf)(c, buff);
@@ -1075,6 +1076,7 @@ do
       case OP_CBRAPOS:
       case OP_SCBRAPOS:
       case OP_ONCE:
+      case OP_SCRIPT_RUN:
       case OP_ASSERT:
       rc = set_start_bits(re, tcode, utf);
       if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
@@ -1505,11 +1507,11 @@ do
           for (c = 0; c < 16; c++) re->start_bitmap[c] |= classmap[c];
           for (c = 128; c < 256; c++)
             {
-            if ((classmap[c/8] & (1 << (c&7))) != 0)
+            if ((classmap[c/8] & (1u << (c&7))) != 0)
               {
-              int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
-              re->start_bitmap[d/8] |= (1 << (d&7));  /* and then skip on to the */
-              c = (c & 0xc0) + 0x40 - 1;          /* next relevant character. */
+              int d = (c >> 6) | 0xc0;                 /* Set bit for this starter */
+              re->start_bitmap[d/8] |= (1u << (d&7));  /* and then skip on to the */
+              c = (c & 0xc0) + 0x40 - 1;               /* next relevant character. */
               }
             }
           }

+ 54 - 13
thirdparty/pcre2/src/pcre2_substitute.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -129,7 +129,7 @@ for (; ptr < ptrend; ptr++)
 
     ptr += 1;  /* Must point after \ */
     erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
-      code->overall_options, FALSE, NULL);
+      code->overall_options, code->extra_options, FALSE, NULL);
     ptr -= 1;  /* Back to last code unit of escape */
     if (errorcode != 0)
       {
@@ -239,13 +239,17 @@ PCRE2_SIZE extra_needed = 0;
 PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
 PCRE2_SIZE *ovector;
 PCRE2_SIZE ovecsave[3];
+pcre2_substitute_callout_block scb;
+
+/* General initialization */
 
 buff_offset = 0;
 lengthleft = buff_length = *blength;
 *blength = PCRE2_UNSET;
 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
 
-/* Partial matching is not valid. */
+/* Partial matching is not valid. This must come after setting *blength to 
+PCRE2_UNSET, so as not to imply an offset in the replacement. */
 
 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
   return PCRE2_ERROR_BADOPTION;
@@ -264,6 +268,13 @@ if (match_data == NULL)
 ovector = pcre2_get_ovector_pointer(match_data);
 ovector_count = pcre2_get_ovector_count(match_data);
 
+/* Fixed things in the callout block */
+
+scb.version = 0;
+scb.input = subject;
+scb.output = (PCRE2_SPTR)buffer;
+scb.ovector = ovector;
+
 /* Find lengths of zero-terminated strings and the end of the replacement. */
 
 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
@@ -390,7 +401,7 @@ do
     rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
     goto EXIT;   
     }   
-
+    
   /* Count substitutions with a paranoid check for integer overflow; surely no
   real call to this function would ever hit this! */
 
@@ -401,11 +412,14 @@ do
     }
   subs++;
 
-  /* Copy the text leading up to the match. */
+  /* Copy the text leading up to the match, and remember where the insert
+  begins and how many ovector pairs are set. */
 
   if (rc == 0) rc = ovector_count;
   fraglength = ovector[0] - start_offset;
   CHECKMEMCPY(subject + start_offset, fraglength);
+  scb.output_offsets[0] = buff_offset;
+  scb.oveccount = rc;
 
   /* Process the replacement string. Literal mode is set by \Q, but only in
   extended mode when backslashes are being interpreted. In extended mode we
@@ -421,7 +435,7 @@ do
 
     if (ptr >= repend)
       {
-      if (ptrstackptr <= 0) break;       /* End of replacement string */
+      if (ptrstackptr == 0) break;       /* End of replacement string */
       repend = ptrstack[--ptrstackptr];
       ptr = ptrstack[--ptrstackptr];
       continue;
@@ -702,7 +716,7 @@ do
               {
               if (((code->tables + cbits_offset +
                   ((forcecase > 0)? cbit_upper:cbit_lower)
-                  )[ch/8] & (1 << (ch%8))) == 0)
+                  )[ch/8] & (1u << (ch%8))) == 0)
                 ch = (code->tables + fcc_offset)[ch];
               }
             forcecase = forcecasereset;
@@ -760,7 +774,7 @@ do
 
       ptr++;  /* Point after \ */
       rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
-        code->overall_options, FALSE, NULL);
+        code->overall_options, code->extra_options, FALSE, NULL);
       if (errorcode != 0) goto BADESCAPE;
 
       switch(rc)
@@ -804,7 +818,7 @@ do
           {
           if (((code->tables + cbits_offset +
               ((forcecase > 0)? cbit_upper:cbit_lower)
-              )[ch/8] & (1 << (ch%8))) == 0)
+              )[ch/8] & (1u << (ch%8))) == 0)
             ch = (code->tables + fcc_offset)[ch];
           }
         forcecase = forcecasereset;
@@ -821,10 +835,37 @@ do
       } /* End handling a literal code unit */
     }   /* End of loop for scanning the replacement. */
 
-  /* The replacement has been copied to the output. Save the details of this
-  match. See above for how this data is used. If we matched an empty string, do
-  the magic for global matches. Finally, update the start offset to point to
-  the rest of the subject string. */
+  /* The replacement has been copied to the output, or its size has been 
+  remembered. Do the callout if there is one and we have done an actual 
+  replacement. */
+  
+  if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
+    {
+    scb.subscount = subs;  
+    scb.output_offsets[1] = buff_offset;
+    rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data); 
+
+    /* A non-zero return means cancel this substitution. Instead, copy the 
+    matched string fragment. */
+
+    if (rc != 0)
+      {
+      PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
+      PCRE2_SIZE oldlength = ovector[1] - ovector[0];
+      
+      buff_offset -= newlength;
+      lengthleft += newlength;
+      CHECKMEMCPY(subject + ovector[0], oldlength);    
+      
+      /* A negative return means do not do any more. */
+      
+      if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
+      }
+    }   
+ 
+  /* Save the details of this match. See above for how this data is used. If we
+  matched an empty string, do the magic for global matches. Finally, update the
+  start offset to point to the rest of the subject string. */
   
   ovecsave[0] = ovector[0];                                
   ovecsave[1] = ovector[1];                                        

+ 37 - 34
thirdparty/pcre2/src/pcre2_tables.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -142,7 +142,7 @@ ucp_gbXX values defined in pcre2_ucp.h. These changed between Unicode versions
 code points. The left property selects a word from the table, and the right
 property selects a bit from that word like this:
 
-  PRIV(ucp_gbtable)[left-property] & (1 << right-property)
+  PRIV(ucp_gbtable)[left-property] & (1u << right-property)
 
 The value is non-zero if a grapheme break is NOT permitted between the relevant
 two code points. The breaking rules are as follows:
@@ -183,25 +183,25 @@ are implementing).
 #define ESZ (1<<ucp_gbExtend)|(1<<ucp_gbSpacingMark)|(1<<ucp_gbZWJ)
 
 const uint32_t PRIV(ucp_gbtable)[] = {
-   (1<<ucp_gbLF),                                      /*  0 CR */
-   0,                                                  /*  1 LF */
-   0,                                                  /*  2 Control */
-   ESZ,                                                /*  3 Extend */
-   ESZ|(1<<ucp_gbPrepend)|                             /*  4 Prepend */
-       (1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbT)|
-       (1<<ucp_gbLV)|(1<<ucp_gbLVT)|(1<<ucp_gbOther)|
-       (1<<ucp_gbRegionalIndicator),
-   ESZ,                                                /*  5 SpacingMark */
-   ESZ|(1<<ucp_gbL)|(1<<ucp_gbV)|(1<<ucp_gbLV)|        /*  6 L */
-       (1<<ucp_gbLVT),
-   ESZ|(1<<ucp_gbV)|(1<<ucp_gbT),                      /*  7 V */
-   ESZ|(1<<ucp_gbT),                                   /*  8 T */
-   ESZ|(1<<ucp_gbV)|(1<<ucp_gbT),                      /*  9 LV */
-   ESZ|(1<<ucp_gbT),                                   /* 10 LVT */
-   (1<<ucp_gbRegionalIndicator),                       /* 11 RegionalIndicator */
-   ESZ,                                                /* 12 Other */
-   ESZ,                                                /* 13 ZWJ */
-   ESZ|(1<<ucp_gbExtended_Pictographic)                /* 14 Extended Pictographic */
+   (1u<<ucp_gbLF),                                      /*  0 CR */
+   0,                                                   /*  1 LF */
+   0,                                                   /*  2 Control */
+   ESZ,                                                 /*  3 Extend */
+   ESZ|(1u<<ucp_gbPrepend)|                             /*  4 Prepend */
+       (1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbT)|
+       (1u<<ucp_gbLV)|(1u<<ucp_gbLVT)|(1u<<ucp_gbOther)|
+       (1u<<ucp_gbRegionalIndicator),
+   ESZ,                                                 /*  5 SpacingMark */
+   ESZ|(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbLV)|      /*  6 L */
+       (1u<<ucp_gbLVT),
+   ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT),                     /*  7 V */
+   ESZ|(1u<<ucp_gbT),                                   /*  8 T */
+   ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT),                     /*  9 LV */
+   ESZ|(1u<<ucp_gbT),                                   /* 10 LVT */
+   (1u<<ucp_gbRegionalIndicator),                       /* 11 RegionalIndicator */
+   ESZ,                                                 /* 12 Other */
+   ESZ,                                                 /* 13 ZWJ */
+   ESZ|(1u<<ucp_gbExtended_Pictographic)                /* 14 Extended Pictographic */
 };
 
 #undef ESZ
@@ -417,6 +417,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. */
 #define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
 #define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0"
 #define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
+#define STRING_Unknown0 STR_U STR_n STR_k STR_n STR_o STR_w STR_n "\0"
 #define STRING_Vai0 STR_V STR_a STR_i "\0"
 #define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
 #define STRING_Xan0 STR_X STR_a STR_n "\0"
@@ -611,6 +612,7 @@ const char PRIV(utt_names)[] =
   STRING_Tifinagh0
   STRING_Tirhuta0
   STRING_Ugaritic0
+  STRING_Unknown0
   STRING_Vai0
   STRING_Warang_Citi0
   STRING_Xan0
@@ -805,19 +807,20 @@ const ucp_type_table PRIV(utt)[] = {
   { 1424, PT_SC, ucp_Tifinagh },
   { 1433, PT_SC, ucp_Tirhuta },
   { 1441, PT_SC, ucp_Ugaritic },
-  { 1450, PT_SC, ucp_Vai },
-  { 1454, PT_SC, ucp_Warang_Citi },
-  { 1466, PT_ALNUM, 0 },
-  { 1470, PT_PXSPACE, 0 },
-  { 1474, PT_SPACE, 0 },
-  { 1478, PT_UCNC, 0 },
-  { 1482, PT_WORD, 0 },
-  { 1486, PT_SC, ucp_Yi },
-  { 1489, PT_GC, ucp_Z },
-  { 1491, PT_SC, ucp_Zanabazar_Square },
-  { 1508, PT_PC, ucp_Zl },
-  { 1511, PT_PC, ucp_Zp },
-  { 1514, PT_PC, ucp_Zs }
+  { 1450, PT_SC, ucp_Unknown },
+  { 1458, PT_SC, ucp_Vai },
+  { 1462, PT_SC, ucp_Warang_Citi },
+  { 1474, PT_ALNUM, 0 },
+  { 1478, PT_PXSPACE, 0 },
+  { 1482, PT_SPACE, 0 },
+  { 1486, PT_UCNC, 0 },
+  { 1490, PT_WORD, 0 },
+  { 1494, PT_SC, ucp_Yi },
+  { 1497, PT_GC, ucp_Z },
+  { 1499, PT_SC, ucp_Zanabazar_Square },
+  { 1516, PT_PC, ucp_Zl },
+  { 1519, PT_PC, ucp_Zp },
+  { 1522, PT_PC, ucp_Zs }
 };
 
 const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);

ファイルの差分が大きいため隠しています
+ 1019 - 864
thirdparty/pcre2/src/pcre2_ucd.c


+ 1 - 0
thirdparty/pcre2/src/pcre2_ucp.h

@@ -124,6 +124,7 @@ enum {
 /* These are the script identifications. */
 
 enum {
+  ucp_Unknown,
   ucp_Arabic,
   ucp_Armenian,
   ucp_Bengali,

+ 3 - 3
thirdparty/pcre2/src/pcre2_xclass.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-         New API code Copyright (c) 2016 University of Cambridge
+          New API code Copyright (c) 2016-2019 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -85,10 +85,10 @@ if (c < 256)
   if ((*data & XCL_HASPROP) == 0)
     {
     if ((*data & XCL_MAP) == 0) return negated;
-    return (((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0;
+    return (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0;
     }
   if ((*data & XCL_MAP) != 0 &&
-    (((uint8_t *)(data + 1))[c/8] & (1 << (c&7))) != 0)
+    (((uint8_t *)(data + 1))[c/8] & (1u << (c&7))) != 0)
     return !negated; /* char found */
   }
 

+ 1 - 1
thirdparty/pcre2/src/sljit/sljitConfigInternal.h

@@ -530,7 +530,7 @@ typedef double sljit_f64;
 #endif /* !SLJIT_FUNC */
 
 #ifndef SLJIT_INDIRECT_CALL
-#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN)) \
+#if ((defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) && (!defined _CALL_ELF || _CALL_ELF == 1)) \
 	|| ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && defined _AIX)
 /* It seems certain ppc compilers use an indirect addressing for functions
    which makes things complicated. */

+ 43 - 3
thirdparty/pcre2/src/sljit/sljitExecAllocator.c

@@ -94,6 +94,46 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
 
 #else
 
+#ifdef __APPLE__
+/* Configures TARGET_OS_OSX when appropriate */
+#include <TargetConditionals.h>
+
+#if TARGET_OS_OSX && defined(MAP_JIT)
+#include <sys/utsname.h>
+#endif /* TARGET_OS_OSX && MAP_JIT */
+
+#ifdef MAP_JIT
+
+static SLJIT_INLINE int get_map_jit_flag()
+{
+#if TARGET_OS_OSX
+	/* On macOS systems, returns MAP_JIT if it is defined _and_ we're running on a version
+	   of macOS where it's OK to have more than one JIT block. On non-macOS systems, returns
+	   MAP_JIT if it is defined. */
+	static int map_jit_flag = -1;
+
+	/* The following code is thread safe because multiple initialization
+	   sets map_jit_flag to the same value and the code has no side-effects.
+	   Changing the kernel version without system restart is (very) unlikely. */
+	if (map_jit_flag == -1) {
+		struct utsname name;
+
+		uname(&name);
+
+		/* Kernel version for 10.14.0 (Mojave) */
+		map_jit_flag = (atoi(name.release) >= 18) ? MAP_JIT : 0;
+	}
+
+	return map_jit_flag;
+#else /* !TARGET_OS_OSX */
+	return MAP_JIT;
+#endif /* TARGET_OS_OSX */
+}
+
+#endif /* MAP_JIT */
+
+#endif /* __APPLE__ */
+
 static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
 {
 	void *retval;
@@ -103,17 +143,17 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
 	int flags = MAP_PRIVATE | MAP_ANON;
 
 #ifdef MAP_JIT
-	flags |= MAP_JIT;
+	flags |= get_map_jit_flag();
 #endif
 
 	retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
-#else
+#else /* !MAP_ANON */
 	if (dev_zero < 0) {
 		if (open_dev_zero())
 			return NULL;
 	}
 	retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, dev_zero, 0);
-#endif
+#endif /* MAP_ANON */
 
 	return (retval != MAP_FAILED) ? retval : NULL;
 }

+ 6 - 5
thirdparty/pcre2/src/sljit/sljitLir.c

@@ -201,15 +201,16 @@
 #	define IS_CALL		0x010
 #	define IS_BIT26_COND	0x020
 #	define IS_BIT16_COND	0x040
+#	define IS_BIT23_COND	0x080
 
-#	define IS_COND		(IS_BIT26_COND | IS_BIT16_COND)
+#	define IS_COND		(IS_BIT26_COND | IS_BIT16_COND | IS_BIT23_COND)
 
-#	define PATCH_B		0x080
-#	define PATCH_J		0x100
+#	define PATCH_B		0x100
+#	define PATCH_J		0x200
 
 #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
-#	define PATCH_ABS32	0x200
-#	define PATCH_ABS48	0x400
+#	define PATCH_ABS32	0x400
+#	define PATCH_ABS48	0x800
 #endif
 
 	/* instruction types */

+ 10 - 10
thirdparty/pcre2/src/sljit/sljitNativeARM_64.c

@@ -51,7 +51,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 	0, 0, 1, 2, 3, 4, 5, 6, 7
 };
 
-#define W_OP (1 << 31)
+#define W_OP (1u << 31)
 #define RD(rd) (reg_map[rd])
 #define RT(rt) (reg_map[rt])
 #define RN(rn) (reg_map[rn] << 5)
@@ -560,7 +560,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
 	/* dst must be register, TMP_REG1
 	   arg1 must be register, TMP_REG1, imm
 	   arg2 must be register, TMP_REG2, imm */
-	sljit_ins inv_bits = (flags & INT_OP) ? (1 << 31) : 0;
+	sljit_ins inv_bits = (flags & INT_OP) ? W_OP : 0;
 	sljit_ins inst_bits;
 	sljit_s32 op = (flags & 0xffff);
 	sljit_s32 reg;
@@ -710,7 +710,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
 		return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2));
 	case SLJIT_MOV_U8:
 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
-		return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (7 << 10));
+		return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (7 << 10));
 	case SLJIT_MOV_S8:
 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
 		if (!(flags & INT_OP))
@@ -718,7 +718,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
 		return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
 	case SLJIT_MOV_U16:
 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
-		return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (15 << 10));
+		return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (15 << 10));
 	case SLJIT_MOV_S16:
 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
 		if (!(flags & INT_OP))
@@ -728,7 +728,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
 		if ((flags & INT_OP) && dst == arg2)
 			return SLJIT_SUCCESS;
-		return push_inst(compiler, (ORR ^ (1 << 31)) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
+		return push_inst(compiler, (ORR ^ W_OP) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
 	case SLJIT_MOV_S32:
 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
 		if ((flags & INT_OP) && dst == arg2)
@@ -1080,7 +1080,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
 {
-	sljit_ins inv_bits = (op & SLJIT_I32_OP) ? (1 << 31) : 0;
+	sljit_ins inv_bits = (op & SLJIT_I32_OP) ? W_OP : 0;
 
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_op0(compiler, op));
@@ -1360,7 +1360,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
 	sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
 
 	if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64)
-		inv_bits |= (1 << 31);
+		inv_bits |= W_OP;
 
 	if (src & SLJIT_MEM) {
 		emit_fop_mem(compiler, (op & SLJIT_F32_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw);
@@ -1382,7 +1382,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp
 	sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0;
 
 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
-		inv_bits |= (1 << 31);
+		inv_bits |= W_OP;
 
 	if (src & SLJIT_MEM) {
 		emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1);
@@ -1662,7 +1662,7 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi
 	sljit_s32 src, sljit_sw srcw)
 {
 	struct sljit_jump *jump;
-	sljit_ins inv_bits = (type & SLJIT_I32_OP) ? (1 << 31) : 0;
+	sljit_ins inv_bits = (type & SLJIT_I32_OP) ? W_OP : 0;
 
 	SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL);
 	ADJUST_LOCAL_OFFSET(src, srcw);
@@ -1787,7 +1787,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
 	sljit_s32 dst_reg,
 	sljit_s32 src, sljit_sw srcw)
 {
-	sljit_ins inv_bits = (dst_reg & SLJIT_I32_OP) ? (1 << 31) : 0;
+	sljit_ins inv_bits = (dst_reg & SLJIT_I32_OP) ? W_OP : 0;
 	sljit_ins cc;
 
 	CHECK_ERROR();

+ 8 - 3
thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c

@@ -368,16 +368,21 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		SLJIT_ASSERT(!(flags & SRC2_IMM));
 
 		if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) {
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) || (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
 			return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
-#else
+#else /* !SLJIT_MIPS_R1 && !SLJIT_MIPS_R6 */
 			FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
 			return push_inst(compiler, MFLO | D(dst), DR(dst));
-#endif
+#endif /* SLJIT_MIPS_R1 || SLJIT_MIPS_R6 */
 		}
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+		FAIL_IF(push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)));
+		FAIL_IF(push_inst(compiler, MUH | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+#else /* !SLJIT_MIPS_R6 */
 		FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS));
 		FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
 		FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
+#endif /* SLJIT_MIPS_R6 */
 		FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
 		return push_inst(compiler, SUBU | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
 

+ 10 - 3
thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c

@@ -459,19 +459,26 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		SLJIT_ASSERT(!(flags & SRC2_IMM));
 
 		if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) {
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+			return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst));
+#elif (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
 			if (op & SLJIT_I32_OP)
 				return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
 			FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS));
 			return push_inst(compiler, MFLO | D(dst), DR(dst));
-#else
+#else /* !SLJIT_MIPS_R6 && !SLJIT_MIPS_R1 */
 			FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
 			return push_inst(compiler, MFLO | D(dst), DR(dst));
-#endif
+#endif /* SLJIT_MIPS_R6 */
 		}
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+		FAIL_IF(push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst)));
+		FAIL_IF(push_inst(compiler, SELECT_OP(DMUH, MUH) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG));
+#else /* !SLJIT_MIPS_R6 */
 		FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS));
 		FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG));
 		FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst)));
+#endif /* SLJIT_MIPS_R6 */
 		FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG));
 		return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG);
 

+ 141 - 23
thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c

@@ -27,17 +27,31 @@
 /* Latest MIPS architecture. */
 /* Automatically detect SLJIT_MIPS_R1 */
 
+#if (defined __mips_isa_rev) && (__mips_isa_rev >= 6)
+#define SLJIT_MIPS_R6 1
+#endif
+
 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
 {
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+	return "MIPS32-R6" SLJIT_CPUINFO;
+#else /* !SLJIT_CONFIG_MIPS_32 */
+	return "MIPS64-R6" SLJIT_CPUINFO;
+#endif /* SLJIT_CONFIG_MIPS_32 */
+
+#elif (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
 	return "MIPS32-R1" SLJIT_CPUINFO;
-#else
+#else /* !SLJIT_CONFIG_MIPS_32 */
 	return "MIPS64-R1" SLJIT_CPUINFO;
-#endif
+#endif /* SLJIT_CONFIG_MIPS_32 */
+
 #else /* SLJIT_MIPS_R1 */
 	return "MIPS III" SLJIT_CPUINFO;
-#endif
+#endif /* SLJIT_MIPS_R6 */
 }
 
 /* Length of an instruction word
@@ -62,6 +76,7 @@ typedef sljit_u32 sljit_ins;
 
 #define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
 #define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
+#define TMP_FREG3	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3)
 
 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
 	0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31
@@ -69,14 +84,14 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
 
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
 
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
-	0, 0, 14, 2, 4, 6, 8, 12, 10
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
+	0, 0, 14, 2, 4, 6, 8, 12, 10, 16
 };
 
 #else
 
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
-	0, 0, 13, 14, 15, 16, 17, 12, 18
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 4] = {
+	0, 0, 13, 14, 15, 16, 17, 12, 18, 10
 };
 
 #endif
@@ -102,6 +117,11 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define FR(dr)		(freg_map[dr])
 #define HI(opcode)	((opcode) << 26)
 #define LO(opcode)	(opcode)
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+/* CMP.cond.fmt */
+/* S = (20 << 21) D = (21 << 21) */
+#define CMP_FMT_S	(20 << 21)
+#endif /* SLJIT_MIPS_R6 */
 /* S = (16 << 21) D = (17 << 21) */
 #define FMT_S		(16 << 21)
 #define FMT_D		(17 << 21)
@@ -114,8 +134,13 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define ANDI		(HI(12))
 #define B		(HI(4))
 #define BAL		(HI(1) | (17 << 16))
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#define BC1EQZ		(HI(17) | (9 << 21) | FT(TMP_FREG3))
+#define BC1NEZ		(HI(17) | (13 << 21) | FT(TMP_FREG3))
+#else /* !SLJIT_MIPS_R6 */
 #define BC1F		(HI(17) | (8 << 21))
 #define BC1T		(HI(17) | (8 << 21) | (1 << 16))
+#endif /* SLJIT_MIPS_R6 */
 #define BEQ		(HI(4))
 #define BGEZ		(HI(1) | (1 << 16))
 #define BGTZ		(HI(7))
@@ -124,20 +149,42 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define BNE		(HI(5))
 #define BREAK		(HI(0) | LO(13))
 #define CFC1		(HI(17) | (2 << 21))
-#define C_UN_S		(HI(17) | FMT_S | LO(49))
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#define C_UEQ_S		(HI(17) | CMP_FMT_S | LO(3))
+#define C_ULE_S		(HI(17) | CMP_FMT_S | LO(7))
+#define C_ULT_S		(HI(17) | CMP_FMT_S | LO(5))
+#define C_UN_S		(HI(17) | CMP_FMT_S | LO(1))
+#define C_FD		(FD(TMP_FREG3))
+#else /* !SLJIT_MIPS_R6 */
 #define C_UEQ_S		(HI(17) | FMT_S | LO(51))
 #define C_ULE_S		(HI(17) | FMT_S | LO(55))
 #define C_ULT_S		(HI(17) | FMT_S | LO(53))
+#define C_UN_S		(HI(17) | FMT_S | LO(49))
+#define C_FD		(0)
+#endif /* SLJIT_MIPS_R6 */
 #define CVT_S_S		(HI(17) | FMT_S | LO(32))
 #define DADDIU		(HI(25))
 #define DADDU		(HI(0) | LO(45))
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#define DDIV		(HI(0) | (2 << 6) | LO(30))
+#define DDIVU		(HI(0) | (2 << 6) | LO(31))
+#define DMOD		(HI(0) | (3 << 6) | LO(30))
+#define DMODU		(HI(0) | (3 << 6) | LO(31))
+#define DIV		(HI(0) | (2 << 6) | LO(26))
+#define DIVU		(HI(0) | (2 << 6) | LO(27))
+#define DMUH		(HI(0) | (3 << 6) | LO(28))
+#define DMUHU		(HI(0) | (3 << 6) | LO(29))
+#define DMUL		(HI(0) | (2 << 6) | LO(28))
+#define DMULU		(HI(0) | (2 << 6) | LO(29))
+#else /* !SLJIT_MIPS_R6 */
 #define DDIV		(HI(0) | LO(30))
 #define DDIVU		(HI(0) | LO(31))
 #define DIV		(HI(0) | LO(26))
 #define DIVU		(HI(0) | LO(27))
-#define DIV_S		(HI(17) | FMT_S | LO(3))
 #define DMULT		(HI(0) | LO(28))
 #define DMULTU		(HI(0) | LO(29))
+#endif /* SLJIT_MIPS_R6 */
+#define DIV_S		(HI(17) | FMT_S | LO(3))
 #define DSLL		(HI(0) | LO(56))
 #define DSLL32		(HI(0) | LO(60))
 #define DSLLV		(HI(0) | LO(20))
@@ -151,18 +198,34 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define J		(HI(2))
 #define JAL		(HI(3))
 #define JALR		(HI(0) | LO(9))
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#define JR		(HI(0) | LO(9))
+#else /* !SLJIT_MIPS_R6 */
 #define JR		(HI(0) | LO(8))
+#endif /* SLJIT_MIPS_R6 */
 #define LD		(HI(55))
 #define LUI		(HI(15))
 #define LW		(HI(35))
 #define MFC1		(HI(17))
+#if !(defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
 #define MFHI		(HI(0) | LO(16))
 #define MFLO		(HI(0) | LO(18))
+#else /* SLJIT_MIPS_R6 */
+#define MOD		(HI(0) | (3 << 6) | LO(26))
+#define MODU		(HI(0) | (3 << 6) | LO(27))
+#endif /* !SLJIT_MIPS_R6 */
 #define MOV_S		(HI(17) | FMT_S | LO(6))
 #define MTC1		(HI(17) | (4 << 21))
-#define MUL_S		(HI(17) | FMT_S | LO(2))
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#define MUH		(HI(0) | (3 << 6) | LO(24))
+#define MUHU		(HI(0) | (3 << 6) | LO(25))
+#define MUL		(HI(0) | (2 << 6) | LO(24))
+#define MULU		(HI(0) | (2 << 6) | LO(25))
+#else /* !SLJIT_MIPS_R6 */
 #define MULT		(HI(0) | LO(24))
 #define MULTU		(HI(0) | LO(25))
+#endif /* SLJIT_MIPS_R6 */
+#define MUL_S		(HI(17) | FMT_S | LO(2))
 #define NEG_S		(HI(17) | FMT_S | LO(7))
 #define NOP		(HI(0) | LO(0))
 #define NOR		(HI(0) | LO(39))
@@ -188,14 +251,18 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define XOR		(HI(0) | LO(38))
 #define XORI		(HI(14))
 
-#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
+#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) || (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
 #define CLZ		(HI(28) | LO(32))
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#define DCLZ		(LO(18))
+#else /* !SLJIT_MIPS_R6 */
 #define DCLZ		(HI(28) | LO(36))
 #define MOVF		(HI(0) | (0 << 16) | LO(1))
 #define MOVN		(HI(0) | LO(11))
 #define MOVT		(HI(0) | (1 << 16) | LO(1))
 #define MOVZ		(HI(0) | LO(10))
 #define MUL		(HI(28) | LO(2))
+#endif /* SLJIT_MIPS_R6 */
 #define PREF		(HI(51))
 #define PREFX		(HI(19) | LO(15))
 #define SEB		(HI(31) | (16 << 6) | LO(32))
@@ -234,7 +301,13 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit
 
 static SLJIT_INLINE sljit_ins invert_branch(sljit_s32 flags)
 {
-	return (flags & IS_BIT26_COND) ? (1 << 26) : (1 << 16);
+	if (flags & IS_BIT26_COND)
+		return (1 << 26);
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+	if (flags & IS_BIT23_COND)
+		return (1 << 23);
+#endif /* SLJIT_MIPS_R6 */
+	return (1 << 16);
 }
 
 static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
@@ -1075,34 +1148,62 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
 		return push_inst(compiler, NOP, UNMOVABLE_INS);
 	case SLJIT_LMUL_UW:
 	case SLJIT_LMUL_SW:
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+		FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMULU : DMUL) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3)));
+		FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMUHU : DMUH) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1)));
+#else /* !SLJIT_CONFIG_MIPS_64 */
+		FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? MULU : MUL) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3)));
+		FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? MUHU : MUH) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1)));
+#endif /* SLJIT_CONFIG_MIPS_64 */
+		FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | TA(0) | D(SLJIT_R0), DR(SLJIT_R0)));
+		return push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_R1), DR(SLJIT_R1));
+#else /* !SLJIT_MIPS_R6 */
 #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
 		FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? DMULTU : DMULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
-#else
+#else /* !SLJIT_CONFIG_MIPS_64 */
 		FAIL_IF(push_inst(compiler, (op == SLJIT_LMUL_UW ? MULTU : MULT) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
-#endif
+#endif /* SLJIT_CONFIG_MIPS_64 */
 		FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0)));
 		return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
+#endif /* SLJIT_MIPS_R6 */
 	case SLJIT_DIVMOD_UW:
 	case SLJIT_DIVMOD_SW:
 	case SLJIT_DIV_UW:
 	case SLJIT_DIV_SW:
 		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
+		if (int_op) {
+			FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3)));
+			FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? MODU : MOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1)));
+		}
+		else {
+			FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3)));
+			FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DMODU : DMOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1)));
+		}
+#else /* !SLJIT_CONFIG_MIPS_64 */
+		FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG3), DR(TMP_REG3)));
+		FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? MODU : MOD) | S(SLJIT_R0) | T(SLJIT_R1) | D(TMP_REG1), DR(TMP_REG1)));
+#endif /* SLJIT_CONFIG_MIPS_64 */
+		FAIL_IF(push_inst(compiler, ADDU_W | S(TMP_REG3) | TA(0) | D(SLJIT_R0), DR(SLJIT_R0)));
+		return (op >= SLJIT_DIV_UW) ? SLJIT_SUCCESS : push_inst(compiler, ADDU_W | S(TMP_REG1) | TA(0) | D(SLJIT_R1), DR(SLJIT_R1));
+#else /* !SLJIT_MIPS_R6 */
 #if !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)
 		FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
 		FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS));
-#endif
-
+#endif /* !SLJIT_MIPS_R1 */
 #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64)
 		if (int_op)
 			FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
 		else
 			FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
-#else
+#else /* !SLJIT_CONFIG_MIPS_64 */
 		FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_DIV_UW ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS));
-#endif
-
+#endif /* SLJIT_CONFIG_MIPS_64 */
 		FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0)));
 		return (op >= SLJIT_DIV_UW) ? SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1));
+#endif /* SLJIT_MIPS_R6 */
 	}
 
 	return SLJIT_SUCCESS;
@@ -1408,8 +1509,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
 		inst = C_UN_S;
 		break;
 	}
-
-	return push_inst(compiler, inst | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS);
+	return push_inst(compiler, inst | FMT(op) | FT(src2) | FS(src1) | C_FD, UNMOVABLE_INS);
 }
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1608,16 +1708,30 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
 	flags = IS_BIT26_COND; \
 	delay_check = src;
 
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+
+#define BR_T() \
+	inst = BC1NEZ; \
+	flags = IS_BIT23_COND; \
+	delay_check = FCSR_FCC;
+#define BR_F() \
+	inst = BC1EQZ; \
+	flags = IS_BIT23_COND; \
+	delay_check = FCSR_FCC;
+
+#else /* !SLJIT_MIPS_R6 */
+
 #define BR_T() \
 	inst = BC1T | JUMP_LENGTH; \
 	flags = IS_BIT16_COND; \
 	delay_check = FCSR_FCC;
-
 #define BR_F() \
 	inst = BC1F | JUMP_LENGTH; \
 	flags = IS_BIT16_COND; \
 	delay_check = FCSR_FCC;
 
+#endif /* SLJIT_MIPS_R6 */
+
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
 {
 	struct sljit_jump *jump;
@@ -1927,7 +2041,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
 	case SLJIT_GREATER_EQUAL_F64:
 	case SLJIT_UNORDERED_F64:
 	case SLJIT_ORDERED_F64:
+#if (defined SLJIT_MIPS_R6 && SLJIT_MIPS_R6)
+		FAIL_IF(push_inst(compiler, MFC1 | TA(dst_ar) | FS(TMP_FREG3), dst_ar));
+#else /* !SLJIT_MIPS_R6 */
 		FAIL_IF(push_inst(compiler, CFC1 | TA(dst_ar) | DA(FCSR_REG), dst_ar));
+#endif /* SLJIT_MIPS_R6 */
 		FAIL_IF(push_inst(compiler, SRL | TA(dst_ar) | DA(dst_ar) | SH_IMM(23), dst_ar));
 		FAIL_IF(push_inst(compiler, ANDI | SA(dst_ar) | TA(dst_ar) | IMM(1), dst_ar));
 		src_ar = dst_ar;

+ 1 - 1
thirdparty/pcre2/src/sljit/sljitNativePPC_common.c

@@ -42,7 +42,7 @@ typedef sljit_u32 sljit_ins;
 #include <sys/cache.h>
 #endif
 
-#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
+#if (defined _CALL_ELF && _CALL_ELF == 2)
 #define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
 #endif
 

この差分においてかなりの量のファイルが変更されているため、一部のファイルを表示していません