浏览代码

Merge pull request #61122 from akien-mga/pcre2-10.40

Rémi Verschelde 3 年之前
父节点
当前提交
311de59e3c
共有 48 个文件被更改,包括 8922 次插入 和 5547 次删除
  1. 2 2
      COPYRIGHT.txt
  2. 1 1
      thirdparty/README.md
  3. 3 3
      thirdparty/pcre2/AUTHORS
  4. 3 3
      thirdparty/pcre2/LICENCE
  5. 6 3
      thirdparty/pcre2/src/config.h
  6. 2 2
      thirdparty/pcre2/src/pcre2.h
  7. 30 13
      thirdparty/pcre2/src/pcre2_auto_possess.c
  8. 171 79
      thirdparty/pcre2/src/pcre2_compile.c
  9. 67 8
      thirdparty/pcre2/src/pcre2_dfa_match.c
  10. 2 2
      thirdparty/pcre2/src/pcre2_error.c
  11. 3 3
      thirdparty/pcre2/src/pcre2_extuni.c
  12. 69 25
      thirdparty/pcre2/src/pcre2_internal.h
  13. 14 3
      thirdparty/pcre2/src/pcre2_intmodedep.h
  14. 238 164
      thirdparty/pcre2/src/pcre2_jit_compile.c
  15. 1 1
      thirdparty/pcre2/src/pcre2_jit_match.c
  16. 1 1
      thirdparty/pcre2/src/pcre2_jit_misc.c
  17. 35 35
      thirdparty/pcre2/src/pcre2_jit_simd_inc.h
  18. 268 64
      thirdparty/pcre2/src/pcre2_match.c
  19. 188 285
      thirdparty/pcre2/src/pcre2_script_run.c
  20. 1 1
      thirdparty/pcre2/src/pcre2_string_utils.c
  21. 2 2
      thirdparty/pcre2/src/pcre2_study.c
  22. 24 5
      thirdparty/pcre2/src/pcre2_substitute.c
  23. 11 646
      thirdparty/pcre2/src/pcre2_tables.c
  24. 1778 1138
      thirdparty/pcre2/src/pcre2_ucd.c
  25. 221 132
      thirdparty/pcre2/src/pcre2_ucp.h
  26. 1524 0
      thirdparty/pcre2/src/pcre2_ucptables.c
  27. 19 1
      thirdparty/pcre2/src/pcre2_xclass.c
  28. 75 37
      thirdparty/pcre2/src/sljit/sljitConfigInternal.h
  29. 12 4
      thirdparty/pcre2/src/sljit/sljitExecAllocator.c
  30. 268 251
      thirdparty/pcre2/src/sljit/sljitLir.c
  31. 241 193
      thirdparty/pcre2/src/sljit/sljitLir.h
  32. 381 155
      thirdparty/pcre2/src/sljit/sljitNativeARM_32.c
  33. 309 219
      thirdparty/pcre2/src/sljit/sljitNativeARM_64.c
  34. 398 188
      thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c
  35. 133 146
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
  36. 88 55
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c
  37. 379 175
      thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
  38. 12 9
      thirdparty/pcre2/src/sljit/sljitNativePPC_32.c
  39. 46 44
      thirdparty/pcre2/src/sljit/sljitNativePPC_64.c
  40. 286 191
      thirdparty/pcre2/src/sljit/sljitNativePPC_common.c
  41. 262 171
      thirdparty/pcre2/src/sljit/sljitNativeS390X.c
  42. 52 55
      thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c
  43. 201 86
      thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c
  44. 471 355
      thirdparty/pcre2/src/sljit/sljitNativeX86_32.c
  45. 420 371
      thirdparty/pcre2/src/sljit/sljitNativeX86_64.c
  46. 184 203
      thirdparty/pcre2/src/sljit/sljitNativeX86_common.c
  47. 3 3
      thirdparty/pcre2/src/sljit/sljitProtExecAllocator.c
  48. 17 14
      thirdparty/pcre2/src/sljit/sljitUtils.c

+ 2 - 2
COPYRIGHT.txt

@@ -365,8 +365,8 @@ License: Apache-2.0
 
 Files: ./thirdparty/pcre2/
 Comment: PCRE2
-Copyright: 1997-2021, University of Cambridge
- 2009-2021, Zoltan Herczeg
+Copyright: 1997-2022, University of Cambridge
+ 2009-2022, Zoltan Herczeg
 License: BSD-3-clause
 
 Files: ./thirdparty/recastnavigation/

+ 1 - 1
thirdparty/README.md

@@ -531,7 +531,7 @@ Exclude:
 ## pcre2
 
 - Upstream: http://www.pcre.org
-- Version: 10.39 (35fee4193b852cb504892352bd0155de10809889, 2021)
+- Version: 10.40 (3103b8f20a3b9944b177e812fde29fbfb8b90558, 2022)
 - License: BSD-3-Clause
 
 Files extracted from upstream source:

+ 3 - 3
thirdparty/pcre2/AUTHORS

@@ -8,7 +8,7 @@ Email domain:     gmail.com
 Retired from University of Cambridge Computing Service,
 Cambridge, England.
 
-Copyright (c) 1997-2021 University of Cambridge
+Copyright (c) 1997-2022 University of Cambridge
 All rights reserved
 
 
@@ -19,7 +19,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Emain domain:     freemail.hu
 
-Copyright(c) 2010-2021 Zoltan Herczeg
+Copyright(c) 2010-2022 Zoltan Herczeg
 All rights reserved.
 
 
@@ -30,7 +30,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Emain domain:     freemail.hu
 
-Copyright(c) 2009-2021 Zoltan Herczeg
+Copyright(c) 2009-2022 Zoltan Herczeg
 All rights reserved.
 
 ####

+ 3 - 3
thirdparty/pcre2/LICENCE

@@ -26,7 +26,7 @@ Email domain:     gmail.com
 Retired from University of Cambridge Computing Service,
 Cambridge, England.
 
-Copyright (c) 1997-2021 University of Cambridge
+Copyright (c) 1997-2022 University of Cambridge
 All rights reserved.
 
 
@@ -37,7 +37,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2010-2021 Zoltan Herczeg
+Copyright(c) 2010-2022 Zoltan Herczeg
 All rights reserved.
 
 
@@ -48,7 +48,7 @@ Written by:       Zoltan Herczeg
 Email local part: hzmester
 Email domain:     freemail.hu
 
-Copyright(c) 2009-2021 Zoltan Herczeg
+Copyright(c) 2009-2022 Zoltan Herczeg
 All rights reserved.
 
 

+ 6 - 3
thirdparty/pcre2/src/config.h

@@ -97,6 +97,9 @@ sure both macros are undefined; an emulation function will then be used. */
 /* Have PTHREAD_PRIO_INHERIT. */
 /* #undef HAVE_PTHREAD_PRIO_INHERIT */
 
+/* Define to 1 if you have the <readline.h> header file. */
+/* #undef HAVE_READLINE_H */
+
 /* Define to 1 if you have the <readline/history.h> header file. */
 /* #undef HAVE_READLINE_HISTORY_H */
 
@@ -233,7 +236,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_NAME "PCRE2"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "PCRE2 10.39"
+#define PACKAGE_STRING "PCRE2 10.40"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "pcre2"
@@ -242,7 +245,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #define PACKAGE_URL ""
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "10.39"
+#define PACKAGE_VERSION "10.40"
 
 /* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
    parentheses (of any kind) in a pattern. This limits the amount of system
@@ -435,7 +438,7 @@ sure both macros are undefined; an emulation function will then be used. */
 #endif
 
 /* Version number of package */
-#define VERSION "10.39"
+#define VERSION "10.40"
 
 /* Define to empty if `const' does not conform to ANSI C. */
 /* #undef const */

+ 2 - 2
thirdparty/pcre2/src/pcre2.h

@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
 /* The current PCRE version information. */
 
 #define PCRE2_MAJOR           10
-#define PCRE2_MINOR           39
+#define PCRE2_MINOR           40
 #define PCRE2_PRERELEASE      
-#define PCRE2_DATE            2021-10-29
+#define PCRE2_DATE            2022-04-14
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE2, the appropriate

+ 30 - 13
thirdparty/pcre2/src/pcre2_auto_possess.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2021 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -123,18 +123,21 @@ opcode is used to select the column. The values are as follows:
 */
 
 static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
-/* ANY LAMP GC  PC  SC ALNUM SPACE PXSPACE WORD CLIST UCNC */
-  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_ANY */
-  { 0,  3,  0,  0,  0,    3,    1,      1,   0,    0,   0 },  /* PT_LAMP */
-  { 0,  0,  2,  4,  0,    9,   10,     10,  11,    0,   0 },  /* PT_GC */
-  { 0,  0,  5,  2,  0,   15,   16,     16,  17,    0,   0 },  /* PT_PC */
-  { 0,  0,  0,  0,  2,    0,    0,      0,   0,    0,   0 },  /* PT_SC */
-  { 0,  3,  6, 12,  0,    3,    1,      1,   0,    0,   0 },  /* PT_ALNUM */
-  { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_SPACE */
-  { 0,  1,  7, 13,  0,    1,    3,      3,   1,    0,   0 },  /* PT_PXSPACE */
-  { 0,  0,  8, 14,  0,    0,    1,      1,   3,    0,   0 },  /* PT_WORD */
-  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   0 },  /* PT_CLIST */
-  { 0,  0,  0,  0,  0,    0,    0,      0,   0,    0,   3 }   /* PT_UCNC */
+/* ANY LAMP GC  PC  SC  SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */
+  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_ANY */
+  { 0,  3,  0,  0,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_LAMP */
+  { 0,  0,  2,  4,  0,   0,    9,   10,     10,  11,    0,   0,    0,    0 },  /* PT_GC */
+  { 0,  0,  5,  2,  0,   0,   15,   16,     16,  17,    0,   0,    0,    0 },  /* PT_PC */
+  { 0,  0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SC */
+  { 0,  0,  0,  0,  2,   2,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_SCX */
+  { 0,  3,  6, 12,  0,   0,    3,    1,      1,   0,    0,   0,    0,    0 },  /* PT_ALNUM */
+  { 0,  1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_SPACE */
+  { 0,  1,  7, 13,  0,   0,    1,    3,      3,   1,    0,   0,    0,    0 },  /* PT_PXSPACE */
+  { 0,  0,  8, 14,  0,   0,    0,    1,      1,   3,    0,   0,    0,    0 },  /* PT_WORD */
+  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_CLIST */
+  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   3,    0,    0 },  /* PT_UCNC */
+  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 },  /* PT_BIDICL */
+  { 0,  0,  0,  0,  0,   0,    0,    0,      0,   0,    0,   0,    0,    0 }   /* PT_BOOL */
 };
 
 /* This table is used to check whether auto-possessification is possible
@@ -196,6 +199,7 @@ static BOOL
 check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
   BOOL negated)
 {
+BOOL ok;
 const uint32_t *p;
 const ucd_record *prop = GET_UCD(c);
 
@@ -215,6 +219,11 @@ switch(ptype)
   case PT_SC:
   return (pdata == prop->script) == negated;
 
+  case PT_SCX:
+  ok = (pdata == prop->script
+        || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0);
+  return ok == negated;
+
   /* These are specials */
 
   case PT_ALNUM:
@@ -251,6 +260,14 @@ switch(ptype)
     if (c == *p++) return negated;
     }
   break;  /* Control never reaches here */
+
+  /* Haven't yet thought these through. */
+
+  case PT_BIDICL:
+  return FALSE;
+
+  case PT_BOOL:
+  return FALSE;
   }
 
 return FALSE;

+ 171 - 79
thirdparty/pcre2/src/pcre2_compile.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2021 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -124,7 +124,7 @@ static unsigned int
 
 static int
   compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
-    uint32_t *, int32_t *, uint32_t *, int32_t *, branch_chain *,
+    uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *,
     compile_block *, PCRE2_SIZE *);
 
 static int
@@ -385,13 +385,15 @@ compiler is clever with identical subexpressions. */
 
 #define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1u << ((b)&7)))
 
-/* Private flags added to firstcu and reqcu. */
+/* Values and flags for the unsigned xxcuflags variables that accompany xxcu
+variables, which are concerned with first and required code units. A value
+greater than or equal to REQ_NONE means "no code unit set"; otherwise the
+matching xxcu variable is set, and the low valued bits are relevant. */
 
-#define REQ_CASELESS    (1u << 0)       /* Indicates caselessness */
-#define REQ_VARY        (1u << 1)       /* reqcu followed non-literal item */
-/* Negative values for the firstcu and reqcu flags */
-#define REQ_UNSET       (-2)            /* Not yet found anything */
-#define REQ_NONE        (-1)            /* Found not fixed char */
+#define REQ_UNSET     0xffffffffu  /* Not yet found anything */
+#define REQ_NONE      0xfffffffeu  /* Found not fixed character */
+#define REQ_CASELESS  0x00000001u  /* Code unit in xxcu is caseless */
+#define REQ_VARY      0x00000002u  /* Code unit is followed by non-literal */
 
 /* These flags are used in the groupinfo vector. */
 
@@ -2088,7 +2090,9 @@ get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, uint16_t *ptypeptr,
 PCRE2_UCHAR c;
 PCRE2_SIZE i, bot, top;
 PCRE2_SPTR ptr = *ptrptr;
-PCRE2_UCHAR name[32];
+PCRE2_UCHAR name[50];
+PCRE2_UCHAR *vptr = NULL;
+uint16_t ptscript = PT_NOTSCRIPT;
 
 if (ptr >= cb->end_pattern) goto ERROR_RETURN;
 c = *ptr++;
@@ -2100,36 +2104,95 @@ negation. */
 if (c == CHAR_LEFT_CURLY_BRACKET)
   {
   if (ptr >= cb->end_pattern) goto ERROR_RETURN;
+
   if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
     {
     *negptr = TRUE;
     ptr++;
     }
+
   for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++)
     {
     if (ptr >= cb->end_pattern) goto ERROR_RETURN;
     c = *ptr++;
+    while (c == '_' || c == '-' || isspace(c))
+      {
+      if (ptr >= cb->end_pattern) goto ERROR_RETURN;
+      c = *ptr++;
+      }
     if (c == CHAR_NUL) goto ERROR_RETURN;
     if (c == CHAR_RIGHT_CURLY_BRACKET) break;
-    name[i] = c;
+    name[i] = tolower(c);
+    if ((c == ':' || c == '=') && vptr == NULL) vptr = name + i;
     }
+
   if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
   name[i] = 0;
   }
 
-/* Otherwise there is just one following character, which must be an ASCII
-letter. */
+/* If { doesn't follow \p or \P there is just one following character, which
+must be an ASCII letter. */
 
 else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0)
   {
-  name[0] = c;
+  name[0] = tolower(c);
   name[1] = 0;
   }
 else goto ERROR_RETURN;
 
 *ptrptr = ptr;
 
-/* Search for a recognized property name using binary chop. */
+/* If the property contains ':' or '=' we have class name and value separately
+specified. The following are supported:
+
+  . Bidi_Class (synonym bc), for which the property names are "bidi<name>".
+  . Script (synonym sc) for which the property name is the script name
+  . Script_Extensions (synonym scx), ditto
+
+As this is a small number, we currently just check the names directly. If this
+grows, a sorted table and a switch will be neater.
+
+For both the script properties, set a PT_xxx value so that (1) they can be
+distinguished and (2) invalid script names that happen to be the name of
+another property can be diagnosed. */
+
+if (vptr != NULL)
+  {
+  int offset = 0;
+  PCRE2_UCHAR sname[8];
+
+  *vptr = 0;   /* Terminate property name */
+  if (PRIV(strcmp_c8)(name, STRING_bidiclass) == 0 ||
+      PRIV(strcmp_c8)(name, STRING_bc) == 0)
+    {
+    offset = 4;
+    sname[0] = CHAR_b;
+    sname[1] = CHAR_i;  /* There is no strcpy_c8 function */
+    sname[2] = CHAR_d;
+    sname[3] = CHAR_i;
+    }
+
+  else if (PRIV(strcmp_c8)(name, STRING_script) == 0 ||
+           PRIV(strcmp_c8)(name, STRING_sc) == 0)
+    ptscript = PT_SC;
+
+  else if (PRIV(strcmp_c8)(name, STRING_scriptextensions) == 0 ||
+           PRIV(strcmp_c8)(name, STRING_scx) == 0)
+    ptscript = PT_SCX;
+
+  else
+    {
+    *errorcodeptr = ERR47;
+    return FALSE;
+    }
+
+  /* Adjust the string in name[] as needed */
+
+  memmove(name + offset, vptr + 1, (name + i - vptr)*sizeof(PCRE2_UCHAR));
+  if (offset != 0) memmove(name, sname, offset*sizeof(PCRE2_UCHAR));
+  }
+
+/* Search for a recognized property using binary chop. */
 
 bot = 0;
 top = PRIV(utt_size);
@@ -2139,15 +2202,37 @@ while (bot < top)
   int r;
   i = (bot + top) >> 1;
   r = PRIV(strcmp_c8)(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
+
+  /* When a matching property is found, some extra checking is needed when the
+  \p{xx:yy} syntax is used and xx is either sc or scx. */
+
   if (r == 0)
     {
-    *ptypeptr = PRIV(utt)[i].type;
     *pdataptr = PRIV(utt)[i].value;
-    return TRUE;
+    if (vptr == NULL || ptscript == PT_NOTSCRIPT)
+      {
+      *ptypeptr = PRIV(utt)[i].type;
+      return TRUE;
+      }
+
+    switch (PRIV(utt)[i].type)
+      {
+      case PT_SC:
+      *ptypeptr = PT_SC;
+      return TRUE;
+
+      case PT_SCX:
+      *ptypeptr = ptscript;
+      return TRUE;
+      }
+
+    break;  /* Non-script found */
     }
+
   if (r > 0) bot = i + 1; else top = i;
   }
-*errorcodeptr = ERR47;   /* Unrecognized name */
+
+*errorcodeptr = ERR47;   /* Unrecognized property */
 return FALSE;
 
 ERROR_RETURN:            /* Malformed \P or \p */
@@ -5285,9 +5370,9 @@ Arguments:
   pptrptr           points to the current parsed pattern pointer
   errorcodeptr      points to error code variable
   firstcuptr        place to put the first required code unit
-  firstcuflagsptr   place to put the first code unit flags, or a negative number
+  firstcuflagsptr   place to put the first code unit flags
   reqcuptr          place to put the last required code unit
-  reqcuflagsptr     place to put the last required code unit flags, or a negative number
+  reqcuflagsptr     place to put the last required code unit flags
   bcptr             points to current branch chain
   cb                contains pointers to tables etc.
   lengthptr         NULL during the real compile phase
@@ -5300,8 +5385,8 @@ Returns:            0 There's been an error, *errorcodeptr is non-zero
 
 static int
 compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
-  int *errorcodeptr, uint32_t *firstcuptr, int32_t *firstcuflagsptr,
-  uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr,
+  int *errorcodeptr, uint32_t *firstcuptr, uint32_t *firstcuflagsptr,
+  uint32_t *reqcuptr, uint32_t *reqcuflagsptr, branch_chain *bcptr,
   compile_block *cb, PCRE2_SIZE *lengthptr)
 {
 int bravalue = 0;
@@ -5316,9 +5401,9 @@ uint32_t zeroreqcu, zerofirstcu;
 uint32_t escape;
 uint32_t *pptr = *pptrptr;
 uint32_t meta, meta_arg;
-int32_t firstcuflags, reqcuflags;
-int32_t zeroreqcuflags, zerofirstcuflags;
-int32_t req_caseopt, reqvary, tempreqvary;
+uint32_t firstcuflags, reqcuflags;
+uint32_t zeroreqcuflags, zerofirstcuflags;
+uint32_t req_caseopt, reqvary, tempreqvary;
 PCRE2_SIZE offset = 0;
 PCRE2_SIZE length_prevgroup = 0;
 PCRE2_UCHAR *code = *codeptr;
@@ -5374,13 +5459,13 @@ item types that can be repeated set these backoff variables appropriately. */
 firstcu = reqcu = zerofirstcu = zeroreqcu = 0;
 firstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET;
 
-/* The variable req_caseopt contains either the REQ_CASELESS value or zero,
+/* The variable req_caseopt contains either the REQ_CASELESS bit or zero,
 according to the current setting of the caseless flag. The REQ_CASELESS value
 leaves the lower 28 bit empty. It is added into the firstcu or reqcu variables
 to record the case status of the value. This is used only for ASCII characters.
 */
 
-req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
+req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0;
 
 /* Switch on next META item until the end of the branch */
 
@@ -5395,13 +5480,12 @@ for (;; pptr++)
   BOOL possessive_quantifier;
   BOOL note_group_empty;
   int class_has_8bitchar;
-  int i;
   uint32_t mclength;
   uint32_t skipunits;
   uint32_t subreqcu, subfirstcu;
   uint32_t groupnumber;
   uint32_t verbarglen, verbculen;
-  int32_t subreqcuflags, subfirstcuflags;  /* Must be signed */
+  uint32_t subreqcuflags, subfirstcuflags;
   open_capitem *oc;
   PCRE2_UCHAR mcbuffer[8];
 
@@ -5770,9 +5854,9 @@ for (;; pptr++)
         if (taboffset >= 0)
           {
           if (tabopt >= 0)
-            for (i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
+            for (int i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
           else
-            for (i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
+            for (int i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
           }
 
         /* Now see if we need to remove any special characters. An option
@@ -5786,9 +5870,9 @@ for (;; pptr++)
         being built and we are done. */
 
         if (local_negate)
-          for (i = 0; i < 32; i++) classbits[i] |= ~pbits[i];
+          for (int i = 0; i < 32; i++) classbits[i] |= (uint8_t)(~pbits[i]);
         else
-          for (i = 0; i < 32; i++) classbits[i] |= pbits[i];
+          for (int i = 0; i < 32; i++) classbits[i] |= pbits[i];
 
         /* Every class contains at least one < 256 character. */
 
@@ -5827,21 +5911,23 @@ for (;; pptr++)
         switch(escape)
           {
           case ESC_d:
-          for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
+          for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
           break;
 
           case ESC_D:
           should_flip_negation = TRUE;
-          for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_digit];
+          for (int i = 0; i < 32; i++)
+            classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]);
           break;
 
           case ESC_w:
-          for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
+          for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
           break;
 
           case ESC_W:
           should_flip_negation = TRUE;
-          for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_word];
+          for (int i = 0; i < 32; i++)
+            classbits[i] |= (uint8_t)(~cbits[i+cbit_word]);
           break;
 
           /* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
@@ -5852,12 +5938,13 @@ for (;; pptr++)
           longer treat \s and \S specially. */
 
           case ESC_s:
-          for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
+          for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
           break;
 
           case ESC_S:
           should_flip_negation = TRUE;
-          for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space];
+          for (int i = 0; i < 32; i++)
+            classbits[i] |= (uint8_t)(~cbits[i+cbit_space]);
           break;
 
           /* When adding the horizontal or vertical space lists to a class, or
@@ -6098,7 +6185,7 @@ for (;; pptr++)
         if (negate_class && !xclass_has_prop)
           {
           /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
-          for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
+          for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
           }
         memcpy(code, classbits, 32);
         code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
@@ -6124,7 +6211,7 @@ for (;; pptr++)
       if (negate_class)
         {
        /* Using 255 ^ instead of ~ avoids clang sanitize warning. */
-       for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
+       for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
        }
       memcpy(code, classbits, 32);
       }
@@ -6198,7 +6285,7 @@ for (;; pptr++)
     verbarglen = *(++pptr);
     verbculen = 0;
     tempcode = code++;
-    for (i = 0; i < (int)verbarglen; i++)
+    for (int i = 0; i < (int)verbarglen; i++)
       {
       meta = *(++pptr);
 #ifdef SUPPORT_UNICODE
@@ -6247,6 +6334,7 @@ for (;; pptr++)
     bravalue = OP_COND;
       {
       int count, index;
+      unsigned int i;
       PCRE2_SPTR name;
       named_group *ng = cb->named_groups;
       uint32_t length = *(++pptr);
@@ -6286,7 +6374,7 @@ for (;; pptr++)
         groupnumber = 0;
         if (meta == META_COND_RNUMBER)
           {
-          for (i = 1; i < (int)length; i++)
+          for (i = 1; i < length; i++)
             {
             groupnumber = groupnumber * 10 + name[i] - CHAR_0;
             if (groupnumber > MAX_GROUP_NUMBER)
@@ -6608,7 +6696,7 @@ for (;; pptr++)
 
       if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
         {
-        if (subfirstcuflags >= 0)
+        if (subfirstcuflags < REQ_NONE)
           {
           firstcu = subfirstcu;
           firstcuflags = subfirstcuflags;
@@ -6622,7 +6710,7 @@ for (;; pptr++)
       into reqcu if there wasn't one, using the vary flag that was in
       existence beforehand. */
 
-      else if (subfirstcuflags >= 0 && subreqcuflags < 0)
+      else if (subfirstcuflags < REQ_NONE && subreqcuflags >= REQ_NONE)
         {
         subreqcu = subfirstcu;
         subreqcuflags = subfirstcuflags | tempreqvary;
@@ -6631,7 +6719,7 @@ for (;; pptr++)
       /* If the subpattern set a required code unit (or set a first code unit
       that isn't really the first code unit - see above), set it. */
 
-      if (subreqcuflags >= 0)
+      if (subreqcuflags < REQ_NONE)
         {
         reqcu = subreqcu;
         reqcuflags = subreqcuflags;
@@ -6650,7 +6738,7 @@ for (;; pptr++)
     in that example, 'X' ends up set for both. */
 
     else if ((bravalue == OP_ASSERT || bravalue == OP_ASSERT_NA) &&
-             subreqcuflags >= 0 && subfirstcuflags >= 0)
+             subreqcuflags < REQ_NONE && subfirstcuflags < REQ_NONE)
       {
       reqcu = subreqcu;
       reqcuflags = subreqcuflags;
@@ -6680,7 +6768,7 @@ for (;; pptr++)
       this name is duplicated. */
 
       groupnumber = 0;
-      for (i = 0; i < cb->names_found; i++, ng++)
+      for (unsigned int i = 0; i < cb->names_found; i++, ng++)
         {
         if (length == ng->length &&
             PRIV(strncmp)(name, ng->name, length) == 0)
@@ -6935,14 +7023,19 @@ for (;; pptr++)
 #endif  /* MAYBE_UTF_MULTI */
 
       /* Handle the case of a single code unit - either with no UTF support, or
-      with UTF disabled, or for a single-code-unit UTF character. */
+      with UTF disabled, or for a single-code-unit UTF character. In the latter
+      case, for a repeated positive match, get the caseless flag for the
+      required code unit from the previous character, because a class like [Aa]
+      sets a caseless A but by now the req_caseopt flag has been reset. */
+
         {
         mcbuffer[0] = code[-1];
         mclength = 1;
         if (op_previous <= OP_CHARI && repeat_min > 1)
           {
           reqcu = mcbuffer[0];
-          reqcuflags = req_caseopt | cb->req_varyopt;
+          reqcuflags = cb->req_varyopt;
+          if (op_previous == OP_CHARI) reqcuflags |= REQ_CASELESS;
           }
         }
       goto OUTPUT_SINGLE_REPEAT;  /* Code shared with single character types */
@@ -7034,7 +7127,7 @@ for (;; pptr++)
           *lengthptr += delta;
           }
 
-        else for (i = 0; i < replicate; i++)
+        else for (int i = 0; i < replicate; i++)
           {
           memcpy(code, previous, CU2BYTES(1 + LINK_SIZE));
           previous = code;
@@ -7210,12 +7303,12 @@ for (;; pptr++)
 
             else
               {
-              if (groupsetfirstcu && reqcuflags < 0)
+              if (groupsetfirstcu && reqcuflags >= REQ_NONE)
                 {
                 reqcu = firstcu;
                 reqcuflags = firstcuflags;
                 }
-              for (i = 1; (uint32_t)i < repeat_min; i++)
+              for (uint32_t i = 1; i < repeat_min; i++)
                 {
                 memcpy(code, previous, CU2BYTES(len));
                 code += len;
@@ -7259,14 +7352,14 @@ for (;; pptr++)
 
           /* This is compiling for real */
 
-          else for (i = repeat_max - 1; i >= 0; i--)
+          else for (uint32_t i = repeat_max; i >= 1; i--)
             {
             *code++ = OP_BRAZERO + repeat_type;
 
             /* All but the final copy start a new nesting, maintaining the
             chain of brackets outstanding. */
 
-            if (i != 0)
+            if (i != 1)
               {
               int linkoffset;
               *code++ = OP_BRA;
@@ -7985,9 +8078,9 @@ Arguments:
   errorcodeptr      -> pointer to error code variable
   skipunits         skip this many code units at start (for brackets and OP_COND)
   firstcuptr        place to put the first required code unit
-  firstcuflagsptr   place to put the first code unit flags, or a negative number
+  firstcuflagsptr   place to put the first code unit flags
   reqcuptr          place to put the last required code unit
-  reqcuflagsptr     place to put the last required code unit flags, or a negative number
+  reqcuflagsptr     place to put the last required code unit flags
   bcptr             pointer to the chain of currently open branches
   cb                points to the data block with tables pointers etc.
   lengthptr         NULL during the real compile phase
@@ -8001,7 +8094,7 @@ Returns:            0 There has been an error
 static int
 compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
   int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr,
-  int32_t *firstcuflagsptr, uint32_t *reqcuptr,int32_t *reqcuflagsptr,
+  uint32_t *firstcuflagsptr, uint32_t *reqcuptr, uint32_t *reqcuflagsptr,
   branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr)
 {
 PCRE2_UCHAR *code = *codeptr;
@@ -8014,9 +8107,9 @@ int okreturn = 1;
 uint32_t *pptr = *pptrptr;
 uint32_t firstcu, reqcu;
 uint32_t lookbehindlength;
-int32_t firstcuflags, reqcuflags;
+uint32_t firstcuflags, reqcuflags;
 uint32_t branchfirstcu, branchreqcu;
-int32_t branchfirstcuflags, branchreqcuflags;
+uint32_t branchfirstcuflags, branchreqcuflags;
 PCRE2_SIZE length;
 branch_chain bc;
 
@@ -8135,9 +8228,9 @@ for (;;)
 
       if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu)
         {
-        if (firstcuflags >= 0)
+        if (firstcuflags < REQ_NONE)
           {
-          if (reqcuflags < 0)
+          if (reqcuflags >= REQ_NONE)
             {
             reqcu = firstcu;
             reqcuflags = firstcuflags;
@@ -8149,8 +8242,8 @@ for (;;)
       /* If we (now or from before) have no firstcu, a firstcu from the
       branch becomes a reqcu if there isn't a branch reqcu. */
 
-      if (firstcuflags < 0 && branchfirstcuflags >= 0 &&
-          branchreqcuflags < 0)
+      if (firstcuflags >= REQ_NONE && branchfirstcuflags < REQ_NONE &&
+          branchreqcuflags >= REQ_NONE)
         {
         branchreqcu = branchfirstcu;
         branchreqcuflags = branchfirstcuflags;
@@ -8298,7 +8391,7 @@ Returns:     TRUE or FALSE
 */
 
 static BOOL
-is_anchored(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb,
+is_anchored(PCRE2_SPTR code, uint32_t bracket_map, compile_block *cb,
   int atomcount, BOOL inassert)
 {
 do {
@@ -8321,7 +8414,7 @@ do {
             op == OP_SCBRA || op == OP_SCBRAPOS)
      {
      int n = GET2(scode, 1+LINK_SIZE);
-     int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
+     uint32_t new_map = bracket_map | ((n < 32)? (1u << n) : 1);
      if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE;
      }
 
@@ -8681,15 +8774,15 @@ Returns:     the fixed first code unit, or 0 with REQ_NONE in flags
 */
 
 static uint32_t
-find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, uint32_t inassert)
+find_firstassertedcu(PCRE2_SPTR code, uint32_t *flags, uint32_t inassert)
 {
 uint32_t c = 0;
-int cflags = REQ_NONE;
+uint32_t cflags = REQ_NONE;
 
 *flags = REQ_NONE;
 do {
    uint32_t d;
-   int dflags;
+   uint32_t dflags;
    int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
              *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
    PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);
@@ -8712,9 +8805,8 @@ do {
      case OP_SCRIPT_RUN:
      d = find_firstassertedcu(scode, &dflags, inassert +
        ((op == OP_ASSERT || op == OP_ASSERT_NA)?1:0));
-     if (dflags < 0)
-       return 0;
-     if (cflags < 0) { c = d; cflags = dflags; }
+     if (dflags >= REQ_NONE) return 0;
+     if (cflags >= REQ_NONE) { c = d; cflags = dflags; }
        else if (c != d || cflags != dflags) return 0;
      break;
 
@@ -8727,7 +8819,7 @@ do {
      case OP_MINPLUS:
      case OP_POSPLUS:
      if (inassert == 0) return 0;
-     if (cflags < 0) { c = scode[1]; cflags = 0; }
+     if (cflags >= REQ_NONE) { c = scode[1]; cflags = 0; }
        else if (c != scode[1]) return 0;
      break;
 
@@ -8753,7 +8845,7 @@ do {
 #endif
 #endif
 
-     if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
+     if (cflags >= REQ_NONE) { c = scode[1]; cflags = REQ_CASELESS; }
        else if (c != scode[1]) return 0;
      break;
      }
@@ -9689,7 +9781,7 @@ PCRE2_SIZE re_blocksize;              /* Size of memory block */
 PCRE2_SIZE big32count = 0;            /* 32-bit literals >= 0x80000000 */
 PCRE2_SIZE parsed_size_needed;        /* Needed for parsed pattern */
 
-int32_t firstcuflags, reqcuflags;     /* Type of first/req code unit */
+uint32_t firstcuflags, reqcuflags;    /* Type of first/req code unit */
 uint32_t firstcu, reqcu;              /* Value of first/req code unit */
 uint32_t setflags = 0;                /* NL and BSR set flags */
 
@@ -10369,13 +10461,13 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
   (these are not saved during the compile because they can cause conflicts with
   actual literals that follow). */
 
-  if (firstcuflags < 0)
+  if (firstcuflags >= REQ_NONE)
     firstcu = find_firstassertedcu(codestart, &firstcuflags, 0);
 
   /* Save the data for a first code unit. The existence of one means the
   minimum length must be at least 1. */
 
-  if (firstcuflags >= 0)
+  if (firstcuflags < REQ_NONE)
     {
     re->first_codeunit = firstcu;
     re->flags |= PCRE2_FIRSTSET;
@@ -10422,16 +10514,16 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
   different character and not a non-starting code unit of the first character,
   because the minimum length count is in characters, not code units. */
 
-  if (reqcuflags >= 0)
+  if (reqcuflags < REQ_NONE)
     {
 #if PCRE2_CODE_UNIT_WIDTH == 16
     if ((re->overall_options & PCRE2_UTF) == 0 ||   /* Not UTF */
-        firstcuflags < 0 ||                         /* First not set */
+        firstcuflags >= REQ_NONE ||                 /* First not set */
         (firstcu & 0xf800) != 0xd800 ||             /* First not surrogate */
         (reqcu & 0xfc00) != 0xdc00)                 /* Req not low surrogate */
 #elif PCRE2_CODE_UNIT_WIDTH == 8
     if ((re->overall_options & PCRE2_UTF) == 0 ||   /* Not UTF */
-        firstcuflags < 0 ||                         /* First not set */
+        firstcuflags >= REQ_NONE ||                 /* First not set */
         (firstcu & 0x80) == 0 ||                    /* First is ASCII */
         (reqcu & 0x80) == 0)                        /* Req is ASCII */
 #endif

+ 67 - 8
thirdparty/pcre2/src/pcre2_dfa_match.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2021 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -1193,6 +1193,11 @@ for (;;)
           OK = prop->script == code[2];
           break;
 
+          case PT_SCX:
+          OK = (prop->script == code[2] ||
+                MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[2]) != 0);
+          break;
+
           /* These are specials for combination cases. */
 
           case PT_ALNUM:
@@ -1240,6 +1245,15 @@ for (;;)
                c >= 0xe000;
           break;
 
+          case PT_BIDICL:
+          OK = UCD_BIDICLASS(c) == code[2];
+          break;
+
+          case PT_BOOL:
+          OK = MAPBIT(PRIV(ucd_boolprop_sets) +
+            UCD_BPROPS_PROP(prop), code[2]) != 0;
+          break;
+
           /* Should never occur, but keep compilers from grumbling. */
 
           default:
@@ -1451,6 +1465,11 @@ for (;;)
           OK = prop->script == code[3];
           break;
 
+          case PT_SCX:
+          OK = (prop->script == code[3] ||
+                MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
+          break;
+
           /* These are specials for combination cases. */
 
           case PT_ALNUM:
@@ -1498,6 +1517,15 @@ for (;;)
                c >= 0xe000;
           break;
 
+          case PT_BIDICL:
+          OK = UCD_BIDICLASS(c) == code[3];
+          break;
+
+          case PT_BOOL:
+          OK = MAPBIT(PRIV(ucd_boolprop_sets) +
+            UCD_BPROPS_PROP(prop), code[3]) != 0;
+          break;
+
           /* Should never occur, but keep compilers from grumbling. */
 
           default:
@@ -1692,6 +1720,11 @@ for (;;)
           OK = prop->script == code[3];
           break;
 
+          case PT_SCX:
+          OK = (prop->script == code[3] ||
+                MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0);
+          break;
+
           /* These are specials for combination cases. */
 
           case PT_ALNUM:
@@ -1739,6 +1772,15 @@ for (;;)
                c >= 0xe000;
           break;
 
+          case PT_BIDICL:
+          OK = UCD_BIDICLASS(c) == code[3];
+          break;
+
+          case PT_BOOL:
+          OK = MAPBIT(PRIV(ucd_boolprop_sets) +
+            UCD_BPROPS_PROP(prop), code[3]) != 0;
+          break;
+
           /* Should never occur, but keep compilers from grumbling. */
 
           default:
@@ -1958,6 +2000,12 @@ for (;;)
           OK = prop->script == code[1 + IMM2_SIZE + 2];
           break;
 
+          case PT_SCX:
+          OK = (prop->script == code[1 + IMM2_SIZE + 2] ||
+                MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop),
+                  code[1 + IMM2_SIZE + 2]) != 0);
+          break;
+
           /* These are specials for combination cases. */
 
           case PT_ALNUM:
@@ -2005,6 +2053,15 @@ for (;;)
                c >= 0xe000;
           break;
 
+          case PT_BIDICL:
+          OK = UCD_BIDICLASS(c) == code[1 + IMM2_SIZE + 2];
+          break;
+
+          case PT_BOOL:
+          OK = MAPBIT(PRIV(ucd_boolprop_sets) +
+            UCD_BPROPS_PROP(prop), code[1 + IMM2_SIZE + 2]) != 0;
+          break;
+
           /* Should never occur, but keep compilers from grumbling. */
 
           default:
@@ -3285,20 +3342,22 @@ rws->next = NULL;
 rws->size = RWS_BASE_SIZE;
 rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
 
-/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
-subject string. */
+/* Recognize NULL, length 0 as an empty string. */
 
-if (length == PCRE2_ZERO_TERMINATED)
-  {
-  length = PRIV(strlen)(subject);
-  was_zero_terminated = 1;
-  }
+if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
 
 /* Plausibility checks */
 
 if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
 if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
   return PCRE2_ERROR_NULL;
+
+if (length == PCRE2_ZERO_TERMINATED)
+  {
+  length = PRIV(strlen)(subject);
+  was_zero_terminated = 1;
+  }
+
 if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
 if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
 

+ 2 - 2
thirdparty/pcre2/src/pcre2_error.c

@@ -119,7 +119,7 @@ static const unsigned char compile_error_texts[] =
   /* 45 */
   "this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
   "malformed \\P or \\p sequence\0"
-  "unknown property name after \\P or \\p\0"
+  "unknown property after \\P or \\p\0"
   "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0"
   "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
   /* 50 */
@@ -253,7 +253,7 @@ static const unsigned char match_error_texts[] =
   "unknown substring\0"
   /* 50 */
   "non-unique substring name\0"
-  "NULL argument passed\0"
+  "NULL argument passed with non-zero length\0"
   "nested recursion at the same subject position\0"
   "matching depth limit exceeded\0"
   "requested value is not available\0"

+ 3 - 3
thirdparty/pcre2/src/pcre2_extuni.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2019 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -105,7 +105,7 @@ while (eptr < end_subject)
   /* Not breaking between Regional Indicators is allowed only if there
   are an even number of preceding RIs. */
 
-  if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
+  if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
     {
     int ricount = 0;
     PCRE2_SPTR bptr = eptr - 1;
@@ -123,7 +123,7 @@ while (eptr < end_subject)
         }
       else
       c = *bptr;
-      if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
+      if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
       ricount++;
       }
     if ((ricount & 1) != 0) break;  /* Grapheme break required */

+ 69 - 25
thirdparty/pcre2/src/pcre2_internal.h

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2020 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -954,6 +954,13 @@ a positive value. */
 #define STRING_LIMIT_RECURSION_EQ         "LIMIT_RECURSION="
 #define STRING_MARK                       "MARK"
 
+#define STRING_bc                         "bc"
+#define STRING_bidiclass                  "bidiclass"
+#define STRING_sc                         "sc"
+#define STRING_script                     "script"
+#define STRING_scriptextensions           "scriptextensions"
+#define STRING_scx                        "scx"
+
 #else  /* SUPPORT_UNICODE */
 
 /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
@@ -1248,26 +1255,39 @@ only. */
 #define STRING_LIMIT_RECURSION_EQ         STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
 #define STRING_MARK                       STR_M STR_A STR_R STR_K
 
+#define STRING_bc                         STR_b STR_c
+#define STRING_bidiclass                  STR_b STR_i STR_d STR_i STR_c STR_l STR_a STR_s STR_s
+#define STRING_sc                         STR_s STR_c
+#define STRING_script                     STR_s STR_c STR_r STR_i STR_p STR_t
+#define STRING_scriptextensions           STR_s STR_c STR_r STR_i STR_p STR_t STR_e STR_x STR_t STR_e STR_n STR_s STR_i STR_o STR_n STR_s
+#define STRING_scx                        STR_s STR_c STR_x
+
+
 #endif  /* SUPPORT_UNICODE */
 
 /* -------------------- End of character and string names -------------------*/
 
 /* -------------------- Definitions for compiled patterns -------------------*/
 
-/* Codes for different types of Unicode property */
+/* Codes for different types of Unicode property. If these definitions are
+changed, the autopossessifying table in pcre2_auto_possess.c must be updated to
+match. */
 
 #define PT_ANY        0    /* Any property - matches all chars */
 #define PT_LAMP       1    /* L& - the union of Lu, Ll, Lt */
 #define PT_GC         2    /* Specified general characteristic (e.g. L) */
 #define PT_PC         3    /* Specified particular characteristic (e.g. Lu) */
-#define PT_SC         4    /* Script (e.g. Han) */
-#define PT_ALNUM      5    /* Alphanumeric - the union of L and N */
-#define PT_SPACE      6    /* Perl space - Z plus 9,10,12,13 */
-#define PT_PXSPACE    7    /* POSIX space - Z plus 9,10,11,12,13 */
-#define PT_WORD       8    /* Word - L plus N plus underscore */
-#define PT_CLIST      9    /* Pseudo-property: match character list */
-#define PT_UCNC      10    /* Universal Character nameable character */
-#define PT_TABSIZE   11    /* Size of square table for autopossessify tests */
+#define PT_SC         4    /* Script only (e.g. Han) */
+#define PT_SCX        5    /* Script extensions (includes SC) */
+#define PT_ALNUM      6    /* Alphanumeric - the union of L and N */
+#define PT_SPACE      7    /* Perl space - general category Z plus 9,10,12,13 */
+#define PT_PXSPACE    8    /* POSIX space - Z plus 9,10,11,12,13 */
+#define PT_WORD       9    /* Word - L plus N plus underscore */
+#define PT_CLIST     10    /* Pseudo-property: match character list */
+#define PT_UCNC      11    /* Universal Character nameable character */
+#define PT_BIDICL    12    /* Specified bidi class */
+#define PT_BOOL      13    /* Boolean property */
+#define PT_TABSIZE   14    /* Size of square table for autopossessify tests */
 
 /* The following special properties are used only in XCLASS items, when POSIX
 classes are specified and PCRE2_UCP is set - in other words, for Unicode
@@ -1275,22 +1295,27 @@ handling of these classes. They are not available via the \p or \P escapes like
 those in the above list, and so they do not take part in the autopossessifying
 table. */
 
-#define PT_PXGRAPH   11    /* [:graph:] - characters that mark the paper */
-#define PT_PXPRINT   12    /* [:print:] - [:graph:] plus non-control spaces */
-#define PT_PXPUNCT   13    /* [:punct:] - punctuation characters */
+#define PT_PXGRAPH   14    /* [:graph:] - characters that mark the paper */
+#define PT_PXPRINT   15    /* [:print:] - [:graph:] plus non-control spaces */
+#define PT_PXPUNCT   16    /* [:punct:] - punctuation characters */
+
+/* This value is used when parsing \p and \P escapes to indicate that neither
+\p{script:...} nor \p{scx:...} has been encountered. */
+
+#define PT_NOTSCRIPT 255
 
 /* Flag bits and data types for the extended class (OP_XCLASS) for classes that
 contain characters with values greater than 255. */
 
-#define XCL_NOT       0x01    /* Flag: this is a negative class */
-#define XCL_MAP       0x02    /* Flag: a 32-byte map is present */
-#define XCL_HASPROP   0x04    /* Flag: property checks are present. */
+#define XCL_NOT      0x01  /* Flag: this is a negative class */
+#define XCL_MAP      0x02  /* Flag: a 32-byte map is present */
+#define XCL_HASPROP  0x04  /* Flag: property checks are present. */
 
-#define XCL_END       0    /* Marks end of individual items */
-#define XCL_SINGLE    1    /* Single item (one multibyte char) follows */
-#define XCL_RANGE     2    /* A range (two multibyte chars) follows */
-#define XCL_PROP      3    /* Unicode property (2-byte property code follows) */
-#define XCL_NOTPROP   4    /* Unicode inverted property (ditto) */
+#define XCL_END      0     /* Marks end of individual items */
+#define XCL_SINGLE   1     /* Single item (one multibyte char) follows */
+#define XCL_RANGE    2     /* A range (two multibyte chars) follows */
+#define XCL_PROP     3     /* Unicode property (2-byte property code follows) */
+#define XCL_NOTPROP  4     /* Unicode inverted property (ditto) */
 
 /* These are escaped items that aren't just an encoding of a particular data
 value such as \n. They must have non-zero values, as check_escape() returns 0
@@ -1797,8 +1822,8 @@ typedef struct {
   uint8_t gbprop;     /* ucp_gbControl, etc. (grapheme break property) */
   uint8_t caseset;    /* offset to multichar other cases or zero */
   int32_t other_case; /* offset to other case, or zero if none */
-  int16_t scriptx;    /* script extension value */
-  int16_t dummy;      /* spare - to round to multiple of 4 bytes */
+  uint16_t scriptx_bidiclass; /* script extension (11 bit) and bidi class (5 bit) values */
+  uint16_t bprops;    /* binary properties offset */
 } ucd_record;
 
 /* UCD access macros */
@@ -1815,13 +1840,30 @@ typedef struct {
 #define GET_UCD(ch) REAL_GET_UCD(ch)
 #endif
 
+#define UCD_SCRIPTX_MASK 0x3ff
+#define UCD_BIDICLASS_SHIFT 11
+#define UCD_BPROPS_MASK 0xfff
+
+#define UCD_SCRIPTX_PROP(prop) ((prop)->scriptx_bidiclass & UCD_SCRIPTX_MASK)
+#define UCD_BIDICLASS_PROP(prop) ((prop)->scriptx_bidiclass >> UCD_BIDICLASS_SHIFT)
+#define UCD_BPROPS_PROP(prop) ((prop)->bprops & UCD_BPROPS_MASK)
+
 #define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype
 #define UCD_SCRIPT(ch)      GET_UCD(ch)->script
 #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
 #define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop
 #define UCD_CASESET(ch)     GET_UCD(ch)->caseset
 #define UCD_OTHERCASE(ch)   ((uint32_t)((int)ch + (int)(GET_UCD(ch)->other_case)))
-#define UCD_SCRIPTX(ch)     GET_UCD(ch)->scriptx
+#define UCD_SCRIPTX(ch)     UCD_SCRIPTX_PROP(GET_UCD(ch))
+#define UCD_BPROPS(ch)      UCD_BPROPS_PROP(GET_UCD(ch))
+#define UCD_BIDICLASS(ch)   UCD_BIDICLASS_PROP(GET_UCD(ch))
+
+/* The "scriptx" and bprops fields contain offsets into vectors of 32-bit words
+that form a bitmap representing a list of scripts or boolean properties. These
+macros test or set a bit in the map by number. */
+
+#define MAPBIT(map,n) ((map)[(n)/32]&(1u<<((n)%32)))
+#define MAPSET(map,n) ((map)[(n)/32]|=(1u<<((n)%32)))
 
 /* Header for serialized pcre2 codes. */
 
@@ -1878,6 +1920,7 @@ extern const uint8_t          PRIV(utf8_table4)[];
 #endif
 #define _pcre2_hspace_list             PCRE2_SUFFIX(_pcre2_hspace_list_)
 #define _pcre2_vspace_list             PCRE2_SUFFIX(_pcre2_vspace_list_)
+#define _pcre2_ucd_boolprop_sets       PCRE2_SUFFIX(_pcre2_ucd_boolprop_sets_)
 #define _pcre2_ucd_caseless_sets       PCRE2_SUFFIX(_pcre2_ucd_caseless_sets_)
 #define _pcre2_ucd_digit_sets          PCRE2_SUFFIX(_pcre2_ucd_digit_sets_)
 #define _pcre2_ucd_script_sets         PCRE2_SUFFIX(_pcre2_ucd_script_sets_)
@@ -1901,9 +1944,10 @@ extern const pcre2_match_context       PRIV(default_match_context);
 extern const uint8_t                   PRIV(default_tables)[];
 extern const uint32_t                  PRIV(hspace_list)[];
 extern const uint32_t                  PRIV(vspace_list)[];
+extern const uint32_t                  PRIV(ucd_boolprop_sets)[];
 extern const uint32_t                  PRIV(ucd_caseless_sets)[];
 extern const uint32_t                  PRIV(ucd_digit_sets)[];
-extern const uint8_t                   PRIV(ucd_script_sets)[];
+extern const uint32_t                  PRIV(ucd_script_sets)[];
 extern const ucd_record                PRIV(ucd_records)[];
 #if PCRE2_CODE_UNIT_WIDTH == 32
 extern const ucd_record                PRIV(dummy_ucd_record)[];

+ 14 - 3
thirdparty/pcre2/src/pcre2_intmodedep.h

@@ -519,7 +519,7 @@ it is. This is called only in UTF-32 mode - we don't put a test within the
 macro because almost all calls are already within a block of UTF-32 only
 code.
 
-These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
+These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */
 
 #define BACKCHAR(eptr) do { } while (0)
 
@@ -747,8 +747,8 @@ typedef struct compile_block {
   uint32_t class_range_start;      /* Overall class range start */
   uint32_t class_range_end;        /* Overall class range end */
   PCRE2_UCHAR nl[4];               /* Newline string when fixed length */
+  uint32_t req_varyopt;            /* "After variable item" flag for reqbyte */
   int  max_lookbehind;             /* Maximum lookbehind (characters) */
-  int  req_varyopt;                /* "After variable item" flag for reqbyte */
   BOOL had_accept;                 /* (*ACCEPT) encountered */
   BOOL had_pruneorskip;            /* (*PRUNE) or (*SKIP) encountered */
   BOOL had_recurse;                /* Had a recursion or subroutine call */
@@ -764,7 +764,7 @@ typedef struct pcre2_real_jit_stack {
 } pcre2_real_jit_stack;
 
 /* Structure for items in a linked list that represents an explicit recursive
-call within the pattern when running pcre_dfa_match(). */
+call within the pattern when running pcre2_dfa_match(). */
 
 typedef struct dfa_recursion_info {
   struct dfa_recursion_info *prevrec;
@@ -838,6 +838,17 @@ multiple of PCRE2_SIZE. See various comments above. */
 typedef char check_heapframe_size[
   ((sizeof(heapframe) % sizeof(PCRE2_SIZE)) == 0)? (+1):(-1)];
 
+/* Structure for computing the alignment of heapframe. */
+
+typedef struct heapframe_align {
+  char unalign;    /* Completely unalign the current offset */
+  heapframe frame; /* Offset is its alignment */
+} heapframe_align;
+
+/* This define is the minimum alignment required for a heapframe, in bytes. */
+
+#define HEAPFRAME_ALIGNMENT offsetof(heapframe_align, frame)
+
 /* Structure for passing "static" information around between the functions
 doing traditional NFA matching (pcre2_match() and friends). */
 

文件差异内容过多而无法显示
+ 238 - 164
thirdparty/pcre2/src/pcre2_jit_compile.c


+ 1 - 1
thirdparty/pcre2/src/pcre2_jit_match.c

@@ -120,7 +120,7 @@ else if ((options & PCRE2_PARTIAL_SOFT) != 0)
 if (functions == NULL || functions->executable_funcs[index] == NULL)
   return PCRE2_ERROR_JIT_BADOPTION;
 
-/* Sanity checks should be handled by pcre_exec. */
+/* Sanity checks should be handled by pcre2_match. */
 arguments.str = subject + start_offset;
 arguments.begin = subject;
 arguments.end = subject + length;

+ 1 - 1
thirdparty/pcre2/src/pcre2_jit_misc.c

@@ -135,7 +135,7 @@ return NULL;
 
 pcre2_jit_stack *jit_stack;
 
-if (startsize < 1 || maxsize < 1)
+if (startsize == 0 || maxsize == 0 || maxsize > SIZE_MAX - STACK_GROWTH_RATE)
   return NULL;
 if (startsize > maxsize)
   startsize = maxsize;

+ 35 - 35
thirdparty/pcre2/src/pcre2_jit_simd_inc.h

@@ -339,7 +339,7 @@ if (common->mode != PCRE2_JIT_COMPLETE)
   {
   JUMPHERE(partial_quit[0]);
   JUMPHERE(partial_quit[1]);
-  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
+  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
   CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
   }
 else
@@ -537,7 +537,7 @@ if (common->match_end_ptr != 0)
   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
 
-  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
+  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
   CMOV(SLJIT_LESS, STR_END, TMP1, 0);
   }
 
@@ -883,14 +883,14 @@ if (char1 == char2)
 
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
   if (common->utf && offset > 0)
-    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
-                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_utf));
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                     SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_utf));
   else
-    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
-                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs));
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                     SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs));
 #else
-  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
-                   SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs));
+  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                   SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs));
 #endif
   }
 else
@@ -904,14 +904,14 @@ else
 
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
     if (common->utf && offset > 0)
-      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
-                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask_utf));
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                       SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask_utf));
     else
-      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
-                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask));
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                       SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask));
 #else
-    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
-                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_mask));
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                     SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask));
 #endif
     }
   else
@@ -922,14 +922,14 @@ else
 
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
     if (common->utf && offset > 0)
-      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
-                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2_utf));
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                       SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2_utf));
     else
-      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
-                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2));
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                       SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2));
 #else
-    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(UW) | SLJIT_ARG3(UW) | SLJIT_ARG4(UW),
-                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcs_2));
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                     SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2));
 #endif
     }
   }
@@ -1067,7 +1067,7 @@ else
   OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
   OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
 
-  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, STR_END, 0, SLJIT_R0, 0);
+  OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, SLJIT_R0, 0);
   CMOV(SLJIT_LESS, SLJIT_R0, STR_END, 0);
   }
 
@@ -1084,31 +1084,31 @@ if (diff == 1) {
   if (char1a == char1b && char2a == char2b) {
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
     if (common->utf)
-      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
-                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0_utf));
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                       SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0_utf));
     else
 #endif
-      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
-                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_0));
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                       SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0));
   } else {
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
     if (common->utf)
-      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
-                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1_utf));
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                       SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1_utf));
     else
 #endif
-      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
-                       SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_1));
+      sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                       SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1));
   }
 } else {
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
   if (common->utf)
-    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
-                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default_utf));
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                     SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default_utf));
   else
 #endif
-    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW),
-                     SLJIT_IMM, SLJIT_FUNC_OFFSET(ffcps_default));
+    sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W),
+                     SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default));
 }
 
 /* Restore STR_PTR register. */
@@ -1418,7 +1418,7 @@ if (common->mode != PCRE2_JIT_COMPLETE)
   {
   JUMPHERE(partial_quit[0]);
   JUMPHERE(partial_quit[1]);
-  OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
+  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
   CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
   }
 else
@@ -1673,7 +1673,7 @@ if (common->match_end_ptr != 0)
   OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
   OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));
 
-  OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
+  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
   CMOV(SLJIT_LESS, STR_END, TMP1, 0);
   }
 

+ 268 - 64
thirdparty/pcre2/src/pcre2_match.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2015-2021 University of Cambridge
+          New API code Copyright (c) 2015-2022 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -49,7 +49,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /* #define DEBUG_SHOW_OPS */
 /* #define DEBUG_SHOW_RMATCH */
 
-#ifdef DEBUG_FRAME_DISPLAY
+#ifdef DEBUG_FRAMES_DISPLAY
 #include <stdarg.h>
 #endif
 
@@ -159,7 +159,8 @@ enum { RM100=100, RM101 };
 #ifdef SUPPORT_UNICODE
 enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
        RM208,     RM209, RM210, RM211, RM212, RM213, RM214, RM215,
-       RM216,     RM217, RM218, RM219, RM220, RM221, RM222 };
+       RM216,     RM217, RM218, RM219, RM220, RM221, RM222, RM223,
+       RM224,     RM225 };
 #endif
 
 /* Define short names for general fields in the current backtrack frame, which
@@ -2421,40 +2422,49 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
       {
       const uint32_t *cp;
       const ucd_record *prop = GET_UCD(fc);
+      BOOL notmatch = Fop == OP_NOTPROP;
 
       switch(Fecode[1])
         {
         case PT_ANY:
-        if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
+        if (notmatch) RRETURN(MATCH_NOMATCH);
         break;
 
         case PT_LAMP:
         if ((prop->chartype == ucp_Lu ||
              prop->chartype == ucp_Ll ||
-             prop->chartype == ucp_Lt) == (Fop == OP_NOTPROP))
+             prop->chartype == ucp_Lt) == notmatch)
           RRETURN(MATCH_NOMATCH);
         break;
 
         case PT_GC:
-        if ((Fecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (Fop == OP_PROP))
+        if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch)
           RRETURN(MATCH_NOMATCH);
         break;
 
         case PT_PC:
-        if ((Fecode[2] != prop->chartype) == (Fop == OP_PROP))
+        if ((Fecode[2] == prop->chartype) == notmatch)
           RRETURN(MATCH_NOMATCH);
         break;
 
         case PT_SC:
-        if ((Fecode[2] != prop->script) == (Fop == OP_PROP))
+        if ((Fecode[2] == prop->script) == notmatch)
           RRETURN(MATCH_NOMATCH);
         break;
 
+        case PT_SCX:
+          {
+          BOOL ok = (Fecode[2] == prop->script ||
+                     MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
+          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
+          }
+        break;
+
         /* These are specials */
 
         case PT_ALNUM:
         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
-             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (Fop == OP_NOTPROP))
+             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == notmatch)
           RRETURN(MATCH_NOMATCH);
         break;
 
@@ -2468,12 +2478,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
           {
           HSPACE_CASES:
           VSPACE_CASES:
-          if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
+          if (notmatch) RRETURN(MATCH_NOMATCH);
           break;
 
           default:
-          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
-            (Fop == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
+          if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch)
+            RRETURN(MATCH_NOMATCH);
           break;
           }
         break;
@@ -2481,7 +2491,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
         case PT_WORD:
         if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
              PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
-             fc == CHAR_UNDERSCORE) == (Fop == OP_NOTPROP))
+             fc == CHAR_UNDERSCORE) == notmatch)
           RRETURN(MATCH_NOMATCH);
         break;
 
@@ -2490,19 +2500,32 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
         for (;;)
           {
           if (fc < *cp)
-            { if (Fop == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
+            { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } }
           if (fc == *cp++)
-            { if (Fop == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
+            { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; }
           }
         break;
 
         case PT_UCNC:
         if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
              fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
-             fc >= 0xe000) == (Fop == OP_NOTPROP))
+             fc >= 0xe000) == notmatch)
           RRETURN(MATCH_NOMATCH);
         break;
 
+        case PT_BIDICL:
+        if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
+          RRETURN(MATCH_NOMATCH);
+        break;
+
+        case PT_BOOL:
+          {
+          BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
+            UCD_BPROPS_PROP(prop), Fecode[2]) != 0;
+          if (ok == notmatch) RRETURN(MATCH_NOMATCH);
+          }
+        break;
+
         /* This should never occur */
 
         default:
@@ -2616,18 +2639,20 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 
     /* First, ensure the minimum number of matches are present. Use inline
     code for maximizing the speed, and do the type test once at the start
-    (i.e. keep it out of the loop). The code for UTF mode is separated out for
-    tidiness, except for Unicode property tests. */
+    (i.e. keep it out of the loops). As there are no calls to RMATCH in the
+    loops, we can use an ordinary variable for "notmatch". The code for UTF
+    mode is separated out for tidiness, except for Unicode property tests. */
 
     if (Lmin > 0)
       {
 #ifdef SUPPORT_UNICODE
       if (proptype >= 0)  /* Property tests in all modes */
         {
+        BOOL notmatch = Lctype == OP_NOTPROP;
         switch(proptype)
           {
           case PT_ANY:
-          if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
+          if (notmatch) RRETURN(MATCH_NOMATCH);
           for (i = 1; i <= Lmin; i++)
             {
             if (Feptr >= mb->end_subject)
@@ -2652,7 +2677,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
             chartype = UCD_CHARTYPE(fc);
             if ((chartype == ucp_Lu ||
                  chartype == ucp_Ll ||
-                 chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
+                 chartype == ucp_Lt) == notmatch)
               RRETURN(MATCH_NOMATCH);
             }
           break;
@@ -2666,7 +2691,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               RRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(fc, Feptr);
-            if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
+            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch)
               RRETURN(MATCH_NOMATCH);
             }
           break;
@@ -2680,7 +2705,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               RRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(fc, Feptr);
-            if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
+            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch)
               RRETURN(MATCH_NOMATCH);
             }
           break;
@@ -2694,7 +2719,26 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               RRETURN(MATCH_NOMATCH);
               }
             GETCHARINCTEST(fc, Feptr);
-            if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
+            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch)
+              RRETURN(MATCH_NOMATCH);
+            }
+          break;
+
+          case PT_SCX:
+          for (i = 1; i <= Lmin; i++)
+            {
+            BOOL ok;
+            const ucd_record *prop;
+            if (Feptr >= mb->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
+            GETCHARINCTEST(fc, Feptr);
+            prop = GET_UCD(fc);
+            ok = (prop->script == Lpropvalue ||
+                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
+            if (ok == notmatch)
               RRETURN(MATCH_NOMATCH);
             }
           break;
@@ -2710,7 +2754,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               }
             GETCHARINCTEST(fc, Feptr);
             category = UCD_CATEGORY(fc);
-            if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
+            if ((category == ucp_L || category == ucp_N) == notmatch)
               RRETURN(MATCH_NOMATCH);
             }
           break;
@@ -2733,11 +2777,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               {
               HSPACE_CASES:
               VSPACE_CASES:
-              if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
+              if (notmatch) RRETURN(MATCH_NOMATCH);
               break;
 
               default:
-              if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
+              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
                 RRETURN(MATCH_NOMATCH);
               break;
               }
@@ -2756,7 +2800,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
             GETCHARINCTEST(fc, Feptr);
             category = UCD_CATEGORY(fc);
             if ((category == ucp_L || category == ucp_N ||
-                fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
+                fc == CHAR_UNDERSCORE) == notmatch)
               RRETURN(MATCH_NOMATCH);
             }
           break;
@@ -2776,12 +2820,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               {
               if (fc < *cp)
                 {
-                if (Lctype == OP_NOTPROP) break;
+                if (notmatch) break;
                 RRETURN(MATCH_NOMATCH);
                 }
               if (fc == *cp++)
                 {
-                if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
+                if (notmatch) RRETURN(MATCH_NOMATCH);
                 break;
                 }
               }
@@ -2799,7 +2843,40 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
             GETCHARINCTEST(fc, Feptr);
             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
-                 fc >= 0xe000) == (Lctype == OP_NOTPROP))
+                 fc >= 0xe000) == notmatch)
+              RRETURN(MATCH_NOMATCH);
+            }
+          break;
+
+          case PT_BIDICL:
+          for (i = 1; i <= Lmin; i++)
+            {
+            if (Feptr >= mb->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
+            GETCHARINCTEST(fc, Feptr);
+            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch)
+              RRETURN(MATCH_NOMATCH);
+            }
+          break;
+
+          case PT_BOOL:
+          for (i = 1; i <= Lmin; i++)
+            {
+            BOOL ok;
+            const ucd_record *prop;
+            if (Feptr >= mb->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
+            GETCHARINCTEST(fc, Feptr);
+            prop = GET_UCD(fc);
+            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
+              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
+            if (ok == notmatch)
               RRETURN(MATCH_NOMATCH);
             }
           break;
@@ -3343,7 +3420,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
     if (Lmin == Lmax) continue;
 
     /* If minimizing, we have to test the rest of the pattern before each
-    subsequent match. */
+    subsequent match. This means we cannot use a local "notmatch" variable as
+    in the other cases. As all 4 temporary 32-bit values in the frame are
+    already in use, just test the type each time. */
 
     if (reptype == REPTYPE_MIN)
       {
@@ -3440,6 +3519,28 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
             }
           /* Control never gets here */
 
+          case PT_SCX:
+          for (;;)
+            {
+            BOOL ok;
+            const ucd_record *prop;
+            RMATCH(Fecode, RM225);
+            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+            if (Feptr >= mb->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
+            GETCHARINCTEST(fc, Feptr);
+            prop = GET_UCD(fc);
+            ok = (prop->script == Lpropvalue
+                  || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
+            if (ok == (Lctype == OP_NOTPROP))
+              RRETURN(MATCH_NOMATCH);
+            }
+          /* Control never gets here */
+
           case PT_ALNUM:
           for (;;)
             {
@@ -3454,8 +3555,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               }
             GETCHARINCTEST(fc, Feptr);
             category = UCD_CATEGORY(fc);
-            if ((category == ucp_L || category == ucp_N) ==
-                (Lctype == OP_NOTPROP))
+            if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
               RRETURN(MATCH_NOMATCH);
             }
           /* Control never gets here */
@@ -3562,6 +3662,45 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
             }
           /* Control never gets here */
 
+          case PT_BIDICL:
+          for (;;)
+            {
+            RMATCH(Fecode, RM224);
+            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+            if (Feptr >= mb->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
+            GETCHARINCTEST(fc, Feptr);
+            if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
+              RRETURN(MATCH_NOMATCH);
+            }
+          /* Control never gets here */
+
+          case PT_BOOL:
+          for (;;)
+            {
+            BOOL ok;
+            const ucd_record *prop;
+            RMATCH(Fecode, RM223);
+            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+            if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
+            if (Feptr >= mb->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
+            GETCHARINCTEST(fc, Feptr);
+            prop = GET_UCD(fc);
+            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
+              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
+            if (ok == (Lctype == OP_NOTPROP))
+              RRETURN(MATCH_NOMATCH);
+            }
+          /* Control never gets here */
+
           /* This should never occur */
           default:
           return PCRE2_ERROR_INTERNAL;
@@ -3870,7 +4009,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
       }
 
     /* If maximizing, it is worth using inline code for speed, doing the type
-    test once at the start (i.e. keep it out of the loop). */
+    test once at the start (i.e. keep it out of the loops). Once again,
+    "notmatch" can be an ordinary local variable because the loops do not call
+    RMATCH. */
 
     else
       {
@@ -3879,6 +4020,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
 #ifdef SUPPORT_UNICODE
       if (proptype >= 0)
         {
+        BOOL notmatch = Lctype == OP_NOTPROP;
         switch(proptype)
           {
           case PT_ANY:
@@ -3891,7 +4033,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               break;
               }
             GETCHARLENTEST(fc, Feptr, len);
-            if (Lctype == OP_NOTPROP) break;
+            if (notmatch) break;
             Feptr+= len;
             }
           break;
@@ -3910,7 +4052,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
             chartype = UCD_CHARTYPE(fc);
             if ((chartype == ucp_Lu ||
                  chartype == ucp_Ll ||
-                 chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
+                 chartype == ucp_Lt) == notmatch)
               break;
             Feptr+= len;
             }
@@ -3926,8 +4068,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               break;
               }
             GETCHARLENTEST(fc, Feptr, len);
-            if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
-              break;
+            if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break;
             Feptr+= len;
             }
           break;
@@ -3942,8 +4083,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               break;
               }
             GETCHARLENTEST(fc, Feptr, len);
-            if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
-              break;
+            if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break;
             Feptr+= len;
             }
           break;
@@ -3958,8 +4098,27 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               break;
               }
             GETCHARLENTEST(fc, Feptr, len);
-            if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
+            if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break;
+            Feptr+= len;
+            }
+          break;
+
+          case PT_SCX:
+          for (i = Lmin; i < Lmax; i++)
+            {
+            BOOL ok;
+            const ucd_record *prop;
+            int len = 1;
+            if (Feptr >= mb->end_subject)
+              {
+              SCHECK_PARTIAL();
               break;
+              }
+            GETCHARLENTEST(fc, Feptr, len);
+            prop = GET_UCD(fc);
+            ok = (prop->script == Lpropvalue ||
+                  MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
+            if (ok == notmatch) break;
             Feptr+= len;
             }
           break;
@@ -3976,8 +4135,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               }
             GETCHARLENTEST(fc, Feptr, len);
             category = UCD_CATEGORY(fc);
-            if ((category == ucp_L || category == ucp_N) ==
-                (Lctype == OP_NOTPROP))
+            if ((category == ucp_L || category == ucp_N) == notmatch)
               break;
             Feptr+= len;
             }
@@ -4002,11 +4160,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
               {
               HSPACE_CASES:
               VSPACE_CASES:
-              if (Lctype == OP_NOTPROP) goto ENDLOOP99;  /* Break the loop */
+              if (notmatch) goto ENDLOOP99;  /* Break the loop */
               break;
 
               default:
-              if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
+              if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
                 goto ENDLOOP99;   /* Break the loop */
               break;
               }
@@ -4028,7 +4186,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
             GETCHARLENTEST(fc, Feptr, len);
             category = UCD_CATEGORY(fc);
             if ((category == ucp_L || category == ucp_N ||
-                 fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
+                 fc == CHAR_UNDERSCORE) == notmatch)
               break;
             Feptr+= len;
             }
@@ -4049,9 +4207,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
             for (;;)
               {
               if (fc < *cp)
-                { if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; }
+                { if (notmatch) break; else goto GOT_MAX; }
               if (fc == *cp++)
-                { if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; }
+                { if (notmatch) goto GOT_MAX; else break; }
               }
             Feptr += len;
             }
@@ -4070,12 +4228,47 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
             GETCHARLENTEST(fc, Feptr, len);
             if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
                  fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
-                 fc >= 0xe000) == (Lctype == OP_NOTPROP))
+                 fc >= 0xe000) == notmatch)
               break;
             Feptr += len;
             }
           break;
 
+          case PT_BIDICL:
+          for (i = Lmin; i < Lmax; i++)
+            {
+            int len = 1;
+            if (Feptr >= mb->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
+            GETCHARLENTEST(fc, Feptr, len);
+            if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break;
+            Feptr+= len;
+            }
+          break;
+
+          case PT_BOOL:
+          for (i = Lmin; i < Lmax; i++)
+            {
+            BOOL ok;
+            const ucd_record *prop;
+            int len = 1;
+            if (Feptr >= mb->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
+            GETCHARLENTEST(fc, Feptr, len);
+            prop = GET_UCD(fc);
+            ok = MAPBIT(PRIV(ucd_boolprop_sets) +
+              UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
+            if (ok == notmatch) break;
+            Feptr+= len;
+            }
+          break;
+
           default:
           return PCRE2_ERROR_INTERNAL;
           }
@@ -6066,7 +6259,7 @@ switch (Freturn_id)
   LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
   LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
   LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
-  LBL(221) LBL(222)
+  LBL(221) LBL(222) LBL(223) LBL(224) LBL(225)
 #endif
 
   default:
@@ -6129,8 +6322,8 @@ PCRE2_UCHAR req_cu2 = 0;
 PCRE2_SPTR bumpalong_limit;
 PCRE2_SPTR end_subject;
 PCRE2_SPTR true_end_subject;
-PCRE2_SPTR start_match = subject + start_offset;
-PCRE2_SPTR req_cu_ptr = start_match - 1;
+PCRE2_SPTR start_match;
+PCRE2_SPTR req_cu_ptr;
 PCRE2_SPTR start_partial;
 PCRE2_SPTR match_partial;
 
@@ -6170,9 +6363,18 @@ PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
     PCRE2_KEEP_UNINITIALIZED;
 mb->stack_frames = (heapframe *)stack_frames_vector;
 
-/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
-subject string. */
+/* Recognize NULL, length 0 as an empty string. */
+
+if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
+
+/* Plausibility checks */
+
+if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
+if (code == NULL || subject == NULL || match_data == NULL)
+  return PCRE2_ERROR_NULL;
 
+start_match = subject + start_offset;
+req_cu_ptr = start_match - 1;
 if (length == PCRE2_ZERO_TERMINATED)
   {
   length = PRIV(strlen)(subject);
@@ -6180,11 +6382,6 @@ if (length == PCRE2_ZERO_TERMINATED)
   }
 true_end_subject = end_subject = subject + length;
 
-/* Plausibility checks */
-
-if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
-if (code == NULL || subject == NULL || match_data == NULL)
-  return PCRE2_ERROR_NULL;
 if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
 
 /* Check that the first field in the block is the magic number. */
@@ -6482,7 +6679,7 @@ if (utf &&
     /* If the end precedes start_match, it means there is invalid UTF in the
     extra code units we reversed over because of a lookbehind. Advance past the
     first bad code unit, and then skip invalid character starting code units in
-    8-bit and 16-bit modes, and try again. */
+    8-bit and 16-bit modes, and try again with the original end point. */
 
     if (end_subject < start_match)
       {
@@ -6491,6 +6688,7 @@ if (utf &&
       while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
         mb->check_subject++;
 #endif
+      end_subject = true_end_subject;
       }
 
     /* Otherwise, set the not end of line option, and do the match. */
@@ -6601,10 +6799,16 @@ the pattern. It is not used at all if there are no capturing parentheses.
 
 The last of these is changed within the match() function if the frame vector
 has to be expanded. We therefore put it into the match block so that it is
-correct when calling match() more than once for non-anchored patterns. */
+correct when calling match() more than once for non-anchored patterns.
+
+We must also pad frame_size for alignment to ensure subsequent frames are as
+aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
+array, that does not guarantee it is suitably aligned for pointers, as some
+architectures have pointers that are larger than a size_t. */
 
-frame_size = offsetof(heapframe, ovector) +
-  re->top_bracket * 2 * sizeof(PCRE2_SIZE);
+frame_size = (offsetof(heapframe, ovector) +
+  re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) &
+  ~(HEAPFRAME_ALIGNMENT - 1);
 
 /* Limits set in the pattern override the match context only if they are
 smaller. */
@@ -6648,7 +6852,7 @@ mb->match_frames_top =
 to avoid uninitialized memory read errors when it is copied to a new frame. */
 
 memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
-  re->top_bracket * 2 * sizeof(PCRE2_SIZE));
+  frame_size - offsetof(heapframe, ovector));
 
 /* Pointers to the individual character tables */
 

+ 188 - 285
thirdparty/pcre2/src/pcre2_script_run.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -68,26 +68,26 @@ Arguments:
 Returns:    TRUE if this is a valid script run
 */
 
-/* These dummy values must be less than the negation of the largest offset in
-the PRIV(ucd_script_sets) vector, which is held in a 16-bit field in UCD
-records (and is only likely to be a few hundred). */
+/* These are states in the checking process. */
 
-#define SCRIPT_UNSET        (-99999)
-#define SCRIPT_HANPENDING   (-99998)
-#define SCRIPT_HANHIRAKATA  (-99997)
-#define SCRIPT_HANBOPOMOFO  (-99996)
-#define SCRIPT_HANHANGUL    (-99995)
-#define SCRIPT_LIST         (-99994)
+enum { SCRIPT_UNSET,          /* Requirement as yet unknown */
+       SCRIPT_MAP,            /* Bitmap contains acceptable scripts */
+       SCRIPT_HANPENDING,     /* Have had only Han characters */
+       SCRIPT_HANHIRAKATA,    /* Expect Han, Hiragana, or Katakana */
+       SCRIPT_HANBOPOMOFO,    /* Expect Han or Bopomofo */
+       SCRIPT_HANHANGUL       /* Expect Han or Hangul */
+       };
 
-#define INTERSECTION_LIST_SIZE 50
+#define UCD_MAPSIZE (ucp_Unknown/32 + 1)
+#define FULL_MAPSIZE (ucp_Script_Count/32 + 1)
 
 BOOL
 PRIV(script_run)(PCRE2_SPTR ptr, PCRE2_SPTR endptr, BOOL utf)
 {
 #ifdef SUPPORT_UNICODE
-int require_script = SCRIPT_UNSET;
-uint8_t intersection_list[INTERSECTION_LIST_SIZE];
-const uint8_t *require_list = NULL;
+uint32_t require_state = SCRIPT_UNSET;
+uint32_t require_map[FULL_MAPSIZE];
+uint32_t map[FULL_MAPSIZE];
 uint32_t require_digitset = 0;
 uint32_t c;
 
@@ -101,11 +101,17 @@ if (ptr >= endptr) return TRUE;
 GETCHARINCTEST(c, ptr);
 if (ptr >= endptr) return TRUE;
 
+/* Initialize the require map. This is a full-size bitmap that has a bit for
+every script, as opposed to the maps in ucd_script_sets, which only have bits
+for scripts less than ucp_Unknown - those that appear in script extension
+lists. */
+
+for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] = 0;
+
 /* Scan strings of two or more characters, checking the Unicode characteristics
-of each code point. We make use of the Script Extensions property. There is
-special code for scripts that can be combined with characters from the Han
-Chinese script. This may be used in conjunction with four other scripts in
-these combinations:
+of each code point. There is special code for scripts that can be combined with
+characters from the Han Chinese script. This may be used in conjunction with
+four other scripts in these combinations:
 
 . Han with Hiragana and Katakana is allowed (for Japanese).
 . Han with Bopomofo is allowed (for Taiwanese Mandarin).
@@ -119,310 +125,207 @@ Hence the SCRIPT_HANPENDING state. */
 for (;;)
   {
   const ucd_record *ucd = GET_UCD(c);
-  int32_t scriptx = ucd->scriptx;
+  uint32_t script = ucd->script;
 
-  /* If the script extension is Unknown, the string is not a valid script run.
-  Such characters can only form script runs of length one. */
+  /* If the script is Unknown, the string is not a valid script run. Such
+  characters can only form script runs of length one (see test above). */
 
-  if (scriptx == ucp_Unknown) return FALSE;
+  if (script == ucp_Unknown) return FALSE;
 
-  /* A character whose script extension is Inherited is always accepted with
-  any script, and plays no further part in this testing. A character whose
-  script is Common is always accepted, but must still be tested for a digit
-  below. The scriptx value at this point is non-zero, because zero is
-  ucp_Unknown, tested for above. */
+  /* A character without any script extensions whose script is Inherited or
+  Common is always accepted with any script. If there are extensions, the
+  following processing happens for all scripts. */
 
-  if (scriptx != ucp_Inherited)
+  if (UCD_SCRIPTX_PROP(ucd) != 0 || (script != ucp_Inherited && script != ucp_Common))
     {
-    if (scriptx != ucp_Common)
+    BOOL OK;
+
+    /* Set up a full-sized map for this character that can include bits for all
+    scripts. Copy the scriptx map for this character (which covers those
+    scripts that appear in script extension lists), set the remaining values to
+    zero, and then, except for Common or Inherited, add this script's bit to
+    the map. */
+
+    memcpy(map, PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(ucd), UCD_MAPSIZE * sizeof(uint32_t));
+    memset(map + UCD_MAPSIZE, 0, (FULL_MAPSIZE - UCD_MAPSIZE) * sizeof(uint32_t));
+    if (script != ucp_Common && script != ucp_Inherited) MAPSET(map, script);
+
+    /* Handle the different checking states */
+
+    switch(require_state)
       {
-      /* If the script extension value is positive, the character is not a mark
-      that can be used with many scripts. In the simple case we either set or
-      compare with the required script. However, handling the scripts that can
-      combine with Han are more complicated, as is the case when the previous
-      characters have been man-script marks. */
+      /* First significant character - it might follow Common or Inherited
+      characters that do not have any script extensions. */
 
-      if (scriptx > 0)
+      case SCRIPT_UNSET:
+      switch(script)
         {
-        switch(require_script)
-          {
-          /* Either the first significant character (require_script unset) or
-          after only Han characters. */
-
-          case SCRIPT_UNSET:
-          case SCRIPT_HANPENDING:
-          switch(scriptx)
-            {
-            case ucp_Han:
-            require_script = SCRIPT_HANPENDING;
-            break;
-
-            case ucp_Hiragana:
-            case ucp_Katakana:
-            require_script = SCRIPT_HANHIRAKATA;
-            break;
-
-            case ucp_Bopomofo:
-            require_script = SCRIPT_HANBOPOMOFO;
-            break;
-
-            case ucp_Hangul:
-            require_script = SCRIPT_HANHANGUL;
-            break;
-
-            /* Not a Han-related script. If expecting one, fail. Otherise set
-            the requirement to this script. */
-
-            default:
-            if (require_script == SCRIPT_HANPENDING) return FALSE;
-            require_script = scriptx;
-            break;
-            }
-          break;
+        case ucp_Han:
+        require_state = SCRIPT_HANPENDING;
+        break;
+
+        case ucp_Hiragana:
+        case ucp_Katakana:
+        require_state = SCRIPT_HANHIRAKATA;
+        break;
+
+        case ucp_Bopomofo:
+        require_state = SCRIPT_HANBOPOMOFO;
+        break;
+
+        case ucp_Hangul:
+        require_state = SCRIPT_HANHANGUL;
+        break;
+
+        default:
+        memcpy(require_map, map, FULL_MAPSIZE * sizeof(uint32_t));
+        require_state = SCRIPT_MAP;
+        break;
+        }
+      break;
 
-          /* Previously encountered one of the "with Han" scripts. Check that
-          this character is appropriate. */
+      /* The first significant character was Han. An inspection of the Unicode
+      11.0.0 files shows that there are the following types of Script Extension
+      list that involve the Han, Bopomofo, Hiragana, Katakana, and Hangul
+      scripts:
 
-          case SCRIPT_HANHIRAKATA:
-          if (scriptx != ucp_Han && scriptx != ucp_Hiragana && 
-              scriptx != ucp_Katakana)
-            return FALSE;
-          break;
+      . Bopomofo + Han
+      . Han + Hiragana + Katakana
+      . Hiragana + Katakana
+      . Bopomofo + Hangul + Han + Hiragana + Katakana
 
-          case SCRIPT_HANBOPOMOFO:
-          if (scriptx != ucp_Han && scriptx != ucp_Bopomofo) return FALSE;
-          break;
+      The following code tries to make sense of this. */
 
-          case SCRIPT_HANHANGUL:
-          if (scriptx != ucp_Han && scriptx != ucp_Hangul) return FALSE;
-          break;
+#define FOUND_BOPOMOFO 1
+#define FOUND_HIRAGANA 2
+#define FOUND_KATAKANA 4
+#define FOUND_HANGUL   8
 
-          /* We have a list of scripts to check that is derived from one or
-          more previous characters. This is either one of the lists in
-          ucd_script_sets[] (for one previous character) or the intersection of
-          several lists for multiple characters. */
-
-          case SCRIPT_LIST:
-            {
-            const uint8_t *list;
-            for (list = require_list; *list != 0; list++)
-              {
-              if (*list == scriptx) break;
-              }
-            if (*list == 0) return FALSE;
-            }
-
-          /* The rest of the string must be in this script, but we have to 
-          allow for the Han complications. */
-          
-          switch(scriptx)
-            {
-            case ucp_Han:
-            require_script = SCRIPT_HANPENDING;
-            break;
-
-            case ucp_Hiragana:
-            case ucp_Katakana:
-            require_script = SCRIPT_HANHIRAKATA;
-            break;
-
-            case ucp_Bopomofo:
-            require_script = SCRIPT_HANBOPOMOFO;
-            break;
-
-            case ucp_Hangul:
-            require_script = SCRIPT_HANHANGUL;
-            break;
-
-            default:
-            require_script = scriptx;
-            break;
-            }  
-          break;
+      case SCRIPT_HANPENDING:
+      if (script != ucp_Han)   /* Another Han does nothing */
+        {
+        uint32_t chspecial = 0;
 
-          /* This is the easy case when a single script is required. */
+        if (MAPBIT(map, ucp_Bopomofo) != 0) chspecial |= FOUND_BOPOMOFO;
+        if (MAPBIT(map, ucp_Hiragana) != 0) chspecial |= FOUND_HIRAGANA;
+        if (MAPBIT(map, ucp_Katakana) != 0) chspecial |= FOUND_KATAKANA;
+        if (MAPBIT(map, ucp_Hangul) != 0)   chspecial |= FOUND_HANGUL;
 
-          default:
-          if (scriptx != require_script) return FALSE;
-          break;
-          }
-        }  /* End of handing positive scriptx */
+        if (chspecial == 0) return FALSE;   /* Not allowed with Han */
 
-      /* If scriptx is negative, this character is a mark-type character that
-      has a list of permitted scripts. */
+        if (chspecial == FOUND_BOPOMOFO)
+          require_state = SCRIPT_HANBOPOMOFO;
+        else if (chspecial == (FOUND_HIRAGANA|FOUND_KATAKANA))
+          require_state = SCRIPT_HANHIRAKATA;
 
-      else
-        {
-        uint32_t chspecial;
-        const uint8_t *clist, *rlist;
-        const uint8_t *list = PRIV(ucd_script_sets) - scriptx;
-        
-        switch(require_script)
-          {
-          case SCRIPT_UNSET:
-          require_list = PRIV(ucd_script_sets) - scriptx;
-          require_script = SCRIPT_LIST;
-          break;
+        /* Otherwise this character must be allowed with all of them, so remain
+        in the pending state. */
+        }
+      break;
 
-          /* An inspection of the Unicode 11.0.0 files shows that there are the
-          following types of Script Extension list that involve the Han,
-          Bopomofo, Hiragana, Katakana, and Hangul scripts:
+      /* Previously encountered one of the "with Han" scripts. Check that
+      this character is appropriate. */
 
-          . Bopomofo + Han
-          . Han + Hiragana + Katakana
-          . Hiragana + Katakana
-          . Bopopmofo + Hangul + Han + Hiragana + Katakana
+      case SCRIPT_HANHIRAKATA:
+      if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hiragana) +
+          MAPBIT(map, ucp_Katakana) == 0) return FALSE;
+      break;
 
-          The following code tries to make sense of this. */
+      case SCRIPT_HANBOPOMOFO:
+      if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Bopomofo) == 0) return FALSE;
+      break;
 
-#define FOUND_BOPOMOFO 1
-#define FOUND_HIRAGANA 2
-#define FOUND_KATAKANA 4
-#define FOUND_HANGUL   8
+      case SCRIPT_HANHANGUL:
+      if (MAPBIT(map, ucp_Han) + MAPBIT(map, ucp_Hangul) == 0) return FALSE;
+      break;
 
-          case SCRIPT_HANPENDING:
-          chspecial = 0;
-          for (; *list != 0; list++)
-            {
-            switch (*list)
-              {
-              case ucp_Bopomofo: chspecial |= FOUND_BOPOMOFO; break;
-              case ucp_Hiragana: chspecial |= FOUND_HIRAGANA; break;
-              case ucp_Katakana: chspecial |= FOUND_KATAKANA; break;
-              case ucp_Hangul:   chspecial |= FOUND_HANGUL; break;
-              default: break;
-              }
-            }
-
-           if (chspecial == 0) return FALSE;
-
-           if (chspecial == FOUND_BOPOMOFO)
-             {
-             require_script = SCRIPT_HANBOPOMOFO;
-             }
-           else if (chspecial == (FOUND_HIRAGANA|FOUND_KATAKANA))
-             {
-             require_script = SCRIPT_HANHIRAKATA;
-             }
-
-          /* Otherwise it must be allowed with all of them, so remain in
-          the pending state. */
+      /* Previously encountered one or more characters that are allowed with a
+      list of scripts. */
 
-          break;
+      case SCRIPT_MAP:
+      OK = FALSE;
 
-          case SCRIPT_HANHIRAKATA:
-          for (; *list != 0; list++)
-            {
-            if (*list == ucp_Hiragana || *list == ucp_Katakana) break;
-            }
-          if (*list == 0) return FALSE;
+      for (int i = 0; i < FULL_MAPSIZE; i++)
+        {
+        if ((require_map[i] & map[i]) != 0)
+          {
+          OK = TRUE;
           break;
+          }
+        }
 
-          case SCRIPT_HANBOPOMOFO:
-          for (; *list != 0; list++)
-            {
-            if (*list == ucp_Bopomofo) break;
-            }
-          if (*list == 0) return FALSE;
-          break;
+      if (!OK) return FALSE;
 
-          case SCRIPT_HANHANGUL:
-          for (; *list != 0; list++)
-            {
-            if (*list == ucp_Hangul) break;
-            }
-          if (*list == 0) return FALSE;
-          break;
+      /* The rest of the string must be in this script, but we have to
+      allow for the Han complications. */
 
-          /* Previously encountered one or more characters that are allowed
-          with a list of scripts. Build the intersection of the required list
-          with this character's list in intersection_list[]. This code is
-          written so that it still works OK if the required list is already in
-          that vector. */
-
-          case SCRIPT_LIST:
-            {
-            int i = 0;
-            for (rlist = require_list; *rlist != 0; rlist++)
-              {
-              for (clist = list; *clist != 0; clist++)
-                {
-                if (*rlist == *clist)
-                  {
-                  intersection_list[i++] = *rlist;
-                  break;
-                  }
-                }
-              }
-            if (i == 0) return FALSE;  /* No scripts in common */
-
-            /* If there's just one script in common, we can set it as the
-            unique required script. Otherwise, terminate the intersection list
-            and make it the required list. */
-
-            if (i == 1)
-              {
-              require_script = intersection_list[0];
-              }
-            else
-              {
-              intersection_list[i] = 0;
-              require_list = intersection_list;
-              }
-            }
-          break;
+      switch(script)
+        {
+        case ucp_Han:
+        require_state = SCRIPT_HANPENDING;
+        break;
 
-          /* The previously set required script is a single script, not
-          Han-related. Check that it is in this character's list. */
+        case ucp_Hiragana:
+        case ucp_Katakana:
+        require_state = SCRIPT_HANHIRAKATA;
+        break;
 
-          default:
-          for (; *list != 0; list++)
-            {
-            if (*list == require_script) break;
-            }
-          if (*list == 0) return FALSE;
-          break;
-          }
-        }  /* End of handling negative scriptx */
-      }    /* End of checking non-Common character */
-
-    /* The character is in an acceptable script. We must now ensure that all
-    decimal digits in the string come from the same set. Some scripts (e.g.
-    Common, Arabic) have more than one set of decimal digits. This code does
-    not allow mixing sets, even within the same script. The vector called
-    PRIV(ucd_digit_sets)[] contains, in its first element, the number of
-    following elements, and then, in ascending order, the code points of the
-    '9' characters in every set of 10 digits. Each set is identified by the
-    offset in the vector of its '9' character. An initial check of the first
-    value picks up ASCII digits quickly. Otherwise, a binary chop is used. */
-
-    if (ucd->chartype == ucp_Nd)
-      {
-      uint32_t digitset;
+        case ucp_Bopomofo:
+        require_state = SCRIPT_HANBOPOMOFO;
+        break;
+
+        case ucp_Hangul:
+        require_state = SCRIPT_HANHANGUL;
+        break;
+
+        /* Compute the intersection of the required list of scripts and the
+        allowed scripts for this character. */
 
-      if (c <= PRIV(ucd_digit_sets)[1]) digitset = 1; else
+        default:
+        for (int i = 0; i < FULL_MAPSIZE; i++) require_map[i] &= map[i];
+        break;
+        }
+
+      break;
+      }
+    }   /* End checking character's script and extensions. */
+
+  /* The character is in an acceptable script. We must now ensure that all
+  decimal digits in the string come from the same set. Some scripts (e.g.
+  Common, Arabic) have more than one set of decimal digits. This code does
+  not allow mixing sets, even within the same script. The vector called
+  PRIV(ucd_digit_sets)[] contains, in its first element, the number of
+  following elements, and then, in ascending order, the code points of the
+  '9' characters in every set of 10 digits. Each set is identified by the
+  offset in the vector of its '9' character. An initial check of the first
+  value picks up ASCII digits quickly. Otherwise, a binary chop is used. */
+
+  if (ucd->chartype == ucp_Nd)
+    {
+    uint32_t digitset;
+
+    if (c <= PRIV(ucd_digit_sets)[1]) digitset = 1; else
+      {
+      int mid;
+      int bot = 1;
+      int top = PRIV(ucd_digit_sets)[0];
+      for (;;)
         {
-        int mid;
-        int bot = 1;
-        int top = PRIV(ucd_digit_sets)[0];
-        for (;;)
+        if (top <= bot + 1)    /* <= rather than == is paranoia */
           {
-          if (top <= bot + 1)    /* <= rather than == is paranoia */
-            {
-            digitset = top;
-            break;
-            }
-          mid = (top + bot) / 2;
-          if (c <= PRIV(ucd_digit_sets)[mid]) top = mid; else bot = mid;
+          digitset = top;
+          break;
           }
+        mid = (top + bot) / 2;
+        if (c <= PRIV(ucd_digit_sets)[mid]) top = mid; else bot = mid;
         }
+      }
 
-      /* A required value of 0 means "unset". */
+    /* A required value of 0 means "unset". */
 
-      if (require_digitset == 0) require_digitset = digitset;
-        else if (digitset != require_digitset) return FALSE;
-      }   /* End digit handling */
-    }     /* End checking non-Inherited character */
+    if (require_digitset == 0) require_digitset = digitset;
+      else if (digitset != require_digitset) return FALSE;
+    }   /* End digit handling */
 
   /* If we haven't yet got to the end, pick up the next character. */
 

+ 1 - 1
thirdparty/pcre2/src/pcre2_string_utils.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2018 University of Cambridge
+          New API code Copyright (c) 2018-2021 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without

+ 2 - 2
thirdparty/pcre2/src/pcre2_study.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2020 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -908,7 +908,7 @@ set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
 {
 uint32_t c;
 for (c = 0; c < table_limit; c++)
-  re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
+  re->start_bitmap[c] |= (uint8_t)(~(re->tables[c+cbits_offset+cbit_type]));
 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
 if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
 #endif

+ 24 - 5
thirdparty/pcre2/src/pcre2_substitute.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2020 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -259,6 +259,18 @@ PCRE2_UNSET, so as not to imply an offset in the replacement. */
 
 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
   return PCRE2_ERROR_BADOPTION;
+  
+/* Validate length and find the end of the replacement. A NULL replacement of 
+zero length is interpreted as an empty string. */
+
+if (replacement == NULL) 
+  {
+  if (rlength != 0) return PCRE2_ERROR_NULL;
+  replacement = (PCRE2_SPTR)""; 
+  } 
+   
+if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
+repend = replacement + rlength;
 
 /* Check for using a match that has already happened. Note that the subject
 pointer in the match data may be NULL after a no-match. */
@@ -312,11 +324,18 @@ scb.input = subject;
 scb.output = (PCRE2_SPTR)buffer;
 scb.ovector = ovector;
 
-/* Find lengths of zero-terminated strings and the end of the replacement. */
+/* A NULL subject of zero length is treated as an empty string. */
 
-if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
-if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
-repend = replacement + rlength;
+if (subject == NULL)
+  {
+  if (length != 0) return PCRE2_ERROR_NULL; 
+  subject = (PCRE2_SPTR)"";
+  } 
+
+/* Find length of zero-terminated subject */
+
+if (length == PCRE2_ZERO_TERMINATED)
+  length = subject? PRIV(strlen)(subject) : 0;
 
 /* Check UTF replacement string if necessary. */
 

+ 11 - 646
thirdparty/pcre2/src/pcre2_tables.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2019 University of Cambridge
+          New API code Copyright (c) 2016-2021 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -51,10 +51,10 @@ defined. */
 #include "pcre2_internal.h"
 #endif /* PCRE2_PCRE2TEST */
 
-
 /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
 the definition is next to the definition of the opcodes in pcre2_internal.h.
-This is mode-dependent, so is skipped when this file is included by pcre2test. */
+This is mode-dependent, so it is skipped when this file is included by
+pcre2test. */
 
 #ifndef PCRE2_PCRE2TEST
 const uint8_t PRIV(OP_lengths)[] = { OP_LENGTHS };
@@ -119,6 +119,9 @@ const uint8_t PRIV(utf8_table4)[] = {
 
 #endif /* UTF-8 support needed */
 
+/* Tables concerned with Unicode properties are relevant only when Unicode
+support is enabled. See also the pcre2_ucptables.c file, which is generated by
+a Python script from Unicode data files. */
 
 #ifdef SUPPORT_UNICODE
 
@@ -190,7 +193,7 @@ const uint32_t PRIV(ucp_gbtable)[] = {
    ESZ|(1u<<ucp_gbPrepend)|                             /*  4 Prepend */
        (1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbT)|
        (1u<<ucp_gbLV)|(1u<<ucp_gbLVT)|(1u<<ucp_gbOther)|
-       (1u<<ucp_gbRegionalIndicator),
+       (1u<<ucp_gbRegional_Indicator),
    ESZ,                                                 /*  5 SpacingMark */
    ESZ|(1u<<ucp_gbL)|(1u<<ucp_gbV)|(1u<<ucp_gbLV)|      /*  6 L */
        (1u<<ucp_gbLVT),
@@ -198,7 +201,7 @@ const uint32_t PRIV(ucp_gbtable)[] = {
    ESZ|(1u<<ucp_gbT),                                   /*  8 T */
    ESZ|(1u<<ucp_gbV)|(1u<<ucp_gbT),                     /*  9 LV */
    ESZ|(1u<<ucp_gbT),                                   /* 10 LVT */
-   (1u<<ucp_gbRegionalIndicator),                       /* 11 RegionalIndicator */
+   (1u<<ucp_gbRegional_Indicator),                      /* 11 Regional Indicator */
    ESZ,                                                 /* 12 Other */
    ESZ,                                                 /* 13 ZWJ */
    ESZ|(1u<<ucp_gbExtended_Pictographic)                /* 14 Extended Pictographic */
@@ -221,648 +224,10 @@ const int PRIV(ucp_typerange)[] = {
 };
 #endif /* SUPPORT_JIT */
 
-/* The PRIV(utt)[] table below translates Unicode property names into type and
-code values. It is searched by binary chop, so must be in collating sequence of
-name. Originally, the table contained pointers to the name strings in the first
-field of each entry. However, that leads to a large number of relocations when
-a shared library is dynamically loaded. A significant reduction is made by
-putting all the names into a single, large string and then using offsets in the
-table itself. Maintenance is more error-prone, but frequent changes to this
-data are unlikely.
-
-July 2008: There is now a script called maint/GenerateUtt.py that can be used
-to generate this data automatically instead of maintaining it by hand.
-
-The script was updated in March 2009 to generate a new EBCDIC-compliant
-version. Like all other character and string literals that are compared against
-the regular expression pattern, we must use STR_ macros instead of literal
-strings to make sure that UTF-8 support works on EBCDIC platforms. */
-
-#define STRING_Adlam0 STR_A STR_d STR_l STR_a STR_m "\0"
-#define STRING_Ahom0 STR_A STR_h STR_o STR_m "\0"
-#define STRING_Anatolian_Hieroglyphs0 STR_A STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
-#define STRING_Any0 STR_A STR_n STR_y "\0"
-#define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0"
-#define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0"
-#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
-#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
-#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
-#define STRING_Bassa_Vah0 STR_B STR_a STR_s STR_s STR_a STR_UNDERSCORE STR_V STR_a STR_h "\0"
-#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
-#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
-#define STRING_Bhaiksuki0 STR_B STR_h STR_a STR_i STR_k STR_s STR_u STR_k STR_i "\0"
-#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
-#define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0"
-#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
-#define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0"
-#define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0"
-#define STRING_C0 STR_C "\0"
-#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
-#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
-#define STRING_Caucasian_Albanian0 STR_C STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0"
-#define STRING_Cc0 STR_C STR_c "\0"
-#define STRING_Cf0 STR_C STR_f "\0"
-#define STRING_Chakma0 STR_C STR_h STR_a STR_k STR_m STR_a "\0"
-#define STRING_Cham0 STR_C STR_h STR_a STR_m "\0"
-#define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
-#define STRING_Chorasmian0 STR_C STR_h STR_o STR_r STR_a STR_s STR_m STR_i STR_a STR_n "\0"
-#define STRING_Cn0 STR_C STR_n "\0"
-#define STRING_Co0 STR_C STR_o "\0"
-#define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0"
-#define STRING_Coptic0 STR_C STR_o STR_p STR_t STR_i STR_c "\0"
-#define STRING_Cs0 STR_C STR_s "\0"
-#define STRING_Cuneiform0 STR_C STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0"
-#define STRING_Cypriot0 STR_C STR_y STR_p STR_r STR_i STR_o STR_t "\0"
-#define STRING_Cypro_Minoan0 STR_C STR_y STR_p STR_r STR_o STR_UNDERSCORE STR_M STR_i STR_n STR_o STR_a STR_n "\0"
-#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
-#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
-#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
-#define STRING_Dives_Akuru0 STR_D STR_i STR_v STR_e STR_s STR_UNDERSCORE STR_A STR_k STR_u STR_r STR_u "\0"
-#define STRING_Dogra0 STR_D STR_o STR_g STR_r STR_a "\0"
-#define STRING_Duployan0 STR_D STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
-#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
-#define STRING_Elbasan0 STR_E STR_l STR_b STR_a STR_s STR_a STR_n "\0"
-#define STRING_Elymaic0 STR_E STR_l STR_y STR_m STR_a STR_i STR_c "\0"
-#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
-#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
-#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
-#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0"
-#define STRING_Grantha0 STR_G STR_r STR_a STR_n STR_t STR_h STR_a "\0"
-#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
-#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
-#define STRING_Gunjala_Gondi0 STR_G STR_u STR_n STR_j STR_a STR_l STR_a STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
-#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
-#define STRING_Han0 STR_H STR_a STR_n "\0"
-#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
-#define STRING_Hanifi_Rohingya0 STR_H STR_a STR_n STR_i STR_f STR_i STR_UNDERSCORE STR_R STR_o STR_h STR_i STR_n STR_g STR_y STR_a "\0"
-#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
-#define STRING_Hatran0 STR_H STR_a STR_t STR_r STR_a STR_n "\0"
-#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
-#define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0"
-#define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0"
-#define STRING_Inherited0 STR_I STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0"
-#define STRING_Inscriptional_Pahlavi0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
-#define STRING_Inscriptional_Parthian0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0"
-#define STRING_Javanese0 STR_J STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0"
-#define STRING_Kaithi0 STR_K STR_a STR_i STR_t STR_h STR_i "\0"
-#define STRING_Kannada0 STR_K STR_a STR_n STR_n STR_a STR_d STR_a "\0"
-#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
-#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
-#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
-#define STRING_Khitan_Small_Script0 STR_K STR_h STR_i STR_t STR_a STR_n STR_UNDERSCORE STR_S STR_m STR_a STR_l STR_l STR_UNDERSCORE STR_S STR_c STR_r STR_i STR_p STR_t "\0"
-#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
-#define STRING_Khojki0 STR_K STR_h STR_o STR_j STR_k STR_i "\0"
-#define STRING_Khudawadi0 STR_K STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
-#define STRING_L0 STR_L "\0"
-#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0"
-#define STRING_Lao0 STR_L STR_a STR_o "\0"
-#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0"
-#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
-#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
-#define STRING_Linear_A0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_A "\0"
-#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
-#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
-#define STRING_Ll0 STR_L STR_l "\0"
-#define STRING_Lm0 STR_L STR_m "\0"
-#define STRING_Lo0 STR_L STR_o "\0"
-#define STRING_Lt0 STR_L STR_t "\0"
-#define STRING_Lu0 STR_L STR_u "\0"
-#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
-#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
-#define STRING_M0 STR_M "\0"
-#define STRING_Mahajani0 STR_M STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
-#define STRING_Makasar0 STR_M STR_a STR_k STR_a STR_s STR_a STR_r "\0"
-#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
-#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
-#define STRING_Manichaean0 STR_M STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
-#define STRING_Marchen0 STR_M STR_a STR_r STR_c STR_h STR_e STR_n "\0"
-#define STRING_Masaram_Gondi0 STR_M STR_a STR_s STR_a STR_r STR_a STR_m STR_UNDERSCORE STR_G STR_o STR_n STR_d STR_i "\0"
-#define STRING_Mc0 STR_M STR_c "\0"
-#define STRING_Me0 STR_M STR_e "\0"
-#define STRING_Medefaidrin0 STR_M STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0"
-#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
-#define STRING_Mende_Kikakui0 STR_M STR_e STR_n STR_d STR_e STR_UNDERSCORE STR_K STR_i STR_k STR_a STR_k STR_u STR_i "\0"
-#define STRING_Meroitic_Cursive0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_C STR_u STR_r STR_s STR_i STR_v STR_e "\0"
-#define STRING_Meroitic_Hieroglyphs0 STR_M STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
-#define STRING_Miao0 STR_M STR_i STR_a STR_o "\0"
-#define STRING_Mn0 STR_M STR_n "\0"
-#define STRING_Modi0 STR_M STR_o STR_d STR_i "\0"
-#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
-#define STRING_Mro0 STR_M STR_r STR_o "\0"
-#define STRING_Multani0 STR_M STR_u STR_l STR_t STR_a STR_n STR_i "\0"
-#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
-#define STRING_N0 STR_N "\0"
-#define STRING_Nabataean0 STR_N STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
-#define STRING_Nandinagari0 STR_N STR_a STR_n STR_d STR_i STR_n STR_a STR_g STR_a STR_r STR_i "\0"
-#define STRING_Nd0 STR_N STR_d "\0"
-#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
-#define STRING_Newa0 STR_N STR_e STR_w STR_a "\0"
-#define STRING_Nko0 STR_N STR_k STR_o "\0"
-#define STRING_Nl0 STR_N STR_l "\0"
-#define STRING_No0 STR_N STR_o "\0"
-#define STRING_Nushu0 STR_N STR_u STR_s STR_h STR_u "\0"
-#define STRING_Nyiakeng_Puachue_Hmong0 STR_N STR_y STR_i STR_a STR_k STR_e STR_n STR_g STR_UNDERSCORE STR_P STR_u STR_a STR_c STR_h STR_u STR_e STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
-#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
-#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
-#define STRING_Old_Hungarian0 STR_O STR_l STR_d STR_UNDERSCORE STR_H STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0"
-#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
-#define STRING_Old_North_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_N STR_o STR_r STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
-#define STRING_Old_Permic0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_m STR_i STR_c "\0"
-#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
-#define STRING_Old_Sogdian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
-#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
-#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
-#define STRING_Old_Uyghur0 STR_O STR_l STR_d STR_UNDERSCORE STR_U STR_y STR_g STR_h STR_u STR_r "\0"
-#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
-#define STRING_Osage0 STR_O STR_s STR_a STR_g STR_e "\0"
-#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
-#define STRING_P0 STR_P "\0"
-#define STRING_Pahawh_Hmong0 STR_P STR_a STR_h STR_a STR_w STR_h STR_UNDERSCORE STR_H STR_m STR_o STR_n STR_g "\0"
-#define STRING_Palmyrene0 STR_P STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0"
-#define STRING_Pau_Cin_Hau0 STR_P STR_a STR_u STR_UNDERSCORE STR_C STR_i STR_n STR_UNDERSCORE STR_H STR_a STR_u "\0"
-#define STRING_Pc0 STR_P STR_c "\0"
-#define STRING_Pd0 STR_P STR_d "\0"
-#define STRING_Pe0 STR_P STR_e "\0"
-#define STRING_Pf0 STR_P STR_f "\0"
-#define STRING_Phags_Pa0 STR_P STR_h STR_a STR_g STR_s STR_UNDERSCORE STR_P STR_a "\0"
-#define STRING_Phoenician0 STR_P STR_h STR_o STR_e STR_n STR_i STR_c STR_i STR_a STR_n "\0"
-#define STRING_Pi0 STR_P STR_i "\0"
-#define STRING_Po0 STR_P STR_o "\0"
-#define STRING_Ps0 STR_P STR_s "\0"
-#define STRING_Psalter_Pahlavi0 STR_P STR_s STR_a STR_l STR_t STR_e STR_r STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
-#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
-#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"
-#define STRING_S0 STR_S "\0"
-#define STRING_Samaritan0 STR_S STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0"
-#define STRING_Saurashtra0 STR_S STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0"
-#define STRING_Sc0 STR_S STR_c "\0"
-#define STRING_Sharada0 STR_S STR_h STR_a STR_r STR_a STR_d STR_a "\0"
-#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
-#define STRING_Siddham0 STR_S STR_i STR_d STR_d STR_h STR_a STR_m "\0"
-#define STRING_SignWriting0 STR_S STR_i STR_g STR_n STR_W STR_r STR_i STR_t STR_i STR_n STR_g "\0"
-#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
-#define STRING_Sk0 STR_S STR_k "\0"
-#define STRING_Sm0 STR_S STR_m "\0"
-#define STRING_So0 STR_S STR_o "\0"
-#define STRING_Sogdian0 STR_S STR_o STR_g STR_d STR_i STR_a STR_n "\0"
-#define STRING_Sora_Sompeng0 STR_S STR_o STR_r STR_a STR_UNDERSCORE STR_S STR_o STR_m STR_p STR_e STR_n STR_g "\0"
-#define STRING_Soyombo0 STR_S STR_o STR_y STR_o STR_m STR_b STR_o "\0"
-#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
-#define STRING_Syloti_Nagri0 STR_S STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0"
-#define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0"
-#define STRING_Tagalog0 STR_T STR_a STR_g STR_a STR_l STR_o STR_g "\0"
-#define STRING_Tagbanwa0 STR_T STR_a STR_g STR_b STR_a STR_n STR_w STR_a "\0"
-#define STRING_Tai_Le0 STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_e "\0"
-#define STRING_Tai_Tham0 STR_T STR_a STR_i STR_UNDERSCORE STR_T STR_h STR_a STR_m "\0"
-#define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0"
-#define STRING_Takri0 STR_T STR_a STR_k STR_r STR_i "\0"
-#define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0"
-#define STRING_Tangsa0 STR_T STR_a STR_n STR_g STR_s STR_a "\0"
-#define STRING_Tangut0 STR_T STR_a STR_n STR_g STR_u STR_t "\0"
-#define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0"
-#define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0"
-#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
-#define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0"
-#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
-#define STRING_Tirhuta0 STR_T STR_i STR_r STR_h STR_u STR_t STR_a "\0"
-#define STRING_Toto0 STR_T STR_o STR_t STR_o "\0"
-#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
-#define STRING_Unknown0 STR_U STR_n STR_k STR_n STR_o STR_w STR_n "\0"
-#define STRING_Vai0 STR_V STR_a STR_i "\0"
-#define STRING_Vithkuqi0 STR_V STR_i STR_t STR_h STR_k STR_u STR_q STR_i "\0"
-#define STRING_Wancho0 STR_W STR_a STR_n STR_c STR_h STR_o "\0"
-#define STRING_Warang_Citi0 STR_W STR_a STR_r STR_a STR_n STR_g STR_UNDERSCORE STR_C STR_i STR_t STR_i "\0"
-#define STRING_Xan0 STR_X STR_a STR_n "\0"
-#define STRING_Xps0 STR_X STR_p STR_s "\0"
-#define STRING_Xsp0 STR_X STR_s STR_p "\0"
-#define STRING_Xuc0 STR_X STR_u STR_c "\0"
-#define STRING_Xwd0 STR_X STR_w STR_d "\0"
-#define STRING_Yezidi0 STR_Y STR_e STR_z STR_i STR_d STR_i "\0"
-#define STRING_Yi0 STR_Y STR_i "\0"
-#define STRING_Z0 STR_Z "\0"
-#define STRING_Zanabazar_Square0 STR_Z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_UNDERSCORE STR_S STR_q STR_u STR_a STR_r STR_e "\0"
-#define STRING_Zl0 STR_Z STR_l "\0"
-#define STRING_Zp0 STR_Z STR_p "\0"
-#define STRING_Zs0 STR_Z STR_s "\0"
-
-const char PRIV(utt_names)[] =
-  STRING_Adlam0
-  STRING_Ahom0
-  STRING_Anatolian_Hieroglyphs0
-  STRING_Any0
-  STRING_Arabic0
-  STRING_Armenian0
-  STRING_Avestan0
-  STRING_Balinese0
-  STRING_Bamum0
-  STRING_Bassa_Vah0
-  STRING_Batak0
-  STRING_Bengali0
-  STRING_Bhaiksuki0
-  STRING_Bopomofo0
-  STRING_Brahmi0
-  STRING_Braille0
-  STRING_Buginese0
-  STRING_Buhid0
-  STRING_C0
-  STRING_Canadian_Aboriginal0
-  STRING_Carian0
-  STRING_Caucasian_Albanian0
-  STRING_Cc0
-  STRING_Cf0
-  STRING_Chakma0
-  STRING_Cham0
-  STRING_Cherokee0
-  STRING_Chorasmian0
-  STRING_Cn0
-  STRING_Co0
-  STRING_Common0
-  STRING_Coptic0
-  STRING_Cs0
-  STRING_Cuneiform0
-  STRING_Cypriot0
-  STRING_Cypro_Minoan0
-  STRING_Cyrillic0
-  STRING_Deseret0
-  STRING_Devanagari0
-  STRING_Dives_Akuru0
-  STRING_Dogra0
-  STRING_Duployan0
-  STRING_Egyptian_Hieroglyphs0
-  STRING_Elbasan0
-  STRING_Elymaic0
-  STRING_Ethiopic0
-  STRING_Georgian0
-  STRING_Glagolitic0
-  STRING_Gothic0
-  STRING_Grantha0
-  STRING_Greek0
-  STRING_Gujarati0
-  STRING_Gunjala_Gondi0
-  STRING_Gurmukhi0
-  STRING_Han0
-  STRING_Hangul0
-  STRING_Hanifi_Rohingya0
-  STRING_Hanunoo0
-  STRING_Hatran0
-  STRING_Hebrew0
-  STRING_Hiragana0
-  STRING_Imperial_Aramaic0
-  STRING_Inherited0
-  STRING_Inscriptional_Pahlavi0
-  STRING_Inscriptional_Parthian0
-  STRING_Javanese0
-  STRING_Kaithi0
-  STRING_Kannada0
-  STRING_Katakana0
-  STRING_Kayah_Li0
-  STRING_Kharoshthi0
-  STRING_Khitan_Small_Script0
-  STRING_Khmer0
-  STRING_Khojki0
-  STRING_Khudawadi0
-  STRING_L0
-  STRING_L_AMPERSAND0
-  STRING_Lao0
-  STRING_Latin0
-  STRING_Lepcha0
-  STRING_Limbu0
-  STRING_Linear_A0
-  STRING_Linear_B0
-  STRING_Lisu0
-  STRING_Ll0
-  STRING_Lm0
-  STRING_Lo0
-  STRING_Lt0
-  STRING_Lu0
-  STRING_Lycian0
-  STRING_Lydian0
-  STRING_M0
-  STRING_Mahajani0
-  STRING_Makasar0
-  STRING_Malayalam0
-  STRING_Mandaic0
-  STRING_Manichaean0
-  STRING_Marchen0
-  STRING_Masaram_Gondi0
-  STRING_Mc0
-  STRING_Me0
-  STRING_Medefaidrin0
-  STRING_Meetei_Mayek0
-  STRING_Mende_Kikakui0
-  STRING_Meroitic_Cursive0
-  STRING_Meroitic_Hieroglyphs0
-  STRING_Miao0
-  STRING_Mn0
-  STRING_Modi0
-  STRING_Mongolian0
-  STRING_Mro0
-  STRING_Multani0
-  STRING_Myanmar0
-  STRING_N0
-  STRING_Nabataean0
-  STRING_Nandinagari0
-  STRING_Nd0
-  STRING_New_Tai_Lue0
-  STRING_Newa0
-  STRING_Nko0
-  STRING_Nl0
-  STRING_No0
-  STRING_Nushu0
-  STRING_Nyiakeng_Puachue_Hmong0
-  STRING_Ogham0
-  STRING_Ol_Chiki0
-  STRING_Old_Hungarian0
-  STRING_Old_Italic0
-  STRING_Old_North_Arabian0
-  STRING_Old_Permic0
-  STRING_Old_Persian0
-  STRING_Old_Sogdian0
-  STRING_Old_South_Arabian0
-  STRING_Old_Turkic0
-  STRING_Old_Uyghur0
-  STRING_Oriya0
-  STRING_Osage0
-  STRING_Osmanya0
-  STRING_P0
-  STRING_Pahawh_Hmong0
-  STRING_Palmyrene0
-  STRING_Pau_Cin_Hau0
-  STRING_Pc0
-  STRING_Pd0
-  STRING_Pe0
-  STRING_Pf0
-  STRING_Phags_Pa0
-  STRING_Phoenician0
-  STRING_Pi0
-  STRING_Po0
-  STRING_Ps0
-  STRING_Psalter_Pahlavi0
-  STRING_Rejang0
-  STRING_Runic0
-  STRING_S0
-  STRING_Samaritan0
-  STRING_Saurashtra0
-  STRING_Sc0
-  STRING_Sharada0
-  STRING_Shavian0
-  STRING_Siddham0
-  STRING_SignWriting0
-  STRING_Sinhala0
-  STRING_Sk0
-  STRING_Sm0
-  STRING_So0
-  STRING_Sogdian0
-  STRING_Sora_Sompeng0
-  STRING_Soyombo0
-  STRING_Sundanese0
-  STRING_Syloti_Nagri0
-  STRING_Syriac0
-  STRING_Tagalog0
-  STRING_Tagbanwa0
-  STRING_Tai_Le0
-  STRING_Tai_Tham0
-  STRING_Tai_Viet0
-  STRING_Takri0
-  STRING_Tamil0
-  STRING_Tangsa0
-  STRING_Tangut0
-  STRING_Telugu0
-  STRING_Thaana0
-  STRING_Thai0
-  STRING_Tibetan0
-  STRING_Tifinagh0
-  STRING_Tirhuta0
-  STRING_Toto0
-  STRING_Ugaritic0
-  STRING_Unknown0
-  STRING_Vai0
-  STRING_Vithkuqi0
-  STRING_Wancho0
-  STRING_Warang_Citi0
-  STRING_Xan0
-  STRING_Xps0
-  STRING_Xsp0
-  STRING_Xuc0
-  STRING_Xwd0
-  STRING_Yezidi0
-  STRING_Yi0
-  STRING_Z0
-  STRING_Zanabazar_Square0
-  STRING_Zl0
-  STRING_Zp0
-  STRING_Zs0;
-
-const ucp_type_table PRIV(utt)[] = {
-  {   0, PT_SC, ucp_Adlam },
-  {   6, PT_SC, ucp_Ahom },
-  {  11, PT_SC, ucp_Anatolian_Hieroglyphs },
-  {  33, PT_ANY, 0 },
-  {  37, PT_SC, ucp_Arabic },
-  {  44, PT_SC, ucp_Armenian },
-  {  53, PT_SC, ucp_Avestan },
-  {  61, PT_SC, ucp_Balinese },
-  {  70, PT_SC, ucp_Bamum },
-  {  76, PT_SC, ucp_Bassa_Vah },
-  {  86, PT_SC, ucp_Batak },
-  {  92, PT_SC, ucp_Bengali },
-  { 100, PT_SC, ucp_Bhaiksuki },
-  { 110, PT_SC, ucp_Bopomofo },
-  { 119, PT_SC, ucp_Brahmi },
-  { 126, PT_SC, ucp_Braille },
-  { 134, PT_SC, ucp_Buginese },
-  { 143, PT_SC, ucp_Buhid },
-  { 149, PT_GC, ucp_C },
-  { 151, PT_SC, ucp_Canadian_Aboriginal },
-  { 171, PT_SC, ucp_Carian },
-  { 178, PT_SC, ucp_Caucasian_Albanian },
-  { 197, PT_PC, ucp_Cc },
-  { 200, PT_PC, ucp_Cf },
-  { 203, PT_SC, ucp_Chakma },
-  { 210, PT_SC, ucp_Cham },
-  { 215, PT_SC, ucp_Cherokee },
-  { 224, PT_SC, ucp_Chorasmian },
-  { 235, PT_PC, ucp_Cn },
-  { 238, PT_PC, ucp_Co },
-  { 241, PT_SC, ucp_Common },
-  { 248, PT_SC, ucp_Coptic },
-  { 255, PT_PC, ucp_Cs },
-  { 258, PT_SC, ucp_Cuneiform },
-  { 268, PT_SC, ucp_Cypriot },
-  { 276, PT_SC, ucp_Cypro_Minoan },
-  { 289, PT_SC, ucp_Cyrillic },
-  { 298, PT_SC, ucp_Deseret },
-  { 306, PT_SC, ucp_Devanagari },
-  { 317, PT_SC, ucp_Dives_Akuru },
-  { 329, PT_SC, ucp_Dogra },
-  { 335, PT_SC, ucp_Duployan },
-  { 344, PT_SC, ucp_Egyptian_Hieroglyphs },
-  { 365, PT_SC, ucp_Elbasan },
-  { 373, PT_SC, ucp_Elymaic },
-  { 381, PT_SC, ucp_Ethiopic },
-  { 390, PT_SC, ucp_Georgian },
-  { 399, PT_SC, ucp_Glagolitic },
-  { 410, PT_SC, ucp_Gothic },
-  { 417, PT_SC, ucp_Grantha },
-  { 425, PT_SC, ucp_Greek },
-  { 431, PT_SC, ucp_Gujarati },
-  { 440, PT_SC, ucp_Gunjala_Gondi },
-  { 454, PT_SC, ucp_Gurmukhi },
-  { 463, PT_SC, ucp_Han },
-  { 467, PT_SC, ucp_Hangul },
-  { 474, PT_SC, ucp_Hanifi_Rohingya },
-  { 490, PT_SC, ucp_Hanunoo },
-  { 498, PT_SC, ucp_Hatran },
-  { 505, PT_SC, ucp_Hebrew },
-  { 512, PT_SC, ucp_Hiragana },
-  { 521, PT_SC, ucp_Imperial_Aramaic },
-  { 538, PT_SC, ucp_Inherited },
-  { 548, PT_SC, ucp_Inscriptional_Pahlavi },
-  { 570, PT_SC, ucp_Inscriptional_Parthian },
-  { 593, PT_SC, ucp_Javanese },
-  { 602, PT_SC, ucp_Kaithi },
-  { 609, PT_SC, ucp_Kannada },
-  { 617, PT_SC, ucp_Katakana },
-  { 626, PT_SC, ucp_Kayah_Li },
-  { 635, PT_SC, ucp_Kharoshthi },
-  { 646, PT_SC, ucp_Khitan_Small_Script },
-  { 666, PT_SC, ucp_Khmer },
-  { 672, PT_SC, ucp_Khojki },
-  { 679, PT_SC, ucp_Khudawadi },
-  { 689, PT_GC, ucp_L },
-  { 691, PT_LAMP, 0 },
-  { 694, PT_SC, ucp_Lao },
-  { 698, PT_SC, ucp_Latin },
-  { 704, PT_SC, ucp_Lepcha },
-  { 711, PT_SC, ucp_Limbu },
-  { 717, PT_SC, ucp_Linear_A },
-  { 726, PT_SC, ucp_Linear_B },
-  { 735, PT_SC, ucp_Lisu },
-  { 740, PT_PC, ucp_Ll },
-  { 743, PT_PC, ucp_Lm },
-  { 746, PT_PC, ucp_Lo },
-  { 749, PT_PC, ucp_Lt },
-  { 752, PT_PC, ucp_Lu },
-  { 755, PT_SC, ucp_Lycian },
-  { 762, PT_SC, ucp_Lydian },
-  { 769, PT_GC, ucp_M },
-  { 771, PT_SC, ucp_Mahajani },
-  { 780, PT_SC, ucp_Makasar },
-  { 788, PT_SC, ucp_Malayalam },
-  { 798, PT_SC, ucp_Mandaic },
-  { 806, PT_SC, ucp_Manichaean },
-  { 817, PT_SC, ucp_Marchen },
-  { 825, PT_SC, ucp_Masaram_Gondi },
-  { 839, PT_PC, ucp_Mc },
-  { 842, PT_PC, ucp_Me },
-  { 845, PT_SC, ucp_Medefaidrin },
-  { 857, PT_SC, ucp_Meetei_Mayek },
-  { 870, PT_SC, ucp_Mende_Kikakui },
-  { 884, PT_SC, ucp_Meroitic_Cursive },
-  { 901, PT_SC, ucp_Meroitic_Hieroglyphs },
-  { 922, PT_SC, ucp_Miao },
-  { 927, PT_PC, ucp_Mn },
-  { 930, PT_SC, ucp_Modi },
-  { 935, PT_SC, ucp_Mongolian },
-  { 945, PT_SC, ucp_Mro },
-  { 949, PT_SC, ucp_Multani },
-  { 957, PT_SC, ucp_Myanmar },
-  { 965, PT_GC, ucp_N },
-  { 967, PT_SC, ucp_Nabataean },
-  { 977, PT_SC, ucp_Nandinagari },
-  { 989, PT_PC, ucp_Nd },
-  { 992, PT_SC, ucp_New_Tai_Lue },
-  { 1004, PT_SC, ucp_Newa },
-  { 1009, PT_SC, ucp_Nko },
-  { 1013, PT_PC, ucp_Nl },
-  { 1016, PT_PC, ucp_No },
-  { 1019, PT_SC, ucp_Nushu },
-  { 1025, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
-  { 1048, PT_SC, ucp_Ogham },
-  { 1054, PT_SC, ucp_Ol_Chiki },
-  { 1063, PT_SC, ucp_Old_Hungarian },
-  { 1077, PT_SC, ucp_Old_Italic },
-  { 1088, PT_SC, ucp_Old_North_Arabian },
-  { 1106, PT_SC, ucp_Old_Permic },
-  { 1117, PT_SC, ucp_Old_Persian },
-  { 1129, PT_SC, ucp_Old_Sogdian },
-  { 1141, PT_SC, ucp_Old_South_Arabian },
-  { 1159, PT_SC, ucp_Old_Turkic },
-  { 1170, PT_SC, ucp_Old_Uyghur },
-  { 1181, PT_SC, ucp_Oriya },
-  { 1187, PT_SC, ucp_Osage },
-  { 1193, PT_SC, ucp_Osmanya },
-  { 1201, PT_GC, ucp_P },
-  { 1203, PT_SC, ucp_Pahawh_Hmong },
-  { 1216, PT_SC, ucp_Palmyrene },
-  { 1226, PT_SC, ucp_Pau_Cin_Hau },
-  { 1238, PT_PC, ucp_Pc },
-  { 1241, PT_PC, ucp_Pd },
-  { 1244, PT_PC, ucp_Pe },
-  { 1247, PT_PC, ucp_Pf },
-  { 1250, PT_SC, ucp_Phags_Pa },
-  { 1259, PT_SC, ucp_Phoenician },
-  { 1270, PT_PC, ucp_Pi },
-  { 1273, PT_PC, ucp_Po },
-  { 1276, PT_PC, ucp_Ps },
-  { 1279, PT_SC, ucp_Psalter_Pahlavi },
-  { 1295, PT_SC, ucp_Rejang },
-  { 1302, PT_SC, ucp_Runic },
-  { 1308, PT_GC, ucp_S },
-  { 1310, PT_SC, ucp_Samaritan },
-  { 1320, PT_SC, ucp_Saurashtra },
-  { 1331, PT_PC, ucp_Sc },
-  { 1334, PT_SC, ucp_Sharada },
-  { 1342, PT_SC, ucp_Shavian },
-  { 1350, PT_SC, ucp_Siddham },
-  { 1358, PT_SC, ucp_SignWriting },
-  { 1370, PT_SC, ucp_Sinhala },
-  { 1378, PT_PC, ucp_Sk },
-  { 1381, PT_PC, ucp_Sm },
-  { 1384, PT_PC, ucp_So },
-  { 1387, PT_SC, ucp_Sogdian },
-  { 1395, PT_SC, ucp_Sora_Sompeng },
-  { 1408, PT_SC, ucp_Soyombo },
-  { 1416, PT_SC, ucp_Sundanese },
-  { 1426, PT_SC, ucp_Syloti_Nagri },
-  { 1439, PT_SC, ucp_Syriac },
-  { 1446, PT_SC, ucp_Tagalog },
-  { 1454, PT_SC, ucp_Tagbanwa },
-  { 1463, PT_SC, ucp_Tai_Le },
-  { 1470, PT_SC, ucp_Tai_Tham },
-  { 1479, PT_SC, ucp_Tai_Viet },
-  { 1488, PT_SC, ucp_Takri },
-  { 1494, PT_SC, ucp_Tamil },
-  { 1500, PT_SC, ucp_Tangsa },
-  { 1507, PT_SC, ucp_Tangut },
-  { 1514, PT_SC, ucp_Telugu },
-  { 1521, PT_SC, ucp_Thaana },
-  { 1528, PT_SC, ucp_Thai },
-  { 1533, PT_SC, ucp_Tibetan },
-  { 1541, PT_SC, ucp_Tifinagh },
-  { 1550, PT_SC, ucp_Tirhuta },
-  { 1558, PT_SC, ucp_Toto },
-  { 1563, PT_SC, ucp_Ugaritic },
-  { 1572, PT_SC, ucp_Unknown },
-  { 1580, PT_SC, ucp_Vai },
-  { 1584, PT_SC, ucp_Vithkuqi },
-  { 1593, PT_SC, ucp_Wancho },
-  { 1600, PT_SC, ucp_Warang_Citi },
-  { 1612, PT_ALNUM, 0 },
-  { 1616, PT_PXSPACE, 0 },
-  { 1620, PT_SPACE, 0 },
-  { 1624, PT_UCNC, 0 },
-  { 1628, PT_WORD, 0 },
-  { 1632, PT_SC, ucp_Yezidi },
-  { 1639, PT_SC, ucp_Yi },
-  { 1642, PT_GC, ucp_Z },
-  { 1644, PT_SC, ucp_Zanabazar_Square },
-  { 1661, PT_PC, ucp_Zl },
-  { 1664, PT_PC, ucp_Zp },
-  { 1667, PT_PC, ucp_Zs }
-};
+/* Finally, include the tables that are auto-generated from the Unicode data
+files. */
 
-const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
+#include "pcre2_ucptables.c"
 
 #endif /* SUPPORT_UNICODE */
 

文件差异内容过多而无法显示
+ 1778 - 1138
thirdparty/pcre2/src/pcre2_ucd.c


+ 221 - 132
thirdparty/pcre2/src/pcre2_ucp.h

@@ -7,7 +7,11 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2018 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
+Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
+a new version of this code.
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -38,31 +42,27 @@ POSSIBILITY OF SUCH DAMAGE.
 -----------------------------------------------------------------------------
 */
 
-
 #ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
 #define PCRE2_UCP_H_IDEMPOTENT_GUARD
 
-/* This file contains definitions of the property values that are returned by
-the UCD access macros. New values that are added for new releases of Unicode
-should always be at the end of each enum, for backwards compatibility.
+/* This file contains definitions of the Unicode property values that are
+returned by the UCD access macros and used throughout PCRE2.
 
-IMPORTANT: Note also that the specific numeric values of the enums have to be
-the same as the values that are generated by the maint/MultiStage2.py script,
-where the equivalent property descriptive names are listed in vectors.
-
-ALSO: The specific values of the first two enums are assumed for the table
-called catposstab in pcre2_compile.c. */
+IMPORTANT: The specific values of the first two enums (general and particular
+character categories) are assumed by the table called catposstab in the file
+pcre2_auto_possess.c. They are unlikely to change, but should be checked after
+an update. */
 
 /* These are the general character categories. */
 
 enum {
-  ucp_C,     /* Other */
-  ucp_L,     /* Letter */
-  ucp_M,     /* Mark */
-  ucp_N,     /* Number */
-  ucp_P,     /* Punctuation */
-  ucp_S,     /* Symbol */
-  ucp_Z      /* Separator */
+  ucp_C,
+  ucp_L,
+  ucp_M,
+  ucp_N,
+  ucp_P,
+  ucp_S,
+  ucp_Z,
 };
 
 /* These are the particular character categories. */
@@ -97,7 +97,98 @@ enum {
   ucp_So,    /* Other symbol */
   ucp_Zl,    /* Line separator */
   ucp_Zp,    /* Paragraph separator */
-  ucp_Zs     /* Space separator */
+  ucp_Zs,    /* Space separator */
+};
+
+/* These are Boolean properties. */
+
+enum {
+  ucp_ASCII,
+  ucp_ASCII_Hex_Digit,
+  ucp_Alphabetic,
+  ucp_Bidi_Control,
+  ucp_Bidi_Mirrored,
+  ucp_Case_Ignorable,
+  ucp_Cased,
+  ucp_Changes_When_Casefolded,
+  ucp_Changes_When_Casemapped,
+  ucp_Changes_When_Lowercased,
+  ucp_Changes_When_Titlecased,
+  ucp_Changes_When_Uppercased,
+  ucp_Dash,
+  ucp_Default_Ignorable_Code_Point,
+  ucp_Deprecated,
+  ucp_Diacritic,
+  ucp_Emoji,
+  ucp_Emoji_Component,
+  ucp_Emoji_Modifier,
+  ucp_Emoji_Modifier_Base,
+  ucp_Emoji_Presentation,
+  ucp_Extended_Pictographic,
+  ucp_Extender,
+  ucp_Grapheme_Base,
+  ucp_Grapheme_Extend,
+  ucp_Grapheme_Link,
+  ucp_Hex_Digit,
+  ucp_IDS_Binary_Operator,
+  ucp_IDS_Trinary_Operator,
+  ucp_ID_Continue,
+  ucp_ID_Start,
+  ucp_Ideographic,
+  ucp_Join_Control,
+  ucp_Logical_Order_Exception,
+  ucp_Lowercase,
+  ucp_Math,
+  ucp_Noncharacter_Code_Point,
+  ucp_Pattern_Syntax,
+  ucp_Pattern_White_Space,
+  ucp_Prepended_Concatenation_Mark,
+  ucp_Quotation_Mark,
+  ucp_Radical,
+  ucp_Regional_Indicator,
+  ucp_Sentence_Terminal,
+  ucp_Soft_Dotted,
+  ucp_Terminal_Punctuation,
+  ucp_Unified_Ideograph,
+  ucp_Uppercase,
+  ucp_Variation_Selector,
+  ucp_White_Space,
+  ucp_XID_Continue,
+  ucp_XID_Start,
+  /* This must be last */
+  ucp_Bprop_Count
+};
+
+/* Size of entries in ucd_boolprop_sets[] */
+
+#define ucd_boolprop_sets_item_size 2
+
+/* These are the bidi class values. */
+
+enum {
+  ucp_bidiAL,   /* Arabic letter */
+  ucp_bidiAN,   /* Arabic number */
+  ucp_bidiB,    /* Paragraph separator */
+  ucp_bidiBN,   /* Boundary neutral */
+  ucp_bidiCS,   /* Common separator */
+  ucp_bidiEN,   /* European number */
+  ucp_bidiES,   /* European separator */
+  ucp_bidiET,   /* European terminator */
+  ucp_bidiFSI,  /* First strong isolate */
+  ucp_bidiL,    /* Left to right */
+  ucp_bidiLRE,  /* Left to right embedding */
+  ucp_bidiLRI,  /* Left to right isolate */
+  ucp_bidiLRO,  /* Left to right override */
+  ucp_bidiNSM,  /* Non-spacing mark */
+  ucp_bidiON,   /* Other neutral */
+  ucp_bidiPDF,  /* Pop directional format */
+  ucp_bidiPDI,  /* Pop directional isolate */
+  ucp_bidiR,    /* Right to left */
+  ucp_bidiRLE,  /* Right to left embedding */
+  ucp_bidiRLI,  /* Right to left isolate */
+  ucp_bidiRLO,  /* Right to left override */
+  ucp_bidiS,    /* Segment separator */
+  ucp_bidiWS,   /* White space */
 };
 
 /* These are grapheme break properties. The Extended Pictographic property
@@ -115,191 +206,189 @@ enum {
   ucp_gbT,                     /*  8 Hangul syllable type T */
   ucp_gbLV,                    /*  9 Hangul syllable type LV */
   ucp_gbLVT,                   /* 10 Hangul syllable type LVT */
-  ucp_gbRegionalIndicator,     /* 11 */
+  ucp_gbRegional_Indicator,    /* 11 */
   ucp_gbOther,                 /* 12 */
   ucp_gbZWJ,                   /* 13 */
-  ucp_gbExtended_Pictographic  /* 14 */
+  ucp_gbExtended_Pictographic, /* 14 */
 };
 
 /* These are the script identifications. */
 
 enum {
-  ucp_Unknown,
-  ucp_Arabic,
-  ucp_Armenian,
-  ucp_Bengali,
-  ucp_Bopomofo,
-  ucp_Braille,
-  ucp_Buginese,
-  ucp_Buhid,
-  ucp_Canadian_Aboriginal,
-  ucp_Cherokee,
-  ucp_Common,
-  ucp_Coptic,
-  ucp_Cypriot,
+  /* Scripts which have characters in other scripts. */
+  ucp_Latin,
+  ucp_Greek,
   ucp_Cyrillic,
-  ucp_Deseret,
+  ucp_Arabic,
+  ucp_Syriac,
+  ucp_Thaana,
   ucp_Devanagari,
-  ucp_Ethiopic,
-  ucp_Georgian,
-  ucp_Glagolitic,
-  ucp_Gothic,
-  ucp_Greek,
-  ucp_Gujarati,
+  ucp_Bengali,
   ucp_Gurmukhi,
-  ucp_Han,
-  ucp_Hangul,
-  ucp_Hanunoo,
-  ucp_Hebrew,
-  ucp_Hiragana,
-  ucp_Inherited,
+  ucp_Gujarati,
+  ucp_Oriya,
+  ucp_Tamil,
+  ucp_Telugu,
   ucp_Kannada,
-  ucp_Katakana,
-  ucp_Kharoshthi,
-  ucp_Khmer,
-  ucp_Lao,
-  ucp_Latin,
-  ucp_Limbu,
-  ucp_Linear_B,
   ucp_Malayalam,
-  ucp_Mongolian,
-  ucp_Myanmar,
-  ucp_New_Tai_Lue,
-  ucp_Ogham,
-  ucp_Old_Italic,
-  ucp_Old_Persian,
-  ucp_Oriya,
-  ucp_Osmanya,
-  ucp_Runic,
-  ucp_Shavian,
   ucp_Sinhala,
-  ucp_Syloti_Nagri,
-  ucp_Syriac,
+  ucp_Myanmar,
+  ucp_Georgian,
+  ucp_Hangul,
+  ucp_Mongolian,
+  ucp_Hiragana,
+  ucp_Katakana,
+  ucp_Bopomofo,
+  ucp_Han,
+  ucp_Yi,
   ucp_Tagalog,
+  ucp_Hanunoo,
+  ucp_Buhid,
   ucp_Tagbanwa,
+  ucp_Limbu,
   ucp_Tai_Le,
-  ucp_Tamil,
-  ucp_Telugu,
-  ucp_Thaana,
+  ucp_Linear_B,
+  ucp_Cypriot,
+  ucp_Buginese,
+  ucp_Coptic,
+  ucp_Glagolitic,
+  ucp_Syloti_Nagri,
+  ucp_Phags_Pa,
+  ucp_Nko,
+  ucp_Kayah_Li,
+  ucp_Javanese,
+  ucp_Kaithi,
+  ucp_Mandaic,
+  ucp_Chakma,
+  ucp_Sharada,
+  ucp_Takri,
+  ucp_Duployan,
+  ucp_Grantha,
+  ucp_Khojki,
+  ucp_Linear_A,
+  ucp_Mahajani,
+  ucp_Manichaean,
+  ucp_Modi,
+  ucp_Old_Permic,
+  ucp_Psalter_Pahlavi,
+  ucp_Khudawadi,
+  ucp_Tirhuta,
+  ucp_Multani,
+  ucp_Adlam,
+  ucp_Masaram_Gondi,
+  ucp_Dogra,
+  ucp_Gunjala_Gondi,
+  ucp_Hanifi_Rohingya,
+  ucp_Sogdian,
+  ucp_Nandinagari,
+  ucp_Yezidi,
+  ucp_Cypro_Minoan,
+  ucp_Old_Uyghur,
+
+  /* Scripts which have no characters in other scripts. */
+  ucp_Unknown,
+  ucp_Common,
+  ucp_Armenian,
+  ucp_Hebrew,
   ucp_Thai,
+  ucp_Lao,
   ucp_Tibetan,
-  ucp_Tifinagh,
+  ucp_Ethiopic,
+  ucp_Cherokee,
+  ucp_Canadian_Aboriginal,
+  ucp_Ogham,
+  ucp_Runic,
+  ucp_Khmer,
+  ucp_Old_Italic,
+  ucp_Gothic,
+  ucp_Deseret,
+  ucp_Inherited,
   ucp_Ugaritic,
-  ucp_Yi,
-  /* New for Unicode 5.0 */
+  ucp_Shavian,
+  ucp_Osmanya,
+  ucp_Braille,
+  ucp_New_Tai_Lue,
+  ucp_Tifinagh,
+  ucp_Old_Persian,
+  ucp_Kharoshthi,
   ucp_Balinese,
   ucp_Cuneiform,
-  ucp_Nko,
-  ucp_Phags_Pa,
   ucp_Phoenician,
-  /* New for Unicode 5.1 */
-  ucp_Carian,
-  ucp_Cham,
-  ucp_Kayah_Li,
+  ucp_Sundanese,
   ucp_Lepcha,
-  ucp_Lycian,
-  ucp_Lydian,
   ucp_Ol_Chiki,
-  ucp_Rejang,
-  ucp_Saurashtra,
-  ucp_Sundanese,
   ucp_Vai,
-  /* New for Unicode 5.2 */
+  ucp_Saurashtra,
+  ucp_Rejang,
+  ucp_Lycian,
+  ucp_Carian,
+  ucp_Lydian,
+  ucp_Cham,
+  ucp_Tai_Tham,
+  ucp_Tai_Viet,
   ucp_Avestan,
-  ucp_Bamum,
   ucp_Egyptian_Hieroglyphs,
-  ucp_Imperial_Aramaic,
-  ucp_Inscriptional_Pahlavi,
-  ucp_Inscriptional_Parthian,
-  ucp_Javanese,
-  ucp_Kaithi,
+  ucp_Samaritan,
   ucp_Lisu,
+  ucp_Bamum,
   ucp_Meetei_Mayek,
+  ucp_Imperial_Aramaic,
   ucp_Old_South_Arabian,
+  ucp_Inscriptional_Parthian,
+  ucp_Inscriptional_Pahlavi,
   ucp_Old_Turkic,
-  ucp_Samaritan,
-  ucp_Tai_Tham,
-  ucp_Tai_Viet,
-  /* New for Unicode 6.0.0 */
   ucp_Batak,
   ucp_Brahmi,
-  ucp_Mandaic,
-  /* New for Unicode 6.1.0 */
-  ucp_Chakma,
   ucp_Meroitic_Cursive,
   ucp_Meroitic_Hieroglyphs,
   ucp_Miao,
-  ucp_Sharada,
   ucp_Sora_Sompeng,
-  ucp_Takri,
-  /* New for Unicode 7.0.0 */
-  ucp_Bassa_Vah,
   ucp_Caucasian_Albanian,
-  ucp_Duployan,
+  ucp_Bassa_Vah,
   ucp_Elbasan,
-  ucp_Grantha,
-  ucp_Khojki,
-  ucp_Khudawadi,
-  ucp_Linear_A,
-  ucp_Mahajani,
-  ucp_Manichaean,
+  ucp_Pahawh_Hmong,
   ucp_Mende_Kikakui,
-  ucp_Modi,
   ucp_Mro,
-  ucp_Nabataean,
   ucp_Old_North_Arabian,
-  ucp_Old_Permic,
-  ucp_Pahawh_Hmong,
+  ucp_Nabataean,
   ucp_Palmyrene,
-  ucp_Psalter_Pahlavi,
   ucp_Pau_Cin_Hau,
   ucp_Siddham,
-  ucp_Tirhuta,
   ucp_Warang_Citi,
-  /* New for Unicode 8.0.0 */
   ucp_Ahom,
   ucp_Anatolian_Hieroglyphs,
   ucp_Hatran,
-  ucp_Multani,
   ucp_Old_Hungarian,
   ucp_SignWriting,
-  /* New for Unicode 10.0.0 (no update since 8.0.0) */
-  ucp_Adlam,
   ucp_Bhaiksuki,
   ucp_Marchen,
   ucp_Newa,
   ucp_Osage,
   ucp_Tangut,
-  ucp_Masaram_Gondi,
   ucp_Nushu,
   ucp_Soyombo,
   ucp_Zanabazar_Square,
-  /* New for Unicode 11.0.0 */
-  ucp_Dogra,
-  ucp_Gunjala_Gondi,
-  ucp_Hanifi_Rohingya,
   ucp_Makasar,
   ucp_Medefaidrin,
   ucp_Old_Sogdian,
-  ucp_Sogdian,
-  /* New for Unicode 12.0.0 */
   ucp_Elymaic,
-  ucp_Nandinagari,
   ucp_Nyiakeng_Puachue_Hmong,
   ucp_Wancho,
-  /* New for Unicode 13.0.0 */
   ucp_Chorasmian,
   ucp_Dives_Akuru,
   ucp_Khitan_Small_Script,
-  ucp_Yezidi,
-  /* New for Unicode 14.0.0 */
-  ucp_Cypro_Minoan,
-  ucp_Old_Uyghur,
   ucp_Tangsa,
   ucp_Toto,
-  ucp_Vithkuqi
+  ucp_Vithkuqi,
+
+  /* This must be last */
+  ucp_Script_Count
 };
 
+/* Size of entries in ucd_script_sets[] */
+
+#define ucd_script_sets_item_size 3
+
 #endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
 
 /* End of pcre2_ucp.h */

+ 1524 - 0
thirdparty/pcre2/src/pcre2_ucptables.c

@@ -0,0 +1,1524 @@
+/*************************************************
+*      Perl-Compatible Regular Expressions       *
+*************************************************/
+
+/* PCRE is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+                       Written by Philip Hazel
+     Original API code Copyright (c) 1997-2012 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
+
+This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
+Instead, modify the maint/GenerateUcpTables.py script and run it to generate
+a new version of this code.
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+
+    * Neither the name of the University of Cambridge nor the names of its
+      contributors may be used to endorse or promote products derived from
+      this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifdef SUPPORT_UNICODE
+
+/* The PRIV(utt)[] table below translates Unicode property names into type and
+code values. It is searched by binary chop, so must be in collating sequence of
+name. Originally, the table contained pointers to the name strings in the first
+field of each entry. However, that leads to a large number of relocations when
+a shared library is dynamically loaded. A significant reduction is made by
+putting all the names into a single, large string and using offsets instead.
+All letters are lower cased, and underscores are removed, in accordance with
+the "loose matching" rules that Unicode advises and Perl uses. */
+
+#define STRING_adlam0 STR_a STR_d STR_l STR_a STR_m "\0"
+#define STRING_adlm0 STR_a STR_d STR_l STR_m "\0"
+#define STRING_aghb0 STR_a STR_g STR_h STR_b "\0"
+#define STRING_ahex0 STR_a STR_h STR_e STR_x "\0"
+#define STRING_ahom0 STR_a STR_h STR_o STR_m "\0"
+#define STRING_alpha0 STR_a STR_l STR_p STR_h STR_a "\0"
+#define STRING_alphabetic0 STR_a STR_l STR_p STR_h STR_a STR_b STR_e STR_t STR_i STR_c "\0"
+#define STRING_anatolianhieroglyphs0 STR_a STR_n STR_a STR_t STR_o STR_l STR_i STR_a STR_n STR_h STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
+#define STRING_any0 STR_a STR_n STR_y "\0"
+#define STRING_arab0 STR_a STR_r STR_a STR_b "\0"
+#define STRING_arabic0 STR_a STR_r STR_a STR_b STR_i STR_c "\0"
+#define STRING_armenian0 STR_a STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0"
+#define STRING_armi0 STR_a STR_r STR_m STR_i "\0"
+#define STRING_armn0 STR_a STR_r STR_m STR_n "\0"
+#define STRING_ascii0 STR_a STR_s STR_c STR_i STR_i "\0"
+#define STRING_asciihexdigit0 STR_a STR_s STR_c STR_i STR_i STR_h STR_e STR_x STR_d STR_i STR_g STR_i STR_t "\0"
+#define STRING_avestan0 STR_a STR_v STR_e STR_s STR_t STR_a STR_n "\0"
+#define STRING_avst0 STR_a STR_v STR_s STR_t "\0"
+#define STRING_bali0 STR_b STR_a STR_l STR_i "\0"
+#define STRING_balinese0 STR_b STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
+#define STRING_bamu0 STR_b STR_a STR_m STR_u "\0"
+#define STRING_bamum0 STR_b STR_a STR_m STR_u STR_m "\0"
+#define STRING_bass0 STR_b STR_a STR_s STR_s "\0"
+#define STRING_bassavah0 STR_b STR_a STR_s STR_s STR_a STR_v STR_a STR_h "\0"
+#define STRING_batak0 STR_b STR_a STR_t STR_a STR_k "\0"
+#define STRING_batk0 STR_b STR_a STR_t STR_k "\0"
+#define STRING_beng0 STR_b STR_e STR_n STR_g "\0"
+#define STRING_bengali0 STR_b STR_e STR_n STR_g STR_a STR_l STR_i "\0"
+#define STRING_bhaiksuki0 STR_b STR_h STR_a STR_i STR_k STR_s STR_u STR_k STR_i "\0"
+#define STRING_bhks0 STR_b STR_h STR_k STR_s "\0"
+#define STRING_bidial0 STR_b STR_i STR_d STR_i STR_a STR_l "\0"
+#define STRING_bidian0 STR_b STR_i STR_d STR_i STR_a STR_n "\0"
+#define STRING_bidib0 STR_b STR_i STR_d STR_i STR_b "\0"
+#define STRING_bidibn0 STR_b STR_i STR_d STR_i STR_b STR_n "\0"
+#define STRING_bidic0 STR_b STR_i STR_d STR_i STR_c "\0"
+#define STRING_bidicontrol0 STR_b STR_i STR_d STR_i STR_c STR_o STR_n STR_t STR_r STR_o STR_l "\0"
+#define STRING_bidics0 STR_b STR_i STR_d STR_i STR_c STR_s "\0"
+#define STRING_bidien0 STR_b STR_i STR_d STR_i STR_e STR_n "\0"
+#define STRING_bidies0 STR_b STR_i STR_d STR_i STR_e STR_s "\0"
+#define STRING_bidiet0 STR_b STR_i STR_d STR_i STR_e STR_t "\0"
+#define STRING_bidifsi0 STR_b STR_i STR_d STR_i STR_f STR_s STR_i "\0"
+#define STRING_bidil0 STR_b STR_i STR_d STR_i STR_l "\0"
+#define STRING_bidilre0 STR_b STR_i STR_d STR_i STR_l STR_r STR_e "\0"
+#define STRING_bidilri0 STR_b STR_i STR_d STR_i STR_l STR_r STR_i "\0"
+#define STRING_bidilro0 STR_b STR_i STR_d STR_i STR_l STR_r STR_o "\0"
+#define STRING_bidim0 STR_b STR_i STR_d STR_i STR_m "\0"
+#define STRING_bidimirrored0 STR_b STR_i STR_d STR_i STR_m STR_i STR_r STR_r STR_o STR_r STR_e STR_d "\0"
+#define STRING_bidinsm0 STR_b STR_i STR_d STR_i STR_n STR_s STR_m "\0"
+#define STRING_bidion0 STR_b STR_i STR_d STR_i STR_o STR_n "\0"
+#define STRING_bidipdf0 STR_b STR_i STR_d STR_i STR_p STR_d STR_f "\0"
+#define STRING_bidipdi0 STR_b STR_i STR_d STR_i STR_p STR_d STR_i "\0"
+#define STRING_bidir0 STR_b STR_i STR_d STR_i STR_r "\0"
+#define STRING_bidirle0 STR_b STR_i STR_d STR_i STR_r STR_l STR_e "\0"
+#define STRING_bidirli0 STR_b STR_i STR_d STR_i STR_r STR_l STR_i "\0"
+#define STRING_bidirlo0 STR_b STR_i STR_d STR_i STR_r STR_l STR_o "\0"
+#define STRING_bidis0 STR_b STR_i STR_d STR_i STR_s "\0"
+#define STRING_bidiws0 STR_b STR_i STR_d STR_i STR_w STR_s "\0"
+#define STRING_bopo0 STR_b STR_o STR_p STR_o "\0"
+#define STRING_bopomofo0 STR_b STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
+#define STRING_brah0 STR_b STR_r STR_a STR_h "\0"
+#define STRING_brahmi0 STR_b STR_r STR_a STR_h STR_m STR_i "\0"
+#define STRING_brai0 STR_b STR_r STR_a STR_i "\0"
+#define STRING_braille0 STR_b STR_r STR_a STR_i STR_l STR_l STR_e "\0"
+#define STRING_bugi0 STR_b STR_u STR_g STR_i "\0"
+#define STRING_buginese0 STR_b STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0"
+#define STRING_buhd0 STR_b STR_u STR_h STR_d "\0"
+#define STRING_buhid0 STR_b STR_u STR_h STR_i STR_d "\0"
+#define STRING_c0 STR_c "\0"
+#define STRING_cakm0 STR_c STR_a STR_k STR_m "\0"
+#define STRING_canadianaboriginal0 STR_c STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_a STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
+#define STRING_cans0 STR_c STR_a STR_n STR_s "\0"
+#define STRING_cari0 STR_c STR_a STR_r STR_i "\0"
+#define STRING_carian0 STR_c STR_a STR_r STR_i STR_a STR_n "\0"
+#define STRING_cased0 STR_c STR_a STR_s STR_e STR_d "\0"
+#define STRING_caseignorable0 STR_c STR_a STR_s STR_e STR_i STR_g STR_n STR_o STR_r STR_a STR_b STR_l STR_e "\0"
+#define STRING_caucasianalbanian0 STR_c STR_a STR_u STR_c STR_a STR_s STR_i STR_a STR_n STR_a STR_l STR_b STR_a STR_n STR_i STR_a STR_n "\0"
+#define STRING_cc0 STR_c STR_c "\0"
+#define STRING_cf0 STR_c STR_f "\0"
+#define STRING_chakma0 STR_c STR_h STR_a STR_k STR_m STR_a "\0"
+#define STRING_cham0 STR_c STR_h STR_a STR_m "\0"
+#define STRING_changeswhencasefolded0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_c STR_a STR_s STR_e STR_f STR_o STR_l STR_d STR_e STR_d "\0"
+#define STRING_changeswhencasemapped0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_c STR_a STR_s STR_e STR_m STR_a STR_p STR_p STR_e STR_d "\0"
+#define STRING_changeswhenlowercased0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_l STR_o STR_w STR_e STR_r STR_c STR_a STR_s STR_e STR_d "\0"
+#define STRING_changeswhentitlecased0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_t STR_i STR_t STR_l STR_e STR_c STR_a STR_s STR_e STR_d "\0"
+#define STRING_changeswhenuppercased0 STR_c STR_h STR_a STR_n STR_g STR_e STR_s STR_w STR_h STR_e STR_n STR_u STR_p STR_p STR_e STR_r STR_c STR_a STR_s STR_e STR_d "\0"
+#define STRING_cher0 STR_c STR_h STR_e STR_r "\0"
+#define STRING_cherokee0 STR_c STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
+#define STRING_chorasmian0 STR_c STR_h STR_o STR_r STR_a STR_s STR_m STR_i STR_a STR_n "\0"
+#define STRING_chrs0 STR_c STR_h STR_r STR_s "\0"
+#define STRING_ci0 STR_c STR_i "\0"
+#define STRING_cn0 STR_c STR_n "\0"
+#define STRING_co0 STR_c STR_o "\0"
+#define STRING_common0 STR_c STR_o STR_m STR_m STR_o STR_n "\0"
+#define STRING_copt0 STR_c STR_o STR_p STR_t "\0"
+#define STRING_coptic0 STR_c STR_o STR_p STR_t STR_i STR_c "\0"
+#define STRING_cpmn0 STR_c STR_p STR_m STR_n "\0"
+#define STRING_cprt0 STR_c STR_p STR_r STR_t "\0"
+#define STRING_cs0 STR_c STR_s "\0"
+#define STRING_cuneiform0 STR_c STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0"
+#define STRING_cwcf0 STR_c STR_w STR_c STR_f "\0"
+#define STRING_cwcm0 STR_c STR_w STR_c STR_m "\0"
+#define STRING_cwl0 STR_c STR_w STR_l "\0"
+#define STRING_cwt0 STR_c STR_w STR_t "\0"
+#define STRING_cwu0 STR_c STR_w STR_u "\0"
+#define STRING_cypriot0 STR_c STR_y STR_p STR_r STR_i STR_o STR_t "\0"
+#define STRING_cyprominoan0 STR_c STR_y STR_p STR_r STR_o STR_m STR_i STR_n STR_o STR_a STR_n "\0"
+#define STRING_cyrillic0 STR_c STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
+#define STRING_cyrl0 STR_c STR_y STR_r STR_l "\0"
+#define STRING_dash0 STR_d STR_a STR_s STR_h "\0"
+#define STRING_defaultignorablecodepoint0 STR_d STR_e STR_f STR_a STR_u STR_l STR_t STR_i STR_g STR_n STR_o STR_r STR_a STR_b STR_l STR_e STR_c STR_o STR_d STR_e STR_p STR_o STR_i STR_n STR_t "\0"
+#define STRING_dep0 STR_d STR_e STR_p "\0"
+#define STRING_deprecated0 STR_d STR_e STR_p STR_r STR_e STR_c STR_a STR_t STR_e STR_d "\0"
+#define STRING_deseret0 STR_d STR_e STR_s STR_e STR_r STR_e STR_t "\0"
+#define STRING_deva0 STR_d STR_e STR_v STR_a "\0"
+#define STRING_devanagari0 STR_d STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
+#define STRING_di0 STR_d STR_i "\0"
+#define STRING_dia0 STR_d STR_i STR_a "\0"
+#define STRING_diacritic0 STR_d STR_i STR_a STR_c STR_r STR_i STR_t STR_i STR_c "\0"
+#define STRING_diak0 STR_d STR_i STR_a STR_k "\0"
+#define STRING_divesakuru0 STR_d STR_i STR_v STR_e STR_s STR_a STR_k STR_u STR_r STR_u "\0"
+#define STRING_dogr0 STR_d STR_o STR_g STR_r "\0"
+#define STRING_dogra0 STR_d STR_o STR_g STR_r STR_a "\0"
+#define STRING_dsrt0 STR_d STR_s STR_r STR_t "\0"
+#define STRING_dupl0 STR_d STR_u STR_p STR_l "\0"
+#define STRING_duployan0 STR_d STR_u STR_p STR_l STR_o STR_y STR_a STR_n "\0"
+#define STRING_ebase0 STR_e STR_b STR_a STR_s STR_e "\0"
+#define STRING_ecomp0 STR_e STR_c STR_o STR_m STR_p "\0"
+#define STRING_egyp0 STR_e STR_g STR_y STR_p "\0"
+#define STRING_egyptianhieroglyphs0 STR_e STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_h STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
+#define STRING_elba0 STR_e STR_l STR_b STR_a "\0"
+#define STRING_elbasan0 STR_e STR_l STR_b STR_a STR_s STR_a STR_n "\0"
+#define STRING_elym0 STR_e STR_l STR_y STR_m "\0"
+#define STRING_elymaic0 STR_e STR_l STR_y STR_m STR_a STR_i STR_c "\0"
+#define STRING_emod0 STR_e STR_m STR_o STR_d "\0"
+#define STRING_emoji0 STR_e STR_m STR_o STR_j STR_i "\0"
+#define STRING_emojicomponent0 STR_e STR_m STR_o STR_j STR_i STR_c STR_o STR_m STR_p STR_o STR_n STR_e STR_n STR_t "\0"
+#define STRING_emojimodifier0 STR_e STR_m STR_o STR_j STR_i STR_m STR_o STR_d STR_i STR_f STR_i STR_e STR_r "\0"
+#define STRING_emojimodifierbase0 STR_e STR_m STR_o STR_j STR_i STR_m STR_o STR_d STR_i STR_f STR_i STR_e STR_r STR_b STR_a STR_s STR_e "\0"
+#define STRING_emojipresentation0 STR_e STR_m STR_o STR_j STR_i STR_p STR_r STR_e STR_s STR_e STR_n STR_t STR_a STR_t STR_i STR_o STR_n "\0"
+#define STRING_epres0 STR_e STR_p STR_r STR_e STR_s "\0"
+#define STRING_ethi0 STR_e STR_t STR_h STR_i "\0"
+#define STRING_ethiopic0 STR_e STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
+#define STRING_ext0 STR_e STR_x STR_t "\0"
+#define STRING_extendedpictographic0 STR_e STR_x STR_t STR_e STR_n STR_d STR_e STR_d STR_p STR_i STR_c STR_t STR_o STR_g STR_r STR_a STR_p STR_h STR_i STR_c "\0"
+#define STRING_extender0 STR_e STR_x STR_t STR_e STR_n STR_d STR_e STR_r "\0"
+#define STRING_extpict0 STR_e STR_x STR_t STR_p STR_i STR_c STR_t "\0"
+#define STRING_geor0 STR_g STR_e STR_o STR_r "\0"
+#define STRING_georgian0 STR_g STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
+#define STRING_glag0 STR_g STR_l STR_a STR_g "\0"
+#define STRING_glagolitic0 STR_g STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
+#define STRING_gong0 STR_g STR_o STR_n STR_g "\0"
+#define STRING_gonm0 STR_g STR_o STR_n STR_m "\0"
+#define STRING_goth0 STR_g STR_o STR_t STR_h "\0"
+#define STRING_gothic0 STR_g STR_o STR_t STR_h STR_i STR_c "\0"
+#define STRING_gran0 STR_g STR_r STR_a STR_n "\0"
+#define STRING_grantha0 STR_g STR_r STR_a STR_n STR_t STR_h STR_a "\0"
+#define STRING_graphemebase0 STR_g STR_r STR_a STR_p STR_h STR_e STR_m STR_e STR_b STR_a STR_s STR_e "\0"
+#define STRING_graphemeextend0 STR_g STR_r STR_a STR_p STR_h STR_e STR_m STR_e STR_e STR_x STR_t STR_e STR_n STR_d "\0"
+#define STRING_graphemelink0 STR_g STR_r STR_a STR_p STR_h STR_e STR_m STR_e STR_l STR_i STR_n STR_k "\0"
+#define STRING_grbase0 STR_g STR_r STR_b STR_a STR_s STR_e "\0"
+#define STRING_greek0 STR_g STR_r STR_e STR_e STR_k "\0"
+#define STRING_grek0 STR_g STR_r STR_e STR_k "\0"
+#define STRING_grext0 STR_g STR_r STR_e STR_x STR_t "\0"
+#define STRING_grlink0 STR_g STR_r STR_l STR_i STR_n STR_k "\0"
+#define STRING_gujarati0 STR_g STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
+#define STRING_gujr0 STR_g STR_u STR_j STR_r "\0"
+#define STRING_gunjalagondi0 STR_g STR_u STR_n STR_j STR_a STR_l STR_a STR_g STR_o STR_n STR_d STR_i "\0"
+#define STRING_gurmukhi0 STR_g STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
+#define STRING_guru0 STR_g STR_u STR_r STR_u "\0"
+#define STRING_han0 STR_h STR_a STR_n "\0"
+#define STRING_hang0 STR_h STR_a STR_n STR_g "\0"
+#define STRING_hangul0 STR_h STR_a STR_n STR_g STR_u STR_l "\0"
+#define STRING_hani0 STR_h STR_a STR_n STR_i "\0"
+#define STRING_hanifirohingya0 STR_h STR_a STR_n STR_i STR_f STR_i STR_r STR_o STR_h STR_i STR_n STR_g STR_y STR_a "\0"
+#define STRING_hano0 STR_h STR_a STR_n STR_o "\0"
+#define STRING_hanunoo0 STR_h STR_a STR_n STR_u STR_n STR_o STR_o "\0"
+#define STRING_hatr0 STR_h STR_a STR_t STR_r "\0"
+#define STRING_hatran0 STR_h STR_a STR_t STR_r STR_a STR_n "\0"
+#define STRING_hebr0 STR_h STR_e STR_b STR_r "\0"
+#define STRING_hebrew0 STR_h STR_e STR_b STR_r STR_e STR_w "\0"
+#define STRING_hex0 STR_h STR_e STR_x "\0"
+#define STRING_hexdigit0 STR_h STR_e STR_x STR_d STR_i STR_g STR_i STR_t "\0"
+#define STRING_hira0 STR_h STR_i STR_r STR_a "\0"
+#define STRING_hiragana0 STR_h STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0"
+#define STRING_hluw0 STR_h STR_l STR_u STR_w "\0"
+#define STRING_hmng0 STR_h STR_m STR_n STR_g "\0"
+#define STRING_hmnp0 STR_h STR_m STR_n STR_p "\0"
+#define STRING_hung0 STR_h STR_u STR_n STR_g "\0"
+#define STRING_idc0 STR_i STR_d STR_c "\0"
+#define STRING_idcontinue0 STR_i STR_d STR_c STR_o STR_n STR_t STR_i STR_n STR_u STR_e "\0"
+#define STRING_ideo0 STR_i STR_d STR_e STR_o "\0"
+#define STRING_ideographic0 STR_i STR_d STR_e STR_o STR_g STR_r STR_a STR_p STR_h STR_i STR_c "\0"
+#define STRING_ids0 STR_i STR_d STR_s "\0"
+#define STRING_idsb0 STR_i STR_d STR_s STR_b "\0"
+#define STRING_idsbinaryoperator0 STR_i STR_d STR_s STR_b STR_i STR_n STR_a STR_r STR_y STR_o STR_p STR_e STR_r STR_a STR_t STR_o STR_r "\0"
+#define STRING_idst0 STR_i STR_d STR_s STR_t "\0"
+#define STRING_idstart0 STR_i STR_d STR_s STR_t STR_a STR_r STR_t "\0"
+#define STRING_idstrinaryoperator0 STR_i STR_d STR_s STR_t STR_r STR_i STR_n STR_a STR_r STR_y STR_o STR_p STR_e STR_r STR_a STR_t STR_o STR_r "\0"
+#define STRING_imperialaramaic0 STR_i STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_a STR_r STR_a STR_m STR_a STR_i STR_c "\0"
+#define STRING_inherited0 STR_i STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0"
+#define STRING_inscriptionalpahlavi0 STR_i STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_p STR_a STR_h STR_l STR_a STR_v STR_i "\0"
+#define STRING_inscriptionalparthian0 STR_i STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_p STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0"
+#define STRING_ital0 STR_i STR_t STR_a STR_l "\0"
+#define STRING_java0 STR_j STR_a STR_v STR_a "\0"
+#define STRING_javanese0 STR_j STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0"
+#define STRING_joinc0 STR_j STR_o STR_i STR_n STR_c "\0"
+#define STRING_joincontrol0 STR_j STR_o STR_i STR_n STR_c STR_o STR_n STR_t STR_r STR_o STR_l "\0"
+#define STRING_kaithi0 STR_k STR_a STR_i STR_t STR_h STR_i "\0"
+#define STRING_kali0 STR_k STR_a STR_l STR_i "\0"
+#define STRING_kana0 STR_k STR_a STR_n STR_a "\0"
+#define STRING_kannada0 STR_k STR_a STR_n STR_n STR_a STR_d STR_a "\0"
+#define STRING_katakana0 STR_k STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
+#define STRING_kayahli0 STR_k STR_a STR_y STR_a STR_h STR_l STR_i "\0"
+#define STRING_khar0 STR_k STR_h STR_a STR_r "\0"
+#define STRING_kharoshthi0 STR_k STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
+#define STRING_khitansmallscript0 STR_k STR_h STR_i STR_t STR_a STR_n STR_s STR_m STR_a STR_l STR_l STR_s STR_c STR_r STR_i STR_p STR_t "\0"
+#define STRING_khmer0 STR_k STR_h STR_m STR_e STR_r "\0"
+#define STRING_khmr0 STR_k STR_h STR_m STR_r "\0"
+#define STRING_khoj0 STR_k STR_h STR_o STR_j "\0"
+#define STRING_khojki0 STR_k STR_h STR_o STR_j STR_k STR_i "\0"
+#define STRING_khudawadi0 STR_k STR_h STR_u STR_d STR_a STR_w STR_a STR_d STR_i "\0"
+#define STRING_kits0 STR_k STR_i STR_t STR_s "\0"
+#define STRING_knda0 STR_k STR_n STR_d STR_a "\0"
+#define STRING_kthi0 STR_k STR_t STR_h STR_i "\0"
+#define STRING_l0 STR_l "\0"
+#define STRING_l_AMPERSAND0 STR_l STR_AMPERSAND "\0"
+#define STRING_lana0 STR_l STR_a STR_n STR_a "\0"
+#define STRING_lao0 STR_l STR_a STR_o "\0"
+#define STRING_laoo0 STR_l STR_a STR_o STR_o "\0"
+#define STRING_latin0 STR_l STR_a STR_t STR_i STR_n "\0"
+#define STRING_latn0 STR_l STR_a STR_t STR_n "\0"
+#define STRING_lc0 STR_l STR_c "\0"
+#define STRING_lepc0 STR_l STR_e STR_p STR_c "\0"
+#define STRING_lepcha0 STR_l STR_e STR_p STR_c STR_h STR_a "\0"
+#define STRING_limb0 STR_l STR_i STR_m STR_b "\0"
+#define STRING_limbu0 STR_l STR_i STR_m STR_b STR_u "\0"
+#define STRING_lina0 STR_l STR_i STR_n STR_a "\0"
+#define STRING_linb0 STR_l STR_i STR_n STR_b "\0"
+#define STRING_lineara0 STR_l STR_i STR_n STR_e STR_a STR_r STR_a "\0"
+#define STRING_linearb0 STR_l STR_i STR_n STR_e STR_a STR_r STR_b "\0"
+#define STRING_lisu0 STR_l STR_i STR_s STR_u "\0"
+#define STRING_ll0 STR_l STR_l "\0"
+#define STRING_lm0 STR_l STR_m "\0"
+#define STRING_lo0 STR_l STR_o "\0"
+#define STRING_loe0 STR_l STR_o STR_e "\0"
+#define STRING_logicalorderexception0 STR_l STR_o STR_g STR_i STR_c STR_a STR_l STR_o STR_r STR_d STR_e STR_r STR_e STR_x STR_c STR_e STR_p STR_t STR_i STR_o STR_n "\0"
+#define STRING_lower0 STR_l STR_o STR_w STR_e STR_r "\0"
+#define STRING_lowercase0 STR_l STR_o STR_w STR_e STR_r STR_c STR_a STR_s STR_e "\0"
+#define STRING_lt0 STR_l STR_t "\0"
+#define STRING_lu0 STR_l STR_u "\0"
+#define STRING_lyci0 STR_l STR_y STR_c STR_i "\0"
+#define STRING_lycian0 STR_l STR_y STR_c STR_i STR_a STR_n "\0"
+#define STRING_lydi0 STR_l STR_y STR_d STR_i "\0"
+#define STRING_lydian0 STR_l STR_y STR_d STR_i STR_a STR_n "\0"
+#define STRING_m0 STR_m "\0"
+#define STRING_mahajani0 STR_m STR_a STR_h STR_a STR_j STR_a STR_n STR_i "\0"
+#define STRING_mahj0 STR_m STR_a STR_h STR_j "\0"
+#define STRING_maka0 STR_m STR_a STR_k STR_a "\0"
+#define STRING_makasar0 STR_m STR_a STR_k STR_a STR_s STR_a STR_r "\0"
+#define STRING_malayalam0 STR_m STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
+#define STRING_mand0 STR_m STR_a STR_n STR_d "\0"
+#define STRING_mandaic0 STR_m STR_a STR_n STR_d STR_a STR_i STR_c "\0"
+#define STRING_mani0 STR_m STR_a STR_n STR_i "\0"
+#define STRING_manichaean0 STR_m STR_a STR_n STR_i STR_c STR_h STR_a STR_e STR_a STR_n "\0"
+#define STRING_marc0 STR_m STR_a STR_r STR_c "\0"
+#define STRING_marchen0 STR_m STR_a STR_r STR_c STR_h STR_e STR_n "\0"
+#define STRING_masaramgondi0 STR_m STR_a STR_s STR_a STR_r STR_a STR_m STR_g STR_o STR_n STR_d STR_i "\0"
+#define STRING_math0 STR_m STR_a STR_t STR_h "\0"
+#define STRING_mc0 STR_m STR_c "\0"
+#define STRING_me0 STR_m STR_e "\0"
+#define STRING_medefaidrin0 STR_m STR_e STR_d STR_e STR_f STR_a STR_i STR_d STR_r STR_i STR_n "\0"
+#define STRING_medf0 STR_m STR_e STR_d STR_f "\0"
+#define STRING_meeteimayek0 STR_m STR_e STR_e STR_t STR_e STR_i STR_m STR_a STR_y STR_e STR_k "\0"
+#define STRING_mend0 STR_m STR_e STR_n STR_d "\0"
+#define STRING_mendekikakui0 STR_m STR_e STR_n STR_d STR_e STR_k STR_i STR_k STR_a STR_k STR_u STR_i "\0"
+#define STRING_merc0 STR_m STR_e STR_r STR_c "\0"
+#define STRING_mero0 STR_m STR_e STR_r STR_o "\0"
+#define STRING_meroiticcursive0 STR_m STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_c STR_u STR_r STR_s STR_i STR_v STR_e "\0"
+#define STRING_meroitichieroglyphs0 STR_m STR_e STR_r STR_o STR_i STR_t STR_i STR_c STR_h STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
+#define STRING_miao0 STR_m STR_i STR_a STR_o "\0"
+#define STRING_mlym0 STR_m STR_l STR_y STR_m "\0"
+#define STRING_mn0 STR_m STR_n "\0"
+#define STRING_modi0 STR_m STR_o STR_d STR_i "\0"
+#define STRING_mong0 STR_m STR_o STR_n STR_g "\0"
+#define STRING_mongolian0 STR_m STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
+#define STRING_mro0 STR_m STR_r STR_o "\0"
+#define STRING_mroo0 STR_m STR_r STR_o STR_o "\0"
+#define STRING_mtei0 STR_m STR_t STR_e STR_i "\0"
+#define STRING_mult0 STR_m STR_u STR_l STR_t "\0"
+#define STRING_multani0 STR_m STR_u STR_l STR_t STR_a STR_n STR_i "\0"
+#define STRING_myanmar0 STR_m STR_y STR_a STR_n STR_m STR_a STR_r "\0"
+#define STRING_mymr0 STR_m STR_y STR_m STR_r "\0"
+#define STRING_n0 STR_n "\0"
+#define STRING_nabataean0 STR_n STR_a STR_b STR_a STR_t STR_a STR_e STR_a STR_n "\0"
+#define STRING_nand0 STR_n STR_a STR_n STR_d "\0"
+#define STRING_nandinagari0 STR_n STR_a STR_n STR_d STR_i STR_n STR_a STR_g STR_a STR_r STR_i "\0"
+#define STRING_narb0 STR_n STR_a STR_r STR_b "\0"
+#define STRING_nbat0 STR_n STR_b STR_a STR_t "\0"
+#define STRING_nchar0 STR_n STR_c STR_h STR_a STR_r "\0"
+#define STRING_nd0 STR_n STR_d "\0"
+#define STRING_newa0 STR_n STR_e STR_w STR_a "\0"
+#define STRING_newtailue0 STR_n STR_e STR_w STR_t STR_a STR_i STR_l STR_u STR_e "\0"
+#define STRING_nko0 STR_n STR_k STR_o "\0"
+#define STRING_nkoo0 STR_n STR_k STR_o STR_o "\0"
+#define STRING_nl0 STR_n STR_l "\0"
+#define STRING_no0 STR_n STR_o "\0"
+#define STRING_noncharactercodepoint0 STR_n STR_o STR_n STR_c STR_h STR_a STR_r STR_a STR_c STR_t STR_e STR_r STR_c STR_o STR_d STR_e STR_p STR_o STR_i STR_n STR_t "\0"
+#define STRING_nshu0 STR_n STR_s STR_h STR_u "\0" /* Each STRING_<name>0 macro spells <name> one character at a time with STR_x macros and appends a NUL terminator. */
+#define STRING_nushu0 STR_n STR_u STR_s STR_h STR_u "\0" /* NOTE(review): STR_x is defined outside this chunk; presumably a one-character string literal, so the pieces concatenate - confirm. */
+#define STRING_nyiakengpuachuehmong0 STR_n STR_y STR_i STR_a STR_k STR_e STR_n STR_g STR_p STR_u STR_a STR_c STR_h STR_u STR_e STR_h STR_m STR_o STR_n STR_g "\0" /* Names are all lower case with no separators between words. */
+#define STRING_ogam0 STR_o STR_g STR_a STR_m "\0"
+#define STRING_ogham0 STR_o STR_g STR_h STR_a STR_m "\0"
+#define STRING_olchiki0 STR_o STR_l STR_c STR_h STR_i STR_k STR_i "\0"
+#define STRING_olck0 STR_o STR_l STR_c STR_k "\0"
+#define STRING_oldhungarian0 STR_o STR_l STR_d STR_h STR_u STR_n STR_g STR_a STR_r STR_i STR_a STR_n "\0"
+#define STRING_olditalic0 STR_o STR_l STR_d STR_i STR_t STR_a STR_l STR_i STR_c "\0"
+#define STRING_oldnortharabian0 STR_o STR_l STR_d STR_n STR_o STR_r STR_t STR_h STR_a STR_r STR_a STR_b STR_i STR_a STR_n "\0"
+#define STRING_oldpermic0 STR_o STR_l STR_d STR_p STR_e STR_r STR_m STR_i STR_c "\0"
+#define STRING_oldpersian0 STR_o STR_l STR_d STR_p STR_e STR_r STR_s STR_i STR_a STR_n "\0"
+#define STRING_oldsogdian0 STR_o STR_l STR_d STR_s STR_o STR_g STR_d STR_i STR_a STR_n "\0"
+#define STRING_oldsoutharabian0 STR_o STR_l STR_d STR_s STR_o STR_u STR_t STR_h STR_a STR_r STR_a STR_b STR_i STR_a STR_n "\0"
+#define STRING_oldturkic0 STR_o STR_l STR_d STR_t STR_u STR_r STR_k STR_i STR_c "\0"
+#define STRING_olduyghur0 STR_o STR_l STR_d STR_u STR_y STR_g STR_h STR_u STR_r "\0"
+#define STRING_oriya0 STR_o STR_r STR_i STR_y STR_a "\0"
+#define STRING_orkh0 STR_o STR_r STR_k STR_h "\0"
+#define STRING_orya0 STR_o STR_r STR_y STR_a "\0"
+#define STRING_osage0 STR_o STR_s STR_a STR_g STR_e "\0"
+#define STRING_osge0 STR_o STR_s STR_g STR_e "\0"
+#define STRING_osma0 STR_o STR_s STR_m STR_a "\0"
+#define STRING_osmanya0 STR_o STR_s STR_m STR_a STR_n STR_y STR_a "\0"
+#define STRING_ougr0 STR_o STR_u STR_g STR_r "\0"
+#define STRING_p0 STR_p "\0"
+#define STRING_pahawhhmong0 STR_p STR_a STR_h STR_a STR_w STR_h STR_h STR_m STR_o STR_n STR_g "\0"
+#define STRING_palm0 STR_p STR_a STR_l STR_m "\0"
+#define STRING_palmyrene0 STR_p STR_a STR_l STR_m STR_y STR_r STR_e STR_n STR_e "\0"
+#define STRING_patsyn0 STR_p STR_a STR_t STR_s STR_y STR_n "\0"
+#define STRING_patternsyntax0 STR_p STR_a STR_t STR_t STR_e STR_r STR_n STR_s STR_y STR_n STR_t STR_a STR_x "\0"
+#define STRING_patternwhitespace0 STR_p STR_a STR_t STR_t STR_e STR_r STR_n STR_w STR_h STR_i STR_t STR_e STR_s STR_p STR_a STR_c STR_e "\0"
+#define STRING_patws0 STR_p STR_a STR_t STR_w STR_s "\0"
+#define STRING_pauc0 STR_p STR_a STR_u STR_c "\0"
+#define STRING_paucinhau0 STR_p STR_a STR_u STR_c STR_i STR_n STR_h STR_a STR_u "\0"
+#define STRING_pc0 STR_p STR_c "\0"
+#define STRING_pcm0 STR_p STR_c STR_m "\0"
+#define STRING_pd0 STR_p STR_d "\0"
+#define STRING_pe0 STR_p STR_e "\0"
+#define STRING_perm0 STR_p STR_e STR_r STR_m "\0"
+#define STRING_pf0 STR_p STR_f "\0"
+#define STRING_phag0 STR_p STR_h STR_a STR_g "\0"
+#define STRING_phagspa0 STR_p STR_h STR_a STR_g STR_s STR_p STR_a "\0"
+#define STRING_phli0 STR_p STR_h STR_l STR_i "\0"
+#define STRING_phlp0 STR_p STR_h STR_l STR_p "\0"
+#define STRING_phnx0 STR_p STR_h STR_n STR_x "\0"
+#define STRING_phoenician0 STR_p STR_h STR_o STR_e STR_n STR_i STR_c STR_i STR_a STR_n "\0"
+#define STRING_pi0 STR_p STR_i "\0"
+#define STRING_plrd0 STR_p STR_l STR_r STR_d "\0"
+#define STRING_po0 STR_p STR_o "\0"
+#define STRING_prependedconcatenationmark0 STR_p STR_r STR_e STR_p STR_e STR_n STR_d STR_e STR_d STR_c STR_o STR_n STR_c STR_a STR_t STR_e STR_n STR_a STR_t STR_i STR_o STR_n STR_m STR_a STR_r STR_k "\0"
+#define STRING_prti0 STR_p STR_r STR_t STR_i "\0"
+#define STRING_ps0 STR_p STR_s "\0"
+#define STRING_psalterpahlavi0 STR_p STR_s STR_a STR_l STR_t STR_e STR_r STR_p STR_a STR_h STR_l STR_a STR_v STR_i "\0"
+#define STRING_qaac0 STR_q STR_a STR_a STR_c "\0"
+#define STRING_qaai0 STR_q STR_a STR_a STR_i "\0"
+#define STRING_qmark0 STR_q STR_m STR_a STR_r STR_k "\0"
+#define STRING_quotationmark0 STR_q STR_u STR_o STR_t STR_a STR_t STR_i STR_o STR_n STR_m STR_a STR_r STR_k "\0"
+#define STRING_radical0 STR_r STR_a STR_d STR_i STR_c STR_a STR_l "\0"
+#define STRING_regionalindicator0 STR_r STR_e STR_g STR_i STR_o STR_n STR_a STR_l STR_i STR_n STR_d STR_i STR_c STR_a STR_t STR_o STR_r "\0"
+#define STRING_rejang0 STR_r STR_e STR_j STR_a STR_n STR_g "\0"
+#define STRING_ri0 STR_r STR_i "\0"
+#define STRING_rjng0 STR_r STR_j STR_n STR_g "\0"
+#define STRING_rohg0 STR_r STR_o STR_h STR_g "\0"
+#define STRING_runic0 STR_r STR_u STR_n STR_i STR_c "\0"
+#define STRING_runr0 STR_r STR_u STR_n STR_r "\0"
+#define STRING_s0 STR_s "\0"
+#define STRING_samaritan0 STR_s STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0"
+#define STRING_samr0 STR_s STR_a STR_m STR_r "\0"
+#define STRING_sarb0 STR_s STR_a STR_r STR_b "\0"
+#define STRING_saur0 STR_s STR_a STR_u STR_r "\0"
+#define STRING_saurashtra0 STR_s STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0"
+#define STRING_sc0 STR_s STR_c "\0"
+#define STRING_sd0 STR_s STR_d "\0"
+#define STRING_sentenceterminal0 STR_s STR_e STR_n STR_t STR_e STR_n STR_c STR_e STR_t STR_e STR_r STR_m STR_i STR_n STR_a STR_l "\0"
+#define STRING_sgnw0 STR_s STR_g STR_n STR_w "\0"
+#define STRING_sharada0 STR_s STR_h STR_a STR_r STR_a STR_d STR_a "\0"
+#define STRING_shavian0 STR_s STR_h STR_a STR_v STR_i STR_a STR_n "\0"
+#define STRING_shaw0 STR_s STR_h STR_a STR_w "\0"
+#define STRING_shrd0 STR_s STR_h STR_r STR_d "\0"
+#define STRING_sidd0 STR_s STR_i STR_d STR_d "\0"
+#define STRING_siddham0 STR_s STR_i STR_d STR_d STR_h STR_a STR_m "\0"
+#define STRING_signwriting0 STR_s STR_i STR_g STR_n STR_w STR_r STR_i STR_t STR_i STR_n STR_g "\0"
+#define STRING_sind0 STR_s STR_i STR_n STR_d "\0"
+#define STRING_sinh0 STR_s STR_i STR_n STR_h "\0"
+#define STRING_sinhala0 STR_s STR_i STR_n STR_h STR_a STR_l STR_a "\0"
+#define STRING_sk0 STR_s STR_k "\0"
+#define STRING_sm0 STR_s STR_m "\0"
+#define STRING_so0 STR_s STR_o "\0"
+#define STRING_softdotted0 STR_s STR_o STR_f STR_t STR_d STR_o STR_t STR_t STR_e STR_d "\0"
+#define STRING_sogd0 STR_s STR_o STR_g STR_d "\0"
+#define STRING_sogdian0 STR_s STR_o STR_g STR_d STR_i STR_a STR_n "\0"
+#define STRING_sogo0 STR_s STR_o STR_g STR_o "\0"
+#define STRING_sora0 STR_s STR_o STR_r STR_a "\0"
+#define STRING_sorasompeng0 STR_s STR_o STR_r STR_a STR_s STR_o STR_m STR_p STR_e STR_n STR_g "\0"
+#define STRING_soyo0 STR_s STR_o STR_y STR_o "\0"
+#define STRING_soyombo0 STR_s STR_o STR_y STR_o STR_m STR_b STR_o "\0"
+#define STRING_space0 STR_s STR_p STR_a STR_c STR_e "\0"
+#define STRING_sterm0 STR_s STR_t STR_e STR_r STR_m "\0"
+#define STRING_sund0 STR_s STR_u STR_n STR_d "\0"
+#define STRING_sundanese0 STR_s STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
+#define STRING_sylo0 STR_s STR_y STR_l STR_o "\0"
+#define STRING_sylotinagri0 STR_s STR_y STR_l STR_o STR_t STR_i STR_n STR_a STR_g STR_r STR_i "\0"
+#define STRING_syrc0 STR_s STR_y STR_r STR_c "\0"
+#define STRING_syriac0 STR_s STR_y STR_r STR_i STR_a STR_c "\0"
+#define STRING_tagalog0 STR_t STR_a STR_g STR_a STR_l STR_o STR_g "\0"
+#define STRING_tagb0 STR_t STR_a STR_g STR_b "\0"
+#define STRING_tagbanwa0 STR_t STR_a STR_g STR_b STR_a STR_n STR_w STR_a "\0"
+#define STRING_taile0 STR_t STR_a STR_i STR_l STR_e "\0"
+#define STRING_taitham0 STR_t STR_a STR_i STR_t STR_h STR_a STR_m "\0"
+#define STRING_taiviet0 STR_t STR_a STR_i STR_v STR_i STR_e STR_t "\0"
+#define STRING_takr0 STR_t STR_a STR_k STR_r "\0"
+#define STRING_takri0 STR_t STR_a STR_k STR_r STR_i "\0"
+#define STRING_tale0 STR_t STR_a STR_l STR_e "\0"
+#define STRING_talu0 STR_t STR_a STR_l STR_u "\0"
+#define STRING_tamil0 STR_t STR_a STR_m STR_i STR_l "\0"
+#define STRING_taml0 STR_t STR_a STR_m STR_l "\0"
+#define STRING_tang0 STR_t STR_a STR_n STR_g "\0"
+#define STRING_tangsa0 STR_t STR_a STR_n STR_g STR_s STR_a "\0"
+#define STRING_tangut0 STR_t STR_a STR_n STR_g STR_u STR_t "\0"
+#define STRING_tavt0 STR_t STR_a STR_v STR_t "\0"
+#define STRING_telu0 STR_t STR_e STR_l STR_u "\0"
+#define STRING_telugu0 STR_t STR_e STR_l STR_u STR_g STR_u "\0"
+#define STRING_term0 STR_t STR_e STR_r STR_m "\0"
+#define STRING_terminalpunctuation0 STR_t STR_e STR_r STR_m STR_i STR_n STR_a STR_l STR_p STR_u STR_n STR_c STR_t STR_u STR_a STR_t STR_i STR_o STR_n "\0"
+#define STRING_tfng0 STR_t STR_f STR_n STR_g "\0"
+#define STRING_tglg0 STR_t STR_g STR_l STR_g "\0"
+#define STRING_thaa0 STR_t STR_h STR_a STR_a "\0"
+#define STRING_thaana0 STR_t STR_h STR_a STR_a STR_n STR_a "\0"
+#define STRING_thai0 STR_t STR_h STR_a STR_i "\0"
+#define STRING_tibetan0 STR_t STR_i STR_b STR_e STR_t STR_a STR_n "\0"
+#define STRING_tibt0 STR_t STR_i STR_b STR_t "\0"
+#define STRING_tifinagh0 STR_t STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
+#define STRING_tirh0 STR_t STR_i STR_r STR_h "\0"
+#define STRING_tirhuta0 STR_t STR_i STR_r STR_h STR_u STR_t STR_a "\0"
+#define STRING_tnsa0 STR_t STR_n STR_s STR_a "\0"
+#define STRING_toto0 STR_t STR_o STR_t STR_o "\0"
+#define STRING_ugar0 STR_u STR_g STR_a STR_r "\0"
+#define STRING_ugaritic0 STR_u STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
+#define STRING_uideo0 STR_u STR_i STR_d STR_e STR_o "\0"
+#define STRING_unifiedideograph0 STR_u STR_n STR_i STR_f STR_i STR_e STR_d STR_i STR_d STR_e STR_o STR_g STR_r STR_a STR_p STR_h "\0"
+#define STRING_unknown0 STR_u STR_n STR_k STR_n STR_o STR_w STR_n "\0"
+#define STRING_upper0 STR_u STR_p STR_p STR_e STR_r "\0"
+#define STRING_uppercase0 STR_u STR_p STR_p STR_e STR_r STR_c STR_a STR_s STR_e "\0"
+#define STRING_vai0 STR_v STR_a STR_i "\0"
+#define STRING_vaii0 STR_v STR_a STR_i STR_i "\0"
+#define STRING_variationselector0 STR_v STR_a STR_r STR_i STR_a STR_t STR_i STR_o STR_n STR_s STR_e STR_l STR_e STR_c STR_t STR_o STR_r "\0"
+#define STRING_vith0 STR_v STR_i STR_t STR_h "\0"
+#define STRING_vithkuqi0 STR_v STR_i STR_t STR_h STR_k STR_u STR_q STR_i "\0"
+#define STRING_vs0 STR_v STR_s "\0"
+#define STRING_wancho0 STR_w STR_a STR_n STR_c STR_h STR_o "\0"
+#define STRING_wara0 STR_w STR_a STR_r STR_a "\0"
+#define STRING_warangciti0 STR_w STR_a STR_r STR_a STR_n STR_g STR_c STR_i STR_t STR_i "\0"
+#define STRING_wcho0 STR_w STR_c STR_h STR_o "\0"
+#define STRING_whitespace0 STR_w STR_h STR_i STR_t STR_e STR_s STR_p STR_a STR_c STR_e "\0"
+#define STRING_wspace0 STR_w STR_s STR_p STR_a STR_c STR_e "\0"
+#define STRING_xan0 STR_x STR_a STR_n "\0"
+#define STRING_xidc0 STR_x STR_i STR_d STR_c "\0"
+#define STRING_xidcontinue0 STR_x STR_i STR_d STR_c STR_o STR_n STR_t STR_i STR_n STR_u STR_e "\0"
+#define STRING_xids0 STR_x STR_i STR_d STR_s "\0"
+#define STRING_xidstart0 STR_x STR_i STR_d STR_s STR_t STR_a STR_r STR_t "\0"
+#define STRING_xpeo0 STR_x STR_p STR_e STR_o "\0"
+#define STRING_xps0 STR_x STR_p STR_s "\0"
+#define STRING_xsp0 STR_x STR_s STR_p "\0"
+#define STRING_xsux0 STR_x STR_s STR_u STR_x "\0"
+#define STRING_xuc0 STR_x STR_u STR_c "\0"
+#define STRING_xwd0 STR_x STR_w STR_d "\0"
+#define STRING_yezi0 STR_y STR_e STR_z STR_i "\0"
+#define STRING_yezidi0 STR_y STR_e STR_z STR_i STR_d STR_i "\0"
+#define STRING_yi0 STR_y STR_i "\0"
+#define STRING_yiii0 STR_y STR_i STR_i STR_i "\0"
+#define STRING_z0 STR_z "\0"
+#define STRING_zanabazarsquare0 STR_z STR_a STR_n STR_a STR_b STR_a STR_z STR_a STR_r STR_s STR_q STR_u STR_a STR_r STR_e "\0"
+#define STRING_zanb0 STR_z STR_a STR_n STR_b "\0"
+#define STRING_zinh0 STR_z STR_i STR_n STR_h "\0"
+#define STRING_zl0 STR_z STR_l "\0"
+#define STRING_zp0 STR_z STR_p "\0"
+#define STRING_zs0 STR_z STR_s "\0"
+#define STRING_zyyy0 STR_z STR_y STR_y STR_y "\0"
+#define STRING_zzzz0 STR_z STR_z STR_z STR_z "\0"
+
+const char PRIV(utt_names)[] =  /* One char array made of every STRING_xxx0 name listed below, in this order. */
+  STRING_adlam0  /* The STRING_xxx0 macros whose definitions are in view each end with a NUL, so the names sit back to back. */
+  STRING_adlm0  /* The first field of each PRIV(utt) row appears to be the byte offset of its name in this array (0, 6, 11, ...). */
+  STRING_aghb0  /* If so, adding, removing or reordering a name shifts every later offset in PRIV(utt). */
+  STRING_ahex0  /* Names are listed alphabetically. NOTE(review): presumably the lookup code relies on this order - confirm. */
+  STRING_ahom0
+  STRING_alpha0
+  STRING_alphabetic0
+  STRING_anatolianhieroglyphs0
+  STRING_any0
+  STRING_arab0
+  STRING_arabic0
+  STRING_armenian0
+  STRING_armi0
+  STRING_armn0
+  STRING_ascii0
+  STRING_asciihexdigit0
+  STRING_avestan0
+  STRING_avst0
+  STRING_bali0
+  STRING_balinese0
+  STRING_bamu0
+  STRING_bamum0
+  STRING_bass0
+  STRING_bassavah0
+  STRING_batak0
+  STRING_batk0
+  STRING_beng0
+  STRING_bengali0
+  STRING_bhaiksuki0
+  STRING_bhks0
+  STRING_bidial0
+  STRING_bidian0
+  STRING_bidib0
+  STRING_bidibn0
+  STRING_bidic0
+  STRING_bidicontrol0
+  STRING_bidics0
+  STRING_bidien0
+  STRING_bidies0
+  STRING_bidiet0
+  STRING_bidifsi0
+  STRING_bidil0
+  STRING_bidilre0
+  STRING_bidilri0
+  STRING_bidilro0
+  STRING_bidim0
+  STRING_bidimirrored0
+  STRING_bidinsm0
+  STRING_bidion0
+  STRING_bidipdf0
+  STRING_bidipdi0
+  STRING_bidir0
+  STRING_bidirle0
+  STRING_bidirli0
+  STRING_bidirlo0
+  STRING_bidis0
+  STRING_bidiws0
+  STRING_bopo0
+  STRING_bopomofo0
+  STRING_brah0
+  STRING_brahmi0
+  STRING_brai0
+  STRING_braille0
+  STRING_bugi0
+  STRING_buginese0
+  STRING_buhd0
+  STRING_buhid0
+  STRING_c0
+  STRING_cakm0
+  STRING_canadianaboriginal0
+  STRING_cans0
+  STRING_cari0
+  STRING_carian0
+  STRING_cased0
+  STRING_caseignorable0
+  STRING_caucasianalbanian0
+  STRING_cc0
+  STRING_cf0
+  STRING_chakma0
+  STRING_cham0
+  STRING_changeswhencasefolded0
+  STRING_changeswhencasemapped0
+  STRING_changeswhenlowercased0
+  STRING_changeswhentitlecased0
+  STRING_changeswhenuppercased0
+  STRING_cher0
+  STRING_cherokee0
+  STRING_chorasmian0
+  STRING_chrs0
+  STRING_ci0
+  STRING_cn0
+  STRING_co0
+  STRING_common0
+  STRING_copt0
+  STRING_coptic0
+  STRING_cpmn0
+  STRING_cprt0
+  STRING_cs0
+  STRING_cuneiform0
+  STRING_cwcf0
+  STRING_cwcm0
+  STRING_cwl0
+  STRING_cwt0
+  STRING_cwu0
+  STRING_cypriot0
+  STRING_cyprominoan0
+  STRING_cyrillic0
+  STRING_cyrl0
+  STRING_dash0
+  STRING_defaultignorablecodepoint0
+  STRING_dep0
+  STRING_deprecated0
+  STRING_deseret0
+  STRING_deva0
+  STRING_devanagari0
+  STRING_di0
+  STRING_dia0
+  STRING_diacritic0
+  STRING_diak0
+  STRING_divesakuru0
+  STRING_dogr0
+  STRING_dogra0
+  STRING_dsrt0
+  STRING_dupl0
+  STRING_duployan0
+  STRING_ebase0
+  STRING_ecomp0
+  STRING_egyp0
+  STRING_egyptianhieroglyphs0
+  STRING_elba0
+  STRING_elbasan0
+  STRING_elym0
+  STRING_elymaic0
+  STRING_emod0
+  STRING_emoji0
+  STRING_emojicomponent0
+  STRING_emojimodifier0
+  STRING_emojimodifierbase0
+  STRING_emojipresentation0
+  STRING_epres0
+  STRING_ethi0
+  STRING_ethiopic0
+  STRING_ext0
+  STRING_extendedpictographic0
+  STRING_extender0
+  STRING_extpict0
+  STRING_geor0
+  STRING_georgian0
+  STRING_glag0
+  STRING_glagolitic0
+  STRING_gong0
+  STRING_gonm0
+  STRING_goth0
+  STRING_gothic0
+  STRING_gran0
+  STRING_grantha0
+  STRING_graphemebase0
+  STRING_graphemeextend0
+  STRING_graphemelink0
+  STRING_grbase0
+  STRING_greek0
+  STRING_grek0
+  STRING_grext0
+  STRING_grlink0
+  STRING_gujarati0
+  STRING_gujr0
+  STRING_gunjalagondi0
+  STRING_gurmukhi0
+  STRING_guru0
+  STRING_han0
+  STRING_hang0
+  STRING_hangul0
+  STRING_hani0
+  STRING_hanifirohingya0
+  STRING_hano0
+  STRING_hanunoo0
+  STRING_hatr0
+  STRING_hatran0
+  STRING_hebr0
+  STRING_hebrew0
+  STRING_hex0
+  STRING_hexdigit0
+  STRING_hira0
+  STRING_hiragana0
+  STRING_hluw0
+  STRING_hmng0
+  STRING_hmnp0
+  STRING_hung0
+  STRING_idc0
+  STRING_idcontinue0
+  STRING_ideo0
+  STRING_ideographic0
+  STRING_ids0
+  STRING_idsb0
+  STRING_idsbinaryoperator0
+  STRING_idst0
+  STRING_idstart0
+  STRING_idstrinaryoperator0
+  STRING_imperialaramaic0
+  STRING_inherited0
+  STRING_inscriptionalpahlavi0
+  STRING_inscriptionalparthian0
+  STRING_ital0
+  STRING_java0
+  STRING_javanese0
+  STRING_joinc0
+  STRING_joincontrol0
+  STRING_kaithi0
+  STRING_kali0
+  STRING_kana0
+  STRING_kannada0
+  STRING_katakana0
+  STRING_kayahli0
+  STRING_khar0
+  STRING_kharoshthi0
+  STRING_khitansmallscript0
+  STRING_khmer0
+  STRING_khmr0
+  STRING_khoj0
+  STRING_khojki0
+  STRING_khudawadi0
+  STRING_kits0
+  STRING_knda0
+  STRING_kthi0
+  STRING_l0
+  STRING_l_AMPERSAND0
+  STRING_lana0
+  STRING_lao0
+  STRING_laoo0
+  STRING_latin0
+  STRING_latn0
+  STRING_lc0
+  STRING_lepc0
+  STRING_lepcha0
+  STRING_limb0
+  STRING_limbu0
+  STRING_lina0
+  STRING_linb0
+  STRING_lineara0
+  STRING_linearb0
+  STRING_lisu0
+  STRING_ll0
+  STRING_lm0
+  STRING_lo0
+  STRING_loe0
+  STRING_logicalorderexception0
+  STRING_lower0
+  STRING_lowercase0
+  STRING_lt0
+  STRING_lu0
+  STRING_lyci0
+  STRING_lycian0
+  STRING_lydi0
+  STRING_lydian0
+  STRING_m0
+  STRING_mahajani0
+  STRING_mahj0
+  STRING_maka0
+  STRING_makasar0
+  STRING_malayalam0
+  STRING_mand0
+  STRING_mandaic0
+  STRING_mani0
+  STRING_manichaean0
+  STRING_marc0
+  STRING_marchen0
+  STRING_masaramgondi0
+  STRING_math0
+  STRING_mc0
+  STRING_me0
+  STRING_medefaidrin0
+  STRING_medf0
+  STRING_meeteimayek0
+  STRING_mend0
+  STRING_mendekikakui0
+  STRING_merc0
+  STRING_mero0
+  STRING_meroiticcursive0
+  STRING_meroitichieroglyphs0
+  STRING_miao0
+  STRING_mlym0
+  STRING_mn0
+  STRING_modi0
+  STRING_mong0
+  STRING_mongolian0
+  STRING_mro0
+  STRING_mroo0
+  STRING_mtei0
+  STRING_mult0
+  STRING_multani0
+  STRING_myanmar0
+  STRING_mymr0
+  STRING_n0
+  STRING_nabataean0
+  STRING_nand0
+  STRING_nandinagari0
+  STRING_narb0
+  STRING_nbat0
+  STRING_nchar0
+  STRING_nd0
+  STRING_newa0
+  STRING_newtailue0
+  STRING_nko0
+  STRING_nkoo0
+  STRING_nl0
+  STRING_no0
+  STRING_noncharactercodepoint0
+  STRING_nshu0
+  STRING_nushu0
+  STRING_nyiakengpuachuehmong0
+  STRING_ogam0
+  STRING_ogham0
+  STRING_olchiki0
+  STRING_olck0
+  STRING_oldhungarian0
+  STRING_olditalic0
+  STRING_oldnortharabian0
+  STRING_oldpermic0
+  STRING_oldpersian0
+  STRING_oldsogdian0
+  STRING_oldsoutharabian0
+  STRING_oldturkic0
+  STRING_olduyghur0
+  STRING_oriya0
+  STRING_orkh0
+  STRING_orya0
+  STRING_osage0
+  STRING_osge0
+  STRING_osma0
+  STRING_osmanya0
+  STRING_ougr0
+  STRING_p0
+  STRING_pahawhhmong0
+  STRING_palm0
+  STRING_palmyrene0
+  STRING_patsyn0
+  STRING_patternsyntax0
+  STRING_patternwhitespace0
+  STRING_patws0
+  STRING_pauc0
+  STRING_paucinhau0
+  STRING_pc0
+  STRING_pcm0
+  STRING_pd0
+  STRING_pe0
+  STRING_perm0
+  STRING_pf0
+  STRING_phag0
+  STRING_phagspa0
+  STRING_phli0
+  STRING_phlp0
+  STRING_phnx0
+  STRING_phoenician0
+  STRING_pi0
+  STRING_plrd0
+  STRING_po0
+  STRING_prependedconcatenationmark0
+  STRING_prti0
+  STRING_ps0
+  STRING_psalterpahlavi0
+  STRING_qaac0
+  STRING_qaai0
+  STRING_qmark0
+  STRING_quotationmark0
+  STRING_radical0
+  STRING_regionalindicator0
+  STRING_rejang0
+  STRING_ri0
+  STRING_rjng0
+  STRING_rohg0
+  STRING_runic0
+  STRING_runr0
+  STRING_s0
+  STRING_samaritan0
+  STRING_samr0
+  STRING_sarb0
+  STRING_saur0
+  STRING_saurashtra0
+  STRING_sc0
+  STRING_sd0
+  STRING_sentenceterminal0
+  STRING_sgnw0
+  STRING_sharada0
+  STRING_shavian0
+  STRING_shaw0
+  STRING_shrd0
+  STRING_sidd0
+  STRING_siddham0
+  STRING_signwriting0
+  STRING_sind0
+  STRING_sinh0
+  STRING_sinhala0
+  STRING_sk0
+  STRING_sm0
+  STRING_so0
+  STRING_softdotted0
+  STRING_sogd0
+  STRING_sogdian0
+  STRING_sogo0
+  STRING_sora0
+  STRING_sorasompeng0
+  STRING_soyo0
+  STRING_soyombo0
+  STRING_space0
+  STRING_sterm0
+  STRING_sund0
+  STRING_sundanese0
+  STRING_sylo0
+  STRING_sylotinagri0
+  STRING_syrc0
+  STRING_syriac0
+  STRING_tagalog0
+  STRING_tagb0
+  STRING_tagbanwa0
+  STRING_taile0
+  STRING_taitham0
+  STRING_taiviet0
+  STRING_takr0
+  STRING_takri0
+  STRING_tale0
+  STRING_talu0
+  STRING_tamil0
+  STRING_taml0
+  STRING_tang0
+  STRING_tangsa0
+  STRING_tangut0
+  STRING_tavt0
+  STRING_telu0
+  STRING_telugu0
+  STRING_term0
+  STRING_terminalpunctuation0
+  STRING_tfng0
+  STRING_tglg0
+  STRING_thaa0
+  STRING_thaana0
+  STRING_thai0
+  STRING_tibetan0
+  STRING_tibt0
+  STRING_tifinagh0
+  STRING_tirh0
+  STRING_tirhuta0
+  STRING_tnsa0
+  STRING_toto0
+  STRING_ugar0
+  STRING_ugaritic0
+  STRING_uideo0
+  STRING_unifiedideograph0
+  STRING_unknown0
+  STRING_upper0
+  STRING_uppercase0
+  STRING_vai0
+  STRING_vaii0
+  STRING_variationselector0
+  STRING_vith0
+  STRING_vithkuqi0
+  STRING_vs0
+  STRING_wancho0
+  STRING_wara0
+  STRING_warangciti0
+  STRING_wcho0
+  STRING_whitespace0
+  STRING_wspace0
+  STRING_xan0
+  STRING_xidc0
+  STRING_xidcontinue0
+  STRING_xids0
+  STRING_xidstart0
+  STRING_xpeo0
+  STRING_xps0
+  STRING_xsp0
+  STRING_xsux0
+  STRING_xuc0
+  STRING_xwd0
+  STRING_yezi0
+  STRING_yezidi0
+  STRING_yi0
+  STRING_yiii0
+  STRING_z0
+  STRING_zanabazarsquare0
+  STRING_zanb0
+  STRING_zinh0
+  STRING_zl0
+  STRING_zp0
+  STRING_zs0
+  STRING_zyyy0
+  STRING_zzzz0;
+
+const ucp_type_table PRIV(utt)[] = {
+  {   0, PT_SCX, ucp_Adlam },
+  {   6, PT_SCX, ucp_Adlam },
+  {  11, PT_SC, ucp_Caucasian_Albanian },
+  {  16, PT_BOOL, ucp_ASCII_Hex_Digit },
+  {  21, PT_SC, ucp_Ahom },
+  {  26, PT_BOOL, ucp_Alphabetic },
+  {  32, PT_BOOL, ucp_Alphabetic },
+  {  43, PT_SC, ucp_Anatolian_Hieroglyphs },
+  {  64, PT_ANY, 0 },
+  {  68, PT_SCX, ucp_Arabic },
+  {  73, PT_SCX, ucp_Arabic },
+  {  80, PT_SC, ucp_Armenian },
+  {  89, PT_SC, ucp_Imperial_Aramaic },
+  {  94, PT_SC, ucp_Armenian },
+  {  99, PT_BOOL, ucp_ASCII },
+  { 105, PT_BOOL, ucp_ASCII_Hex_Digit },
+  { 119, PT_SC, ucp_Avestan },
+  { 127, PT_SC, ucp_Avestan },
+  { 132, PT_SC, ucp_Balinese },
+  { 137, PT_SC, ucp_Balinese },
+  { 146, PT_SC, ucp_Bamum },
+  { 151, PT_SC, ucp_Bamum },
+  { 157, PT_SC, ucp_Bassa_Vah },
+  { 162, PT_SC, ucp_Bassa_Vah },
+  { 171, PT_SC, ucp_Batak },
+  { 177, PT_SC, ucp_Batak },
+  { 182, PT_SCX, ucp_Bengali },
+  { 187, PT_SCX, ucp_Bengali },
+  { 195, PT_SC, ucp_Bhaiksuki },
+  { 205, PT_SC, ucp_Bhaiksuki },
+  { 210, PT_BIDICL, ucp_bidiAL },
+  { 217, PT_BIDICL, ucp_bidiAN },
+  { 224, PT_BIDICL, ucp_bidiB },
+  { 230, PT_BIDICL, ucp_bidiBN },
+  { 237, PT_BOOL, ucp_Bidi_Control },
+  { 243, PT_BOOL, ucp_Bidi_Control },
+  { 255, PT_BIDICL, ucp_bidiCS },
+  { 262, PT_BIDICL, ucp_bidiEN },
+  { 269, PT_BIDICL, ucp_bidiES },
+  { 276, PT_BIDICL, ucp_bidiET },
+  { 283, PT_BIDICL, ucp_bidiFSI },
+  { 291, PT_BIDICL, ucp_bidiL },
+  { 297, PT_BIDICL, ucp_bidiLRE },
+  { 305, PT_BIDICL, ucp_bidiLRI },
+  { 313, PT_BIDICL, ucp_bidiLRO },
+  { 321, PT_BOOL, ucp_Bidi_Mirrored },
+  { 327, PT_BOOL, ucp_Bidi_Mirrored },
+  { 340, PT_BIDICL, ucp_bidiNSM },
+  { 348, PT_BIDICL, ucp_bidiON },
+  { 355, PT_BIDICL, ucp_bidiPDF },
+  { 363, PT_BIDICL, ucp_bidiPDI },
+  { 371, PT_BIDICL, ucp_bidiR },
+  { 377, PT_BIDICL, ucp_bidiRLE },
+  { 385, PT_BIDICL, ucp_bidiRLI },
+  { 393, PT_BIDICL, ucp_bidiRLO },
+  { 401, PT_BIDICL, ucp_bidiS },
+  { 407, PT_BIDICL, ucp_bidiWS },
+  { 414, PT_SCX, ucp_Bopomofo },
+  { 419, PT_SCX, ucp_Bopomofo },
+  { 428, PT_SC, ucp_Brahmi },
+  { 433, PT_SC, ucp_Brahmi },
+  { 440, PT_SC, ucp_Braille },
+  { 445, PT_SC, ucp_Braille },
+  { 453, PT_SCX, ucp_Buginese },
+  { 458, PT_SCX, ucp_Buginese },
+  { 467, PT_SCX, ucp_Buhid },
+  { 472, PT_SCX, ucp_Buhid },
+  { 478, PT_GC, ucp_C },
+  { 480, PT_SCX, ucp_Chakma },
+  { 485, PT_SC, ucp_Canadian_Aboriginal },
+  { 504, PT_SC, ucp_Canadian_Aboriginal },
+  { 509, PT_SC, ucp_Carian },
+  { 514, PT_SC, ucp_Carian },
+  { 521, PT_BOOL, ucp_Cased },
+  { 527, PT_BOOL, ucp_Case_Ignorable },
+  { 541, PT_SC, ucp_Caucasian_Albanian },
+  { 559, PT_PC, ucp_Cc },
+  { 562, PT_PC, ucp_Cf },
+  { 565, PT_SCX, ucp_Chakma },
+  { 572, PT_SC, ucp_Cham },
+  { 577, PT_BOOL, ucp_Changes_When_Casefolded },
+  { 599, PT_BOOL, ucp_Changes_When_Casemapped },
+  { 621, PT_BOOL, ucp_Changes_When_Lowercased },
+  { 643, PT_BOOL, ucp_Changes_When_Titlecased },
+  { 665, PT_BOOL, ucp_Changes_When_Uppercased },
+  { 687, PT_SC, ucp_Cherokee },
+  { 692, PT_SC, ucp_Cherokee },
+  { 701, PT_SC, ucp_Chorasmian },
+  { 712, PT_SC, ucp_Chorasmian },
+  { 717, PT_BOOL, ucp_Case_Ignorable },
+  { 720, PT_PC, ucp_Cn },
+  { 723, PT_PC, ucp_Co },
+  { 726, PT_SC, ucp_Common },
+  { 733, PT_SCX, ucp_Coptic },
+  { 738, PT_SCX, ucp_Coptic },
+  { 745, PT_SCX, ucp_Cypro_Minoan },
+  { 750, PT_SCX, ucp_Cypriot },
+  { 755, PT_PC, ucp_Cs },
+  { 758, PT_SC, ucp_Cuneiform },
+  { 768, PT_BOOL, ucp_Changes_When_Casefolded },
+  { 773, PT_BOOL, ucp_Changes_When_Casemapped },
+  { 778, PT_BOOL, ucp_Changes_When_Lowercased },
+  { 782, PT_BOOL, ucp_Changes_When_Titlecased },
+  { 786, PT_BOOL, ucp_Changes_When_Uppercased },
+  { 790, PT_SCX, ucp_Cypriot },
+  { 798, PT_SCX, ucp_Cypro_Minoan },
+  { 810, PT_SCX, ucp_Cyrillic },
+  { 819, PT_SCX, ucp_Cyrillic },
+  { 824, PT_BOOL, ucp_Dash },
+  { 829, PT_BOOL, ucp_Default_Ignorable_Code_Point },
+  { 855, PT_BOOL, ucp_Deprecated },
+  { 859, PT_BOOL, ucp_Deprecated },
+  { 870, PT_SC, ucp_Deseret },
+  { 878, PT_SCX, ucp_Devanagari },
+  { 883, PT_SCX, ucp_Devanagari },
+  { 894, PT_BOOL, ucp_Default_Ignorable_Code_Point },
+  { 897, PT_BOOL, ucp_Diacritic },
+  { 901, PT_BOOL, ucp_Diacritic },
+  { 911, PT_SC, ucp_Dives_Akuru },
+  { 916, PT_SC, ucp_Dives_Akuru },
+  { 927, PT_SCX, ucp_Dogra },
+  { 932, PT_SCX, ucp_Dogra },
+  { 938, PT_SC, ucp_Deseret },
+  { 943, PT_SCX, ucp_Duployan },
+  { 948, PT_SCX, ucp_Duployan },
+  { 957, PT_BOOL, ucp_Emoji_Modifier_Base },
+  { 963, PT_BOOL, ucp_Emoji_Component },
+  { 969, PT_SC, ucp_Egyptian_Hieroglyphs },
+  { 974, PT_SC, ucp_Egyptian_Hieroglyphs },
+  { 994, PT_SC, ucp_Elbasan },
+  { 999, PT_SC, ucp_Elbasan },
+  { 1007, PT_SC, ucp_Elymaic },
+  { 1012, PT_SC, ucp_Elymaic },
+  { 1020, PT_BOOL, ucp_Emoji_Modifier },
+  { 1025, PT_BOOL, ucp_Emoji },
+  { 1031, PT_BOOL, ucp_Emoji_Component },
+  { 1046, PT_BOOL, ucp_Emoji_Modifier },
+  { 1060, PT_BOOL, ucp_Emoji_Modifier_Base },
+  { 1078, PT_BOOL, ucp_Emoji_Presentation },
+  { 1096, PT_BOOL, ucp_Emoji_Presentation },
+  { 1102, PT_SC, ucp_Ethiopic },
+  { 1107, PT_SC, ucp_Ethiopic },
+  { 1116, PT_BOOL, ucp_Extender },
+  { 1120, PT_BOOL, ucp_Extended_Pictographic },
+  { 1141, PT_BOOL, ucp_Extender },
+  { 1150, PT_BOOL, ucp_Extended_Pictographic },
+  { 1158, PT_SCX, ucp_Georgian },
+  { 1163, PT_SCX, ucp_Georgian },
+  { 1172, PT_SCX, ucp_Glagolitic },
+  { 1177, PT_SCX, ucp_Glagolitic },
+  { 1188, PT_SCX, ucp_Gunjala_Gondi },
+  { 1193, PT_SCX, ucp_Masaram_Gondi },
+  { 1198, PT_SC, ucp_Gothic },
+  { 1203, PT_SC, ucp_Gothic },
+  { 1210, PT_SCX, ucp_Grantha },
+  { 1215, PT_SCX, ucp_Grantha },
+  { 1223, PT_BOOL, ucp_Grapheme_Base },
+  { 1236, PT_BOOL, ucp_Grapheme_Extend },
+  { 1251, PT_BOOL, ucp_Grapheme_Link },
+  { 1264, PT_BOOL, ucp_Grapheme_Base },
+  { 1271, PT_SCX, ucp_Greek },
+  { 1277, PT_SCX, ucp_Greek },
+  { 1282, PT_BOOL, ucp_Grapheme_Extend },
+  { 1288, PT_BOOL, ucp_Grapheme_Link },
+  { 1295, PT_SCX, ucp_Gujarati },
+  { 1304, PT_SCX, ucp_Gujarati },
+  { 1309, PT_SCX, ucp_Gunjala_Gondi },
+  { 1322, PT_SCX, ucp_Gurmukhi },
+  { 1331, PT_SCX, ucp_Gurmukhi },
+  { 1336, PT_SCX, ucp_Han },
+  { 1340, PT_SCX, ucp_Hangul },
+  { 1345, PT_SCX, ucp_Hangul },
+  { 1352, PT_SCX, ucp_Han },
+  { 1357, PT_SCX, ucp_Hanifi_Rohingya },
+  { 1372, PT_SCX, ucp_Hanunoo },
+  { 1377, PT_SCX, ucp_Hanunoo },
+  { 1385, PT_SC, ucp_Hatran },
+  { 1390, PT_SC, ucp_Hatran },
+  { 1397, PT_SC, ucp_Hebrew },
+  { 1402, PT_SC, ucp_Hebrew },
+  { 1409, PT_BOOL, ucp_Hex_Digit },
+  { 1413, PT_BOOL, ucp_Hex_Digit },
+  { 1422, PT_SCX, ucp_Hiragana },
+  { 1427, PT_SCX, ucp_Hiragana },
+  { 1436, PT_SC, ucp_Anatolian_Hieroglyphs },
+  { 1441, PT_SC, ucp_Pahawh_Hmong },
+  { 1446, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
+  { 1451, PT_SC, ucp_Old_Hungarian },
+  { 1456, PT_BOOL, ucp_ID_Continue },
+  { 1460, PT_BOOL, ucp_ID_Continue },
+  { 1471, PT_BOOL, ucp_Ideographic },
+  { 1476, PT_BOOL, ucp_Ideographic },
+  { 1488, PT_BOOL, ucp_ID_Start },
+  { 1492, PT_BOOL, ucp_IDS_Binary_Operator },
+  { 1497, PT_BOOL, ucp_IDS_Binary_Operator },
+  { 1515, PT_BOOL, ucp_IDS_Trinary_Operator },
+  { 1520, PT_BOOL, ucp_ID_Start },
+  { 1528, PT_BOOL, ucp_IDS_Trinary_Operator },
+  { 1547, PT_SC, ucp_Imperial_Aramaic },
+  { 1563, PT_SC, ucp_Inherited },
+  { 1573, PT_SC, ucp_Inscriptional_Pahlavi },
+  { 1594, PT_SC, ucp_Inscriptional_Parthian },
+  { 1616, PT_SC, ucp_Old_Italic },
+  { 1621, PT_SCX, ucp_Javanese },
+  { 1626, PT_SCX, ucp_Javanese },
+  { 1635, PT_BOOL, ucp_Join_Control },
+  { 1641, PT_BOOL, ucp_Join_Control },
+  { 1653, PT_SCX, ucp_Kaithi },
+  { 1660, PT_SCX, ucp_Kayah_Li },
+  { 1665, PT_SCX, ucp_Katakana },
+  { 1670, PT_SCX, ucp_Kannada },
+  { 1678, PT_SCX, ucp_Katakana },
+  { 1687, PT_SCX, ucp_Kayah_Li },
+  { 1695, PT_SC, ucp_Kharoshthi },
+  { 1700, PT_SC, ucp_Kharoshthi },
+  { 1711, PT_SC, ucp_Khitan_Small_Script },
+  { 1729, PT_SC, ucp_Khmer },
+  { 1735, PT_SC, ucp_Khmer },
+  { 1740, PT_SCX, ucp_Khojki },
+  { 1745, PT_SCX, ucp_Khojki },
+  { 1752, PT_SCX, ucp_Khudawadi },
+  { 1762, PT_SC, ucp_Khitan_Small_Script },
+  { 1767, PT_SCX, ucp_Kannada },
+  { 1772, PT_SCX, ucp_Kaithi },
+  { 1777, PT_GC, ucp_L },
+  { 1779, PT_LAMP, 0 },
+  { 1782, PT_SC, ucp_Tai_Tham },
+  { 1787, PT_SC, ucp_Lao },
+  { 1791, PT_SC, ucp_Lao },
+  { 1796, PT_SCX, ucp_Latin },
+  { 1802, PT_SCX, ucp_Latin },
+  { 1807, PT_LAMP, 0 },
+  { 1810, PT_SC, ucp_Lepcha },
+  { 1815, PT_SC, ucp_Lepcha },
+  { 1822, PT_SCX, ucp_Limbu },
+  { 1827, PT_SCX, ucp_Limbu },
+  { 1833, PT_SCX, ucp_Linear_A },
+  { 1838, PT_SCX, ucp_Linear_B },
+  { 1843, PT_SCX, ucp_Linear_A },
+  { 1851, PT_SCX, ucp_Linear_B },
+  { 1859, PT_SC, ucp_Lisu },
+  { 1864, PT_PC, ucp_Ll },
+  { 1867, PT_PC, ucp_Lm },
+  { 1870, PT_PC, ucp_Lo },
+  { 1873, PT_BOOL, ucp_Logical_Order_Exception },
+  { 1877, PT_BOOL, ucp_Logical_Order_Exception },
+  { 1899, PT_BOOL, ucp_Lowercase },
+  { 1905, PT_BOOL, ucp_Lowercase },
+  { 1915, PT_PC, ucp_Lt },
+  { 1918, PT_PC, ucp_Lu },
+  { 1921, PT_SC, ucp_Lycian },
+  { 1926, PT_SC, ucp_Lycian },
+  { 1933, PT_SC, ucp_Lydian },
+  { 1938, PT_SC, ucp_Lydian },
+  { 1945, PT_GC, ucp_M },
+  { 1947, PT_SCX, ucp_Mahajani },
+  { 1956, PT_SCX, ucp_Mahajani },
+  { 1961, PT_SC, ucp_Makasar },
+  { 1966, PT_SC, ucp_Makasar },
+  { 1974, PT_SCX, ucp_Malayalam },
+  { 1984, PT_SCX, ucp_Mandaic },
+  { 1989, PT_SCX, ucp_Mandaic },
+  { 1997, PT_SCX, ucp_Manichaean },
+  { 2002, PT_SCX, ucp_Manichaean },
+  { 2013, PT_SC, ucp_Marchen },
+  { 2018, PT_SC, ucp_Marchen },
+  { 2026, PT_SCX, ucp_Masaram_Gondi },
+  { 2039, PT_BOOL, ucp_Math },
+  { 2044, PT_PC, ucp_Mc },
+  { 2047, PT_PC, ucp_Me },
+  { 2050, PT_SC, ucp_Medefaidrin },
+  { 2062, PT_SC, ucp_Medefaidrin },
+  { 2067, PT_SC, ucp_Meetei_Mayek },
+  { 2079, PT_SC, ucp_Mende_Kikakui },
+  { 2084, PT_SC, ucp_Mende_Kikakui },
+  { 2097, PT_SC, ucp_Meroitic_Cursive },
+  { 2102, PT_SC, ucp_Meroitic_Hieroglyphs },
+  { 2107, PT_SC, ucp_Meroitic_Cursive },
+  { 2123, PT_SC, ucp_Meroitic_Hieroglyphs },
+  { 2143, PT_SC, ucp_Miao },
+  { 2148, PT_SCX, ucp_Malayalam },
+  { 2153, PT_PC, ucp_Mn },
+  { 2156, PT_SCX, ucp_Modi },
+  { 2161, PT_SCX, ucp_Mongolian },
+  { 2166, PT_SCX, ucp_Mongolian },
+  { 2176, PT_SC, ucp_Mro },
+  { 2180, PT_SC, ucp_Mro },
+  { 2185, PT_SC, ucp_Meetei_Mayek },
+  { 2190, PT_SCX, ucp_Multani },
+  { 2195, PT_SCX, ucp_Multani },
+  { 2203, PT_SCX, ucp_Myanmar },
+  { 2211, PT_SCX, ucp_Myanmar },
+  { 2216, PT_GC, ucp_N },
+  { 2218, PT_SC, ucp_Nabataean },
+  { 2228, PT_SCX, ucp_Nandinagari },
+  { 2233, PT_SCX, ucp_Nandinagari },
+  { 2245, PT_SC, ucp_Old_North_Arabian },
+  { 2250, PT_SC, ucp_Nabataean },
+  { 2255, PT_BOOL, ucp_Noncharacter_Code_Point },
+  { 2261, PT_PC, ucp_Nd },
+  { 2264, PT_SC, ucp_Newa },
+  { 2269, PT_SC, ucp_New_Tai_Lue },
+  { 2279, PT_SCX, ucp_Nko },
+  { 2283, PT_SCX, ucp_Nko },
+  { 2288, PT_PC, ucp_Nl },
+  { 2291, PT_PC, ucp_No },
+  { 2294, PT_BOOL, ucp_Noncharacter_Code_Point },
+  { 2316, PT_SC, ucp_Nushu },
+  { 2321, PT_SC, ucp_Nushu },
+  { 2327, PT_SC, ucp_Nyiakeng_Puachue_Hmong },
+  { 2348, PT_SC, ucp_Ogham },
+  { 2353, PT_SC, ucp_Ogham },
+  { 2359, PT_SC, ucp_Ol_Chiki },
+  { 2367, PT_SC, ucp_Ol_Chiki },
+  { 2372, PT_SC, ucp_Old_Hungarian },
+  { 2385, PT_SC, ucp_Old_Italic },
+  { 2395, PT_SC, ucp_Old_North_Arabian },
+  { 2411, PT_SCX, ucp_Old_Permic },
+  { 2421, PT_SC, ucp_Old_Persian },
+  { 2432, PT_SC, ucp_Old_Sogdian },
+  { 2443, PT_SC, ucp_Old_South_Arabian },
+  { 2459, PT_SC, ucp_Old_Turkic },
+  { 2469, PT_SCX, ucp_Old_Uyghur },
+  { 2479, PT_SCX, ucp_Oriya },
+  { 2485, PT_SC, ucp_Old_Turkic },
+  { 2490, PT_SCX, ucp_Oriya },
+  { 2495, PT_SC, ucp_Osage },
+  { 2501, PT_SC, ucp_Osage },
+  { 2506, PT_SC, ucp_Osmanya },
+  { 2511, PT_SC, ucp_Osmanya },
+  { 2519, PT_SCX, ucp_Old_Uyghur },
+  { 2524, PT_GC, ucp_P },
+  { 2526, PT_SC, ucp_Pahawh_Hmong },
+  { 2538, PT_SC, ucp_Palmyrene },
+  { 2543, PT_SC, ucp_Palmyrene },
+  { 2553, PT_BOOL, ucp_Pattern_Syntax },
+  { 2560, PT_BOOL, ucp_Pattern_Syntax },
+  { 2574, PT_BOOL, ucp_Pattern_White_Space },
+  { 2592, PT_BOOL, ucp_Pattern_White_Space },
+  { 2598, PT_SC, ucp_Pau_Cin_Hau },
+  { 2603, PT_SC, ucp_Pau_Cin_Hau },
+  { 2613, PT_PC, ucp_Pc },
+  { 2616, PT_BOOL, ucp_Prepended_Concatenation_Mark },
+  { 2620, PT_PC, ucp_Pd },
+  { 2623, PT_PC, ucp_Pe },
+  { 2626, PT_SCX, ucp_Old_Permic },
+  { 2631, PT_PC, ucp_Pf },
+  { 2634, PT_SCX, ucp_Phags_Pa },
+  { 2639, PT_SCX, ucp_Phags_Pa },
+  { 2647, PT_SC, ucp_Inscriptional_Pahlavi },
+  { 2652, PT_SCX, ucp_Psalter_Pahlavi },
+  { 2657, PT_SC, ucp_Phoenician },
+  { 2662, PT_SC, ucp_Phoenician },
+  { 2673, PT_PC, ucp_Pi },
+  { 2676, PT_SC, ucp_Miao },
+  { 2681, PT_PC, ucp_Po },
+  { 2684, PT_BOOL, ucp_Prepended_Concatenation_Mark },
+  { 2711, PT_SC, ucp_Inscriptional_Parthian },
+  { 2716, PT_PC, ucp_Ps },
+  { 2719, PT_SCX, ucp_Psalter_Pahlavi },
+  { 2734, PT_SCX, ucp_Coptic },
+  { 2739, PT_SC, ucp_Inherited },
+  { 2744, PT_BOOL, ucp_Quotation_Mark },
+  { 2750, PT_BOOL, ucp_Quotation_Mark },
+  { 2764, PT_BOOL, ucp_Radical },
+  { 2772, PT_BOOL, ucp_Regional_Indicator },
+  { 2790, PT_SC, ucp_Rejang },
+  { 2797, PT_BOOL, ucp_Regional_Indicator },
+  { 2800, PT_SC, ucp_Rejang },
+  { 2805, PT_SCX, ucp_Hanifi_Rohingya },
+  { 2810, PT_SC, ucp_Runic },
+  { 2816, PT_SC, ucp_Runic },
+  { 2821, PT_GC, ucp_S },
+  { 2823, PT_SC, ucp_Samaritan },
+  { 2833, PT_SC, ucp_Samaritan },
+  { 2838, PT_SC, ucp_Old_South_Arabian },
+  { 2843, PT_SC, ucp_Saurashtra },
+  { 2848, PT_SC, ucp_Saurashtra },
+  { 2859, PT_PC, ucp_Sc },
+  { 2862, PT_BOOL, ucp_Soft_Dotted },
+  { 2865, PT_BOOL, ucp_Sentence_Terminal },
+  { 2882, PT_SC, ucp_SignWriting },
+  { 2887, PT_SCX, ucp_Sharada },
+  { 2895, PT_SC, ucp_Shavian },
+  { 2903, PT_SC, ucp_Shavian },
+  { 2908, PT_SCX, ucp_Sharada },
+  { 2913, PT_SC, ucp_Siddham },
+  { 2918, PT_SC, ucp_Siddham },
+  { 2926, PT_SC, ucp_SignWriting },
+  { 2938, PT_SCX, ucp_Khudawadi },
+  { 2943, PT_SCX, ucp_Sinhala },
+  { 2948, PT_SCX, ucp_Sinhala },
+  { 2956, PT_PC, ucp_Sk },
+  { 2959, PT_PC, ucp_Sm },
+  { 2962, PT_PC, ucp_So },
+  { 2965, PT_BOOL, ucp_Soft_Dotted },
+  { 2976, PT_SCX, ucp_Sogdian },
+  { 2981, PT_SCX, ucp_Sogdian },
+  { 2989, PT_SC, ucp_Old_Sogdian },
+  { 2994, PT_SC, ucp_Sora_Sompeng },
+  { 2999, PT_SC, ucp_Sora_Sompeng },
+  { 3011, PT_SC, ucp_Soyombo },
+  { 3016, PT_SC, ucp_Soyombo },
+  { 3024, PT_BOOL, ucp_White_Space },
+  { 3030, PT_BOOL, ucp_Sentence_Terminal },
+  { 3036, PT_SC, ucp_Sundanese },
+  { 3041, PT_SC, ucp_Sundanese },
+  { 3051, PT_SCX, ucp_Syloti_Nagri },
+  { 3056, PT_SCX, ucp_Syloti_Nagri },
+  { 3068, PT_SCX, ucp_Syriac },
+  { 3073, PT_SCX, ucp_Syriac },
+  { 3080, PT_SCX, ucp_Tagalog },
+  { 3088, PT_SCX, ucp_Tagbanwa },
+  { 3093, PT_SCX, ucp_Tagbanwa },
+  { 3102, PT_SCX, ucp_Tai_Le },
+  { 3108, PT_SC, ucp_Tai_Tham },
+  { 3116, PT_SC, ucp_Tai_Viet },
+  { 3124, PT_SCX, ucp_Takri },
+  { 3129, PT_SCX, ucp_Takri },
+  { 3135, PT_SCX, ucp_Tai_Le },
+  { 3140, PT_SC, ucp_New_Tai_Lue },
+  { 3145, PT_SCX, ucp_Tamil },
+  { 3151, PT_SCX, ucp_Tamil },
+  { 3156, PT_SC, ucp_Tangut },
+  { 3161, PT_SC, ucp_Tangsa },
+  { 3168, PT_SC, ucp_Tangut },
+  { 3175, PT_SC, ucp_Tai_Viet },
+  { 3180, PT_SCX, ucp_Telugu },
+  { 3185, PT_SCX, ucp_Telugu },
+  { 3192, PT_BOOL, ucp_Terminal_Punctuation },
+  { 3197, PT_BOOL, ucp_Terminal_Punctuation },
+  { 3217, PT_SC, ucp_Tifinagh },
+  { 3222, PT_SCX, ucp_Tagalog },
+  { 3227, PT_SCX, ucp_Thaana },
+  { 3232, PT_SCX, ucp_Thaana },
+  { 3239, PT_SC, ucp_Thai },
+  { 3244, PT_SC, ucp_Tibetan },
+  { 3252, PT_SC, ucp_Tibetan },
+  { 3257, PT_SC, ucp_Tifinagh },
+  { 3266, PT_SCX, ucp_Tirhuta },
+  { 3271, PT_SCX, ucp_Tirhuta },
+  { 3279, PT_SC, ucp_Tangsa },
+  { 3284, PT_SC, ucp_Toto },
+  { 3289, PT_SC, ucp_Ugaritic },
+  { 3294, PT_SC, ucp_Ugaritic },
+  { 3303, PT_BOOL, ucp_Unified_Ideograph },
+  { 3309, PT_BOOL, ucp_Unified_Ideograph },
+  { 3326, PT_SC, ucp_Unknown },
+  { 3334, PT_BOOL, ucp_Uppercase },
+  { 3340, PT_BOOL, ucp_Uppercase },
+  { 3350, PT_SC, ucp_Vai },
+  { 3354, PT_SC, ucp_Vai },
+  { 3359, PT_BOOL, ucp_Variation_Selector },
+  { 3377, PT_SC, ucp_Vithkuqi },
+  { 3382, PT_SC, ucp_Vithkuqi },
+  { 3391, PT_BOOL, ucp_Variation_Selector },
+  { 3394, PT_SC, ucp_Wancho },
+  { 3401, PT_SC, ucp_Warang_Citi },
+  { 3406, PT_SC, ucp_Warang_Citi },
+  { 3417, PT_SC, ucp_Wancho },
+  { 3422, PT_BOOL, ucp_White_Space },
+  { 3433, PT_BOOL, ucp_White_Space },
+  { 3440, PT_ALNUM, 0 },
+  { 3444, PT_BOOL, ucp_XID_Continue },
+  { 3449, PT_BOOL, ucp_XID_Continue },
+  { 3461, PT_BOOL, ucp_XID_Start },
+  { 3466, PT_BOOL, ucp_XID_Start },
+  { 3475, PT_SC, ucp_Old_Persian },
+  { 3480, PT_PXSPACE, 0 },
+  { 3484, PT_SPACE, 0 },
+  { 3488, PT_SC, ucp_Cuneiform },
+  { 3493, PT_UCNC, 0 },
+  { 3497, PT_WORD, 0 },
+  { 3501, PT_SCX, ucp_Yezidi },
+  { 3506, PT_SCX, ucp_Yezidi },
+  { 3513, PT_SCX, ucp_Yi },
+  { 3516, PT_SCX, ucp_Yi },
+  { 3521, PT_GC, ucp_Z },
+  { 3523, PT_SC, ucp_Zanabazar_Square },
+  { 3539, PT_SC, ucp_Zanabazar_Square },
+  { 3544, PT_SC, ucp_Inherited },
+  { 3549, PT_PC, ucp_Zl },
+  { 3552, PT_PC, ucp_Zp },
+  { 3555, PT_PC, ucp_Zs },
+  { 3558, PT_SC, ucp_Common },
+  { 3563, PT_SC, ucp_Unknown }
+};
+
+const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
+
+#endif /* SUPPORT_UNICODE */
+
+/* End of pcre2_ucptables.c */

+ 19 - 1
thirdparty/pcre2/src/pcre2_xclass.c

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
      Original API code Copyright (c) 1997-2012 University of Cambridge
-          New API code Copyright (c) 2016-2019 University of Cambridge
+          New API code Copyright (c) 2016-2022 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -135,6 +135,7 @@ while ((t = *data++) != XCL_END)
     {
     const ucd_record *prop = GET_UCD(c);
     BOOL isprop = t == XCL_PROP;
+    BOOL ok;
 
     switch(*data)
       {
@@ -160,6 +161,12 @@ while ((t = *data++) != XCL_END)
       if ((data[1] == prop->script) == isprop) return !negated;
       break;
 
+      case PT_SCX:
+      ok = (data[1] == prop->script ||
+            MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), data[1]) != 0);
+      if (ok == isprop) return !negated;
+      break;
+
       case PT_ALNUM:
       if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
            PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
@@ -207,6 +214,17 @@ while ((t = *data++) != XCL_END)
         }
       break;
 
+      case PT_BIDICL:
+      if ((UCD_BIDICLASS_PROP(prop) == data[1]) == isprop)
+        return !negated;
+      break;
+
+      case PT_BOOL:
+      ok = MAPBIT(PRIV(ucd_boolprop_sets) +
+        UCD_BPROPS_PROP(prop), data[1]) != 0;
+      if (ok == isprop) return !negated;
+      break;
+
       /* The following three properties can occur only in an XCLASS, as there
       is no \p or \P coding for them. */
 

+ 75 - 37
thirdparty/pcre2/src/sljit/sljitConfigInternal.h

@@ -60,7 +60,7 @@ extern "C" {
      SLJIT_LITTLE_ENDIAN : little endian architecture
      SLJIT_BIG_ENDIAN : big endian architecture
      SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!)
-     SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information
+     SLJIT_INDIRECT_CALL : see SLJIT_FUNC_ADDR() for more information
 
    Constants:
      SLJIT_NUMBER_OF_REGISTERS : number of available registers
@@ -148,7 +148,7 @@ extern "C" {
 #endif
 #elif defined (__aarch64__)
 #define SLJIT_CONFIG_ARM_64 1
-#elif defined(__ppc64__) || defined(__powerpc64__) || defined(_ARCH_PPC64) || (defined(_POWER) && defined(__64BIT__))
+#elif defined(__ppc64__) || defined(__powerpc64__) || (defined(_ARCH_PPC64) && defined(__64BIT__)) || (defined(_POWER) && defined(__64BIT__))
 #define SLJIT_CONFIG_PPC_64 1
 #elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(_ARCH_PWR) || defined(_ARCH_PWR2) || defined(_POWER)
 #define SLJIT_CONFIG_PPC_32 1
@@ -156,7 +156,7 @@ extern "C" {
 #define SLJIT_CONFIG_MIPS_32 1
 #elif defined(__mips64)
 #define SLJIT_CONFIG_MIPS_64 1
-#elif defined(__sparc__) || defined(__sparc)
+#elif (defined(__sparc__) || defined(__sparc)) && !defined(_LP64)
 #define SLJIT_CONFIG_SPARC_32 1
 #elif defined(__s390x__)
 #define SLJIT_CONFIG_S390X 1
@@ -274,9 +274,13 @@ extern "C" {
 
 #ifndef SLJIT_INLINE
 /* Inline functions. Some old compilers do not support them. */
-#if defined(__SUNPRO_C) && __SUNPRO_C <= 0x510
+#ifdef __SUNPRO_C
+#if __SUNPRO_C < 0x560
 #define SLJIT_INLINE
 #else
+#define SLJIT_INLINE inline
+#endif /* __SUNPRO_C */
+#else
 #define SLJIT_INLINE __inline
 #endif
 #endif /* !SLJIT_INLINE */
@@ -319,18 +323,36 @@ extern "C" {
 /* Instruction cache flush. */
 /****************************/
 
+/*
+ * TODO:
+ *
+ * clang >= 15 could be safe to enable below
+ * older versions are known to abort in some targets
+ * https://github.com/PhilipHazel/pcre2/issues/92
+ *
+ * beware APPLE is known to have removed the code in iOS so
+ * it will need to be exempted or result in broken builds
+ */
 #if (!defined SLJIT_CACHE_FLUSH && defined __has_builtin)
-#if __has_builtin(__builtin___clear_cache)
+#if __has_builtin(__builtin___clear_cache) && !defined(__clang__)
 
+/*
+ * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=91248
+ * https://gcc.gnu.org/bugzilla//show_bug.cgi?id=93811
+ * gcc's clear_cache builtin for power and sparc are broken
+ */
+#if !defined(SLJIT_CONFIG_PPC) && !defined(SLJIT_CONFIG_SPARC_32)
 #define SLJIT_CACHE_FLUSH(from, to) \
 	__builtin___clear_cache((char*)(from), (char*)(to))
+#endif
 
-#endif /* __has_builtin(__builtin___clear_cache) */
+#endif /* gcc >= 10 */
 #endif /* (!defined SLJIT_CACHE_FLUSH && defined __has_builtin) */
 
 #ifndef SLJIT_CACHE_FLUSH
 
-#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
+	|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
 
 /* Not required to implement on archs with unified caches. */
 #define SLJIT_CACHE_FLUSH(from, to)
@@ -340,9 +362,9 @@ extern "C" {
 /* Supported by all macs since Mac OS 10.5.
    However, it does not work on non-jailbroken iOS devices,
    although the compilation is successful. */
-
+#include <libkern/OSCacheControl.h>
 #define SLJIT_CACHE_FLUSH(from, to) \
-	sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from))
+	sys_icache_invalidate((void*)(from), (size_t)((char*)(to) - (char*)(from)))
 
 #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
 
@@ -351,33 +373,33 @@ extern "C" {
 	ppc_cache_flush((from), (to))
 #define SLJIT_CACHE_FLUSH_OWN_IMPL 1
 
-#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
+#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
 
+/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */
 #define SLJIT_CACHE_FLUSH(from, to) \
-	__builtin___clear_cache((char*)(from), (char*)(to))
-
-#elif defined __ANDROID__
+	sparc_cache_flush((from), (to))
+#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
 
-/* Android lacks __clear_cache; instead, cacheflush should be used. */
+#elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || defined(__clang__)
 
 #define SLJIT_CACHE_FLUSH(from, to) \
-    cacheflush((long)(from), (long)(to), 0)
+	__builtin___clear_cache((char*)(from), (char*)(to))
 
-#elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
+#elif defined __ANDROID__
 
-/* The __clear_cache() implementation of GCC is a dummy function on Sparc. */
+/* Android ARMv7 with gcc lacks __clear_cache; use cacheflush instead. */
+#include <sys/cachectl.h>
 #define SLJIT_CACHE_FLUSH(from, to) \
-	sparc_cache_flush((from), (to))
-#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
+	cacheflush((long)(from), (long)(to), 0)
 
 #elif defined _WIN32
 
 #define SLJIT_CACHE_FLUSH(from, to) \
-	FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from))
+	FlushInstructionCache(GetCurrentProcess(), (void*)(from), (char*)(to) - (char*)(from))
 
 #else
 
-/* Calls __ARM_NR_cacheflush on ARM-Linux. */
+/* Call __ARM_NR_cacheflush on ARM-Linux or the corresponding MIPS syscall. */
 #define SLJIT_CACHE_FLUSH(from, to) \
 	__clear_cache((char*)(from), (char*)(to))
 
@@ -645,18 +667,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 
 #define SLJIT_NUMBER_OF_REGISTERS 12
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 9
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
 #define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset)
 #define SLJIT_PREF_SHIFT_REG SLJIT_R2
 
 #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 
 #define SLJIT_NUMBER_OF_REGISTERS 13
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15
 #ifndef _WIN64
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 6
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
 #define SLJIT_LOCALS_OFFSET_BASE 0
 #else /* _WIN64 */
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
-#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset)
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 10
+#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw))
 #endif /* !_WIN64 */
 #define SLJIT_PREF_SHIFT_REG SLJIT_R3
 
@@ -664,31 +691,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 
 #define SLJIT_NUMBER_OF_REGISTERS 12
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
 #define SLJIT_LOCALS_OFFSET_BASE 0
 
 #elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2)
 
 #define SLJIT_NUMBER_OF_REGISTERS 12
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
 #define SLJIT_LOCALS_OFFSET_BASE 0
 
 #elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
 
 #define SLJIT_NUMBER_OF_REGISTERS 26
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
-#define SLJIT_LOCALS_OFFSET_BASE 0
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
+#define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw))
 
 #elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
 
 #define SLJIT_NUMBER_OF_REGISTERS 23
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 17
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18
 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX)
-#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw))
 #elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
 /* Add +1 for double alignment. */
-#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1) * (sljit_s32)sizeof(sljit_sw))
 #else
-#define SLJIT_LOCALS_OFFSET_BASE (3 * sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE (3 * (sljit_s32)sizeof(sljit_sw))
 #endif /* SLJIT_CONFIG_PPC_64 || _AIX */
 
 #elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
@@ -696,19 +731,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #define SLJIT_NUMBER_OF_REGISTERS 21
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
-#define SLJIT_LOCALS_OFFSET_BASE (4 * sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE (4 * (sljit_s32)sizeof(sljit_sw))
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 13
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 6
 #else
 #define SLJIT_LOCALS_OFFSET_BASE 0
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 29
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
 #endif
 
 #elif (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC)
 
 #define SLJIT_NUMBER_OF_REGISTERS 18
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 14
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
 /* saved registers (16), return struct pointer (1), space for 6 argument words (1),
    4th double arg (2), double alignment (1). */
-#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE ((16 + 1 + 6 + 2 + 1) * (sljit_s32)sizeof(sljit_sw))
 #endif
 
 #elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
@@ -736,12 +777,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 
 #define SLJIT_NUMBER_OF_REGISTERS 12
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
 #define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE
 
 #elif (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
 
 #define SLJIT_NUMBER_OF_REGISTERS 0
 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 0
+#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 0
+#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
 #define SLJIT_LOCALS_OFFSET_BASE 0
 
 #endif
@@ -751,13 +796,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 #define SLJIT_NUMBER_OF_SCRATCH_REGISTERS \
 	(SLJIT_NUMBER_OF_REGISTERS - SLJIT_NUMBER_OF_SAVED_REGISTERS)
 
-#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 6
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && (defined _WIN64)
-#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 1
-#else
-#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
-#endif
-
 #define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \
 	(SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS)
 
@@ -765,8 +803,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
 /* CPU status flags management. */
 /********************************/
 
-#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \
-	|| (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) \
+#if (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) \
+	|| (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \
 	|| (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) \
 	|| (defined SLJIT_CONFIG_SPARC && SLJIT_CONFIG_SPARC) \
 	|| (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)

+ 12 - 4
thirdparty/pcre2/src/sljit/sljitExecAllocator.c

@@ -66,7 +66,7 @@
 /* --------------------------------------------------------------------- */
 
 /* 64 KByte. */
-#define CHUNK_SIZE	0x10000
+#define CHUNK_SIZE	(sljit_uw)0x10000u
 
 /*
    alloc_chunk / free_chunk :
@@ -112,7 +112,7 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
 
 static SLJIT_INLINE int get_map_jit_flag()
 {
-	sljit_sw page_size;
+	size_t page_size;
 	void *ptr;
 	struct utsname name;
 	static int map_jit_flag = -1;
@@ -139,8 +139,9 @@ static SLJIT_INLINE int get_map_jit_flag()
 #endif /* MAP_ANON */
 #else /* !SLJIT_CONFIG_X86 */
 #if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM)
-#error Unsupported architecture
+#error "Unsupported architecture"
 #endif /* SLJIT_CONFIG_ARM */
+#include <AvailabilityMacros.h>
 #include <pthread.h>
 
 #define SLJIT_MAP_JIT	(MAP_JIT)
@@ -149,7 +150,11 @@ static SLJIT_INLINE int get_map_jit_flag()
 
 static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec)
 {
+#if MAC_OS_X_VERSION_MIN_REQUIRED >= 110000
 	pthread_jit_write_protect_np(enable_exec);
+#else
+#error "Must target Big Sur or newer"
+#endif /* BigSur */
 }
 #endif /* SLJIT_CONFIG_X86 */
 #else /* !TARGET_OS_OSX */
@@ -187,10 +192,13 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
 	if (retval == MAP_FAILED)
 		return NULL;
 
+#ifdef __FreeBSD__
+        /* HardenedBSD's mmap lies, so check permissions again */
 	if (mprotect(retval, size, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) {
 		munmap(retval, size);
 		return NULL;
 	}
+#endif /* FreeBSD */
 
 	SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0);
 
@@ -227,7 +235,7 @@ struct free_block {
 #define AS_FREE_BLOCK(base, offset) \
 	((struct free_block*)(((sljit_u8*)base) + offset))
 #define MEM_START(base)		((void*)(((sljit_u8*)base) + sizeof(struct block_header)))
-#define ALIGN_SIZE(size)	(((size) + sizeof(struct block_header) + 7) & ~7)
+#define ALIGN_SIZE(size)	(((size) + sizeof(struct block_header) + 7u) & ~(sljit_uw)7)
 
 static struct free_block* free_blocks;
 static sljit_uw allocated_size;

文件差异内容过多而无法显示
+ 268 - 251
thirdparty/pcre2/src/sljit/sljitLir.c


+ 241 - 193
thirdparty/pcre2/src/sljit/sljitLir.h

@@ -163,13 +163,6 @@ extern "C" {
         is not available at all.
 */
 
-/* When SLJIT_UNUSED is specified as the destination of sljit_emit_op1
-   or sljit_emit_op2 operations the result is discarded. Some status
-   flags must be set when the destination is SLJIT_UNUSED, because the
-   operation would have no effect otherwise. Other SLJIT operations do
-   not support SLJIT_UNUSED as a destination operand. */
-#define SLJIT_UNUSED		0
-
 /* Scratch registers. */
 #define SLJIT_R0	1
 #define SLJIT_R1	2
@@ -231,9 +224,6 @@ extern "C" {
    value. The FR and FS register sets are overlap in the same way as R
    and S register sets. See above. */
 
-/* Note: SLJIT_UNUSED as destination is not valid for floating point
-   operations, since they cannot be used for setting flags. */
-
 /* Floating point scratch registers. */
 #define SLJIT_FR0	1
 #define SLJIT_FR1	2
@@ -263,39 +253,38 @@ extern "C" {
 /*  Argument type definitions                                            */
 /* --------------------------------------------------------------------- */
 
-/* Argument type definitions.
-   Used by SLJIT_[DEF_]ARGx and SLJIT_[DEF]_RET macros. */
-
-#define SLJIT_ARG_TYPE_VOID 0
-#define SLJIT_ARG_TYPE_SW 1
-#define SLJIT_ARG_TYPE_UW 2
-#define SLJIT_ARG_TYPE_S32 3
-#define SLJIT_ARG_TYPE_U32 4
-#define SLJIT_ARG_TYPE_F32 5
-#define SLJIT_ARG_TYPE_F64 6
-
 /* The following argument type definitions are used by sljit_emit_enter,
    sljit_set_context, sljit_emit_call, and sljit_emit_icall functions.
-   The following return type definitions are used by sljit_emit_call
-   and sljit_emit_icall functions.
 
-   When a function is called, the first integer argument must be placed
-   in SLJIT_R0, the second in SLJIT_R1, and so on. Similarly the first
-   floating point argument must be placed in SLJIT_FR0, the second in
-   SLJIT_FR1, and so on.
+   As for sljit_emit_call and sljit_emit_icall, the first integer argument
+   must be placed into SLJIT_R0, the second one into SLJIT_R1, and so on.
+   Similarly the first floating point argument must be placed into SLJIT_FR0,
+   the second one into SLJIT_FR1, and so on.
+
+   As for sljit_emit_enter, the integer arguments can be stored in scratch
+   or saved registers. The first integer argument without _R postfix is
+   stored in SLJIT_S0, the next one in SLJIT_S1, and so on. The integer
+   arguments with _R postfix are placed into scratch registers. The index
+   of the scratch register is the count of the previous integer arguments
+   starting from SLJIT_R0. The floating point arguments are always placed
+   into SLJIT_FR0, SLJIT_FR1, and so on.
+
+   Note: if a function is called by sljit_emit_call/sljit_emit_icall and
+         an argument is stored in a scratch register by sljit_emit_enter,
+         that argument uses the same scratch register index for both
+         integer and floating point arguments.
 
    Example function definition:
-     sljit_f32 SLJIT_FUNC example_c_callback(sljit_sw arg_a,
+     sljit_f32 SLJIT_FUNC example_c_callback(void *arg_a,
          sljit_f64 arg_b, sljit_u32 arg_c, sljit_f32 arg_d);
 
    Argument type definition:
-     SLJIT_DEF_RET(SLJIT_ARG_TYPE_F32)
-        | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F64)
-        | SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_U32) | SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_F32)
+     SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_F32)
+        | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_P, 1) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F64, 2)
+        | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_32, 3) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 4)
 
    Short form of argument type definition:
-     SLJIT_RET(F32) | SLJIT_ARG1(SW) | SLJIT_ARG2(F64)
-        | SLJIT_ARG3(S32) | SLJIT_ARG4(F32)
+     SLJIT_ARGS4(F32, P, F64, 32, F32)
 
    Argument passing:
      arg_a must be placed in SLJIT_R0
@@ -303,34 +292,73 @@ extern "C" {
      arg_b must be placed in SLJIT_FR0
      arg_d must be placed in SLJIT_FR1
 
-Note:
-   The SLJIT_ARG_TYPE_VOID type is only supported by
-   SLJIT_DEF_RET, and SLJIT_ARG_TYPE_VOID is also the
-   default value when SLJIT_DEF_RET is not specified. */
-#define SLJIT_DEF_SHIFT 4
-#define SLJIT_DEF_RET(type) (type)
-#define SLJIT_DEF_ARG1(type) ((type) << SLJIT_DEF_SHIFT)
-#define SLJIT_DEF_ARG2(type) ((type) << (2 * SLJIT_DEF_SHIFT))
-#define SLJIT_DEF_ARG3(type) ((type) << (3 * SLJIT_DEF_SHIFT))
-#define SLJIT_DEF_ARG4(type) ((type) << (4 * SLJIT_DEF_SHIFT))
+   Examples for argument processing by sljit_emit_enter:
+     SLJIT_ARGS4(VOID, P, 32_R, F32, W)
+     Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_FR0, SLJIT_S1
+
+     SLJIT_ARGS4(VOID, W, W_R, W, W_R)
+     Arguments are placed into: SLJIT_S0, SLJIT_R1, SLJIT_S1, SLJIT_R3
 
-/* Short form of the macros above.
+     SLJIT_ARGS4(VOID, F64, W, F32, W_R)
+     Arguments are placed into: SLJIT_FR0, SLJIT_S0, SLJIT_FR1, SLJIT_R1
 
-   For example the following definition:
-   SLJIT_DEF_RET(SLJIT_ARG_TYPE_SW) | SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_F32)
+     Note: it is recommended to pass the scratch arguments first
+     followed by the saved arguments:
+
+       SLJIT_ARGS4(VOID, W_R, W_R, W, W)
+       Arguments are placed into: SLJIT_R0, SLJIT_R1, SLJIT_S0, SLJIT_S1
+*/
+
+/* The following flag is only allowed for the integer arguments of
+   sljit_emit_enter. When the flag is set, the integer argument is
+   stored in a scratch register instead of a saved register. */
+#define SLJIT_ARG_TYPE_SCRATCH_REG 0x8
+
+/* Void result, can only be used by SLJIT_ARG_RETURN. */
+#define SLJIT_ARG_TYPE_VOID	0
+/* Machine word sized integer argument or result. */
+#define SLJIT_ARG_TYPE_W	1
+#define SLJIT_ARG_TYPE_W_R	(SLJIT_ARG_TYPE_W | SLJIT_ARG_TYPE_SCRATCH_REG)
+/* 32 bit integer argument or result. */
+#define SLJIT_ARG_TYPE_32	2
+#define SLJIT_ARG_TYPE_32_R	(SLJIT_ARG_TYPE_32 | SLJIT_ARG_TYPE_SCRATCH_REG)
+/* Pointer sized integer argument or result. */
+#define SLJIT_ARG_TYPE_P	3
+#define SLJIT_ARG_TYPE_P_R	(SLJIT_ARG_TYPE_P | SLJIT_ARG_TYPE_SCRATCH_REG)
+/* 64 bit floating point argument or result. */
+#define SLJIT_ARG_TYPE_F64	4
+/* 32 bit floating point argument or result. */
+#define SLJIT_ARG_TYPE_F32	5
+
+#define SLJIT_ARG_SHIFT 4
+#define SLJIT_ARG_RETURN(type) (type)
+#define SLJIT_ARG_VALUE(type, idx) ((type) << ((idx) * SLJIT_ARG_SHIFT))
+
+/* Simplified argument list definitions.
+
+   The following definition:
+       SLJIT_ARG_RETURN(SLJIT_ARG_TYPE_W) | SLJIT_ARG_VALUE(SLJIT_ARG_TYPE_F32, 1)
 
    can be shortened to:
-   SLJIT_RET(SW) | SLJIT_ARG1(F32)
-
-Note:
-   The VOID type is only supported by SLJIT_RET, and
-   VOID is also the default value when SLJIT_RET is
-   not specified. */
-#define SLJIT_RET(type) SLJIT_DEF_RET(SLJIT_ARG_TYPE_ ## type)
-#define SLJIT_ARG1(type) SLJIT_DEF_ARG1(SLJIT_ARG_TYPE_ ## type)
-#define SLJIT_ARG2(type) SLJIT_DEF_ARG2(SLJIT_ARG_TYPE_ ## type)
-#define SLJIT_ARG3(type) SLJIT_DEF_ARG3(SLJIT_ARG_TYPE_ ## type)
-#define SLJIT_ARG4(type) SLJIT_DEF_ARG4(SLJIT_ARG_TYPE_ ## type)
+       SLJIT_ARGS1(W, F32)
+*/
+
+#define SLJIT_ARG_TO_TYPE(type) SLJIT_ARG_TYPE_ ## type
+
+#define SLJIT_ARGS0(ret) \
+	SLJIT_ARG_RETURN(SLJIT_ARG_TO_TYPE(ret))
+
+#define SLJIT_ARGS1(ret, arg1) \
+	(SLJIT_ARGS0(ret) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg1), 1))
+
+#define SLJIT_ARGS2(ret, arg1, arg2) \
+	(SLJIT_ARGS1(ret, arg1) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg2), 2))
+
+#define SLJIT_ARGS3(ret, arg1, arg2, arg3) \
+	(SLJIT_ARGS2(ret, arg1, arg2) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg3), 3))
+
+#define SLJIT_ARGS4(ret, arg1, arg2, arg3, arg4) \
+	(SLJIT_ARGS3(ret, arg1, arg2, arg3) | SLJIT_ARG_VALUE(SLJIT_ARG_TO_TYPE(arg4), 4))
 
 /* --------------------------------------------------------------------- */
 /*  Main structures and functions                                        */
@@ -408,7 +436,7 @@ struct sljit_compiler {
 	/* Code size. */
 	sljit_uw size;
 	/* Relative offset of the executable mapping from the writable mapping. */
-	sljit_uw executable_offset;
+	sljit_sw executable_offset;
 	/* Executable size for statistical purposes. */
 	sljit_uw executable_size;
 
@@ -417,17 +445,13 @@ struct sljit_compiler {
 #endif
 
 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
-	sljit_s32 args;
+	sljit_s32 args_size;
 	sljit_s32 locals_offset;
-	sljit_s32 saveds_offset;
-	sljit_s32 stack_tmp_size;
+	sljit_s32 scratches_offset;
 #endif
 
 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
 	sljit_s32 mode32;
-#ifdef _WIN64
-	sljit_s32 locals_offset;
-#endif
 #endif
 
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
@@ -444,10 +468,14 @@ struct sljit_compiler {
 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
 	/* Temporary fields. */
 	sljit_uw shift_imm;
+#endif /* SLJIT_CONFIG_ARM_V5 || SLJIT_CONFIG_ARM_V7 */
+
+#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) && (defined __SOFTFP__)
+	sljit_uw args_size;
 #endif
 
 #if (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
-	sljit_sw imm;
+	sljit_u32 imm;
 #endif
 
 #if (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
@@ -456,6 +484,10 @@ struct sljit_compiler {
 	sljit_sw cache_argw;
 #endif
 
+#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
+	sljit_uw args_size;
+#endif
+
 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
 	sljit_s32 delay_slot;
 	sljit_s32 cache_arg;
@@ -476,7 +508,9 @@ struct sljit_compiler {
 	/* Flags specified by the last arithmetic instruction.
 	   It contains the type of the variable flag. */
 	sljit_s32 last_flags;
-	/* Local size passed to the functions. */
+	/* Return value type set by entry functions. */
+	sljit_s32 last_return;
+	/* Local size passed to entry functions. */
 	sljit_s32 logical_local_size;
 #endif
 
@@ -615,38 +649,43 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
    available options are listed before sljit_emit_enter.
 
    The function argument list is the combination of SLJIT_ARGx
-   (SLJIT_DEF_ARG1) macros. Currently maximum 3 SW / UW
-   (SLJIT_ARG_TYPE_SW / LJIT_ARG_TYPE_UW) arguments are supported.
-   The first argument goes to SLJIT_S0, the second goes to SLJIT_S1
-   and so on. The register set used by the function must be declared
-   as well. The number of scratch and saved registers used by the
-   function must be passed to sljit_emit_enter. Only R registers
-   between R0 and "scratches" argument can be used later. E.g. if
-   "scratches" is set to 2, the scratch register set will be limited
-   to SLJIT_R0 and SLJIT_R1. The S registers and the floating point
-   registers ("fscratches" and "fsaveds") are specified in a similar
-   manner. The sljit_emit_enter is also capable of allocating a stack
-   space for local variables. The "local_size" argument contains the
-   size in bytes of this local area and its staring address is stored
+   (SLJIT_DEF_ARG1) macros. Currently maximum 4 arguments are
+   supported. The first integer argument is loaded into SLJIT_S0,
+   the second one is loaded into SLJIT_S1, and so on. Similarly,
+   the first floating point argument is loaded into SLJIT_FR0,
+   the second one is loaded into SLJIT_FR1, and so on. Furthermore
+   the register set used by the function must be declared as well.
+   The number of scratch and saved registers used by the function
+   must be passed to sljit_emit_enter. Only R registers between R0
+   and "scratches" argument can be used later. E.g. if "scratches"
+   is set to 2, the scratch register set will be limited to SLJIT_R0
+    and SLJIT_R1. The S registers and the floating point registers
+   ("fscratches" and "fsaveds") are specified in a similar manner.
+   The sljit_emit_enter is also capable of allocating a stack space
+   for local variables. The "local_size" argument contains the size
+   in bytes of this local area and its starting address is stored
    in SLJIT_SP. The memory area between SLJIT_SP (inclusive) and
    SLJIT_SP + local_size (exclusive) can be modified freely until
    the function returns. The stack space is not initialized.
 
    Note: the following conditions must met:
          0 <= scratches <= SLJIT_NUMBER_OF_REGISTERS
-         0 <= saveds <= SLJIT_NUMBER_OF_REGISTERS
+         0 <= saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS
          scratches + saveds <= SLJIT_NUMBER_OF_REGISTERS
          0 <= fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
-         0 <= fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
+         0 <= fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS
          fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS
 
+   Note: the compiler can use saved registers as scratch registers,
+         but the opposite is not supported
+
    Note: every call of sljit_emit_enter and sljit_set_context
          overwrites the previous context.
 */
 
-/* The absolute address returned by sljit_get_local_base with
-offset 0 is aligned to sljit_f64. Otherwise it is aligned to sljit_sw. */
-#define SLJIT_F64_ALIGNMENT 0x00000001
+/* The compiled function uses cdecl calling
+ * convention instead of SLJIT_FUNC. */
+#define SLJIT_ENTER_CDECL 0x00000001
 
 /* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */
 #define SLJIT_MAX_LOCAL_SIZE	65536
@@ -657,7 +696,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 
 /* The machine code has a context (which contains the local stack space size,
    number of used registers, etc.) which is initialized by sljit_emit_enter. Several
-   functions (like sljit_emit_return) requres this context to be able to generate
+   functions (such as sljit_emit_return) require this context to be able to generate
    the appropriate code. However, some code fragments (like inline cache) may have
    no normal entry point so their context is unknown for the compiler. Their context
    can be provided to the compiler by the sljit_set_context function.
@@ -669,11 +708,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size);
 
-/* Return from machine code.  The op argument can be SLJIT_UNUSED which means the
-   function does not return with anything or any opcode between SLJIT_MOV and
-   SLJIT_MOV_P (see sljit_emit_op1). As for src and srcw they must be 0 if op
-   is SLJIT_UNUSED, otherwise see below the description about source and
-   destination arguments. */
+/* Return from machine code. The sljit_emit_return_void function does not return with
+   any value. The sljit_emit_return function returns with a single value which stores
+   the result of a data move instruction. The instruction is specified by the op
+   argument, and must be between SLJIT_MOV and SLJIT_MOV_P (see sljit_emit_op1). */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler);
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op,
 	sljit_s32 src, sljit_sw srcw);
@@ -766,7 +806,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
 #define SLJIT_MEM2(r1, r2)	(SLJIT_MEM | (r1) | ((r2) << 8))
 #define SLJIT_IMM		0x40
 
-/* Set 32 bit operation mode (I) on 64 bit CPUs. This option is ignored on
+/* Sets 32 bit operation mode on 64 bit CPUs. This option is ignored on
    32 bit CPUs. When this option is set for an arithmetic operation, only
    the lower 32 bit of the input registers are used, and the CPU status
    flags are set according to the 32 bit result. Although the higher 32 bit
@@ -774,12 +814,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
    be defined by the CPU architecture (e.g. MIPS). To satisfy these CPU
    requirements all source registers must be the result of those operations
    where this option was also set. Memory loads read 32 bit values rather
-   than 64 bit ones. In other words 32 bit and 64 bit operations cannot
-   be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source
-   register can hold any 32 or 64 bit value, and it is converted to a 32 bit
-   compatible format first. This conversion is free (no instructions are
-   emitted) on most CPUs. A 32 bit value can also be converted to a 64 bit
-   value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension).
+   than 64 bit ones. In other words 32 bit and 64 bit operations cannot be
+   mixed. The only exception is SLJIT_MOV32 whose source register can hold
+   any 32 or 64 bit value, and it is converted to a 32 bit compatible format
+   first. This conversion is free (no instructions are emitted) on most CPUs.
+   A 32 bit value can also be converted to a 64 bit value by SLJIT_MOV_S32
+   (sign extension) or SLJIT_MOV_U32 (zero extension).
+
+   As for floating-point operations, this option sets 32 bit single
+   precision mode. Similar to the integer operations, all register arguments
+   must be the result of those operations where this option was also set.
 
    Note: memory addressing always uses 64 bit values on 64 bit systems so
          the result of a 32 bit operation must not be used with SLJIT_MEMx
@@ -788,22 +832,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
    This option is part of the instruction name, so there is no need to
    manually set it. E.g:
 
-     SLJIT_ADD32 == (SLJIT_ADD | SLJIT_I32_OP) */
-#define SLJIT_I32_OP		0x100
-
-/* Set F32 (single) precision mode for floating-point computation. This
-   option is similar to SLJIT_I32_OP, it just applies to floating point
-   registers. When this option is passed, the CPU performs 32 bit floating
-   point operations, rather than 64 bit one. Similar to SLJIT_I32_OP, all
-   register arguments must be the result of those operations where this
-   option was also set.
-
-   This option is part of the instruction name, so there is no need to
-   manually set it. E.g:
-
-     SLJIT_MOV_F32 = (SLJIT_MOV_F64 | SLJIT_F32_OP)
- */
-#define SLJIT_F32_OP		SLJIT_I32_OP
+     SLJIT_ADD32 == (SLJIT_ADD | SLJIT_32) */
+#define SLJIT_32		0x100
 
 /* Many CPUs (x86, ARM, PPC) have status flags which can be set according
    to the result of an operation. Other CPUs (MIPS) do not have status
@@ -887,7 +917,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
    The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
    Note: if SLJIT_R1 is 0, the behaviour is undefined. */
 #define SLJIT_DIVMOD_UW			(SLJIT_OP0_BASE + 4)
-#define SLJIT_DIVMOD_U32		(SLJIT_DIVMOD_UW | SLJIT_I32_OP)
+#define SLJIT_DIVMOD_U32		(SLJIT_DIVMOD_UW | SLJIT_32)
 /* Flags: - (may destroy flags)
    Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
    The result is placed into SLJIT_R0 and the remainder into SLJIT_R1.
@@ -895,13 +925,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
    Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00),
          the behaviour is undefined. */
 #define SLJIT_DIVMOD_SW			(SLJIT_OP0_BASE + 5)
-#define SLJIT_DIVMOD_S32		(SLJIT_DIVMOD_SW | SLJIT_I32_OP)
+#define SLJIT_DIVMOD_S32		(SLJIT_DIVMOD_SW | SLJIT_32)
 /* Flags: - (may destroy flags)
    Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1.
    The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
    Note: if SLJIT_R1 is 0, the behaviour is undefined. */
 #define SLJIT_DIV_UW			(SLJIT_OP0_BASE + 6)
-#define SLJIT_DIV_U32			(SLJIT_DIV_UW | SLJIT_I32_OP)
+#define SLJIT_DIV_U32			(SLJIT_DIV_UW | SLJIT_32)
 /* Flags: - (may destroy flags)
    Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1.
    The result is placed into SLJIT_R0. SLJIT_R1 preserves its value.
@@ -909,7 +939,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *
    Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00),
          the behaviour is undefined. */
 #define SLJIT_DIV_SW			(SLJIT_OP0_BASE + 7)
-#define SLJIT_DIV_S32			(SLJIT_DIV_SW | SLJIT_I32_OP)
+#define SLJIT_DIV_S32			(SLJIT_DIV_SW | SLJIT_32)
 /* Flags: - (does not modify flags)
    ENDBR32 instruction for x86-32 and ENDBR64 instruction for x86-64
    when Intel Control-flow Enforcement Technology (CET) is enabled.
@@ -941,16 +971,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
 #define SLJIT_MOV			(SLJIT_OP1_BASE + 0)
 /* Flags: - (does not modify flags) */
 #define SLJIT_MOV_U8			(SLJIT_OP1_BASE + 1)
-#define SLJIT_MOV32_U8			(SLJIT_MOV_U8 | SLJIT_I32_OP)
+#define SLJIT_MOV32_U8			(SLJIT_MOV_U8 | SLJIT_32)
 /* Flags: - (does not modify flags) */
 #define SLJIT_MOV_S8			(SLJIT_OP1_BASE + 2)
-#define SLJIT_MOV32_S8			(SLJIT_MOV_S8 | SLJIT_I32_OP)
+#define SLJIT_MOV32_S8			(SLJIT_MOV_S8 | SLJIT_32)
 /* Flags: - (does not modify flags) */
 #define SLJIT_MOV_U16			(SLJIT_OP1_BASE + 3)
-#define SLJIT_MOV32_U16			(SLJIT_MOV_U16 | SLJIT_I32_OP)
+#define SLJIT_MOV32_U16			(SLJIT_MOV_U16 | SLJIT_32)
 /* Flags: - (does not modify flags) */
 #define SLJIT_MOV_S16			(SLJIT_OP1_BASE + 4)
-#define SLJIT_MOV32_S16			(SLJIT_MOV_S16 | SLJIT_I32_OP)
+#define SLJIT_MOV32_S16			(SLJIT_MOV_S16 | SLJIT_32)
 /* Flags: - (does not modify flags)
    Note: no SLJIT_MOV32_U32 form, since it is the same as SLJIT_MOV32 */
 #define SLJIT_MOV_U32			(SLJIT_OP1_BASE + 5)
@@ -958,25 +988,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
    Note: no SLJIT_MOV32_S32 form, since it is the same as SLJIT_MOV32 */
 #define SLJIT_MOV_S32			(SLJIT_OP1_BASE + 6)
 /* Flags: - (does not modify flags) */
-#define SLJIT_MOV32			(SLJIT_MOV_S32 | SLJIT_I32_OP)
+#define SLJIT_MOV32			(SLJIT_OP1_BASE + 7)
 /* Flags: - (does not modify flags)
    Note: load a pointer sized data, useful on x32 (a 32 bit mode on x86-64
          where all x64 features are available, e.g. 16 register) or similar
          compiling modes */
-#define SLJIT_MOV_P			(SLJIT_OP1_BASE + 7)
+#define SLJIT_MOV_P			(SLJIT_OP1_BASE + 8)
 /* Flags: Z
    Note: immediate source argument is not supported */
-#define SLJIT_NOT			(SLJIT_OP1_BASE + 8)
-#define SLJIT_NOT32			(SLJIT_NOT | SLJIT_I32_OP)
-/* Flags: Z | OVERFLOW
-   Note: immediate source argument is not supported */
-#define SLJIT_NEG			(SLJIT_OP1_BASE + 9)
-#define SLJIT_NEG32			(SLJIT_NEG | SLJIT_I32_OP)
+#define SLJIT_NOT			(SLJIT_OP1_BASE + 9)
+#define SLJIT_NOT32			(SLJIT_NOT | SLJIT_32)
 /* Count leading zeroes
    Flags: - (may destroy flags)
    Note: immediate source argument is not supported */
 #define SLJIT_CLZ			(SLJIT_OP1_BASE + 10)
-#define SLJIT_CLZ32			(SLJIT_CLZ | SLJIT_I32_OP)
+#define SLJIT_CLZ32			(SLJIT_CLZ | SLJIT_32)
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
 	sljit_s32 dst, sljit_sw dstw,
@@ -987,58 +1013,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
 
 /* Flags: Z | OVERFLOW | CARRY */
 #define SLJIT_ADD			(SLJIT_OP2_BASE + 0)
-#define SLJIT_ADD32			(SLJIT_ADD | SLJIT_I32_OP)
+#define SLJIT_ADD32			(SLJIT_ADD | SLJIT_32)
 /* Flags: CARRY */
 #define SLJIT_ADDC			(SLJIT_OP2_BASE + 1)
-#define SLJIT_ADDC32			(SLJIT_ADDC | SLJIT_I32_OP)
+#define SLJIT_ADDC32			(SLJIT_ADDC | SLJIT_32)
 /* Flags: Z | LESS | GREATER_EQUAL | GREATER | LESS_EQUAL
           SIG_LESS | SIG_GREATER_EQUAL | SIG_GREATER
           SIG_LESS_EQUAL | CARRY */
 #define SLJIT_SUB			(SLJIT_OP2_BASE + 2)
-#define SLJIT_SUB32			(SLJIT_SUB | SLJIT_I32_OP)
+#define SLJIT_SUB32			(SLJIT_SUB | SLJIT_32)
 /* Flags: CARRY */
 #define SLJIT_SUBC			(SLJIT_OP2_BASE + 3)
-#define SLJIT_SUBC32			(SLJIT_SUBC | SLJIT_I32_OP)
+#define SLJIT_SUBC32			(SLJIT_SUBC | SLJIT_32)
 /* Note: integer mul
    Flags: OVERFLOW */
 #define SLJIT_MUL			(SLJIT_OP2_BASE + 4)
-#define SLJIT_MUL32			(SLJIT_MUL | SLJIT_I32_OP)
+#define SLJIT_MUL32			(SLJIT_MUL | SLJIT_32)
 /* Flags: Z */
 #define SLJIT_AND			(SLJIT_OP2_BASE + 5)
-#define SLJIT_AND32			(SLJIT_AND | SLJIT_I32_OP)
+#define SLJIT_AND32			(SLJIT_AND | SLJIT_32)
 /* Flags: Z */
 #define SLJIT_OR			(SLJIT_OP2_BASE + 6)
-#define SLJIT_OR32			(SLJIT_OR | SLJIT_I32_OP)
+#define SLJIT_OR32			(SLJIT_OR | SLJIT_32)
 /* Flags: Z */
 #define SLJIT_XOR			(SLJIT_OP2_BASE + 7)
-#define SLJIT_XOR32			(SLJIT_XOR | SLJIT_I32_OP)
+#define SLJIT_XOR32			(SLJIT_XOR | SLJIT_32)
 /* Flags: Z
    Let bit_length be the length of the shift operation: 32 or 64.
    If src2 is immediate, src2w is masked by (bit_length - 1).
    Otherwise, if the content of src2 is outside the range from 0
    to bit_length - 1, the result is undefined. */
 #define SLJIT_SHL			(SLJIT_OP2_BASE + 8)
-#define SLJIT_SHL32			(SLJIT_SHL | SLJIT_I32_OP)
+#define SLJIT_SHL32			(SLJIT_SHL | SLJIT_32)
 /* Flags: Z
    Let bit_length be the length of the shift operation: 32 or 64.
    If src2 is immediate, src2w is masked by (bit_length - 1).
    Otherwise, if the content of src2 is outside the range from 0
    to bit_length - 1, the result is undefined. */
 #define SLJIT_LSHR			(SLJIT_OP2_BASE + 9)
-#define SLJIT_LSHR32			(SLJIT_LSHR | SLJIT_I32_OP)
+#define SLJIT_LSHR32			(SLJIT_LSHR | SLJIT_32)
 /* Flags: Z
    Let bit_length be the length of the shift operation: 32 or 64.
    If src2 is immediate, src2w is masked by (bit_length - 1).
    Otherwise, if the content of src2 is outside the range from 0
    to bit_length - 1, the result is undefined. */
 #define SLJIT_ASHR			(SLJIT_OP2_BASE + 10)
-#define SLJIT_ASHR32			(SLJIT_ASHR | SLJIT_I32_OP)
+#define SLJIT_ASHR32			(SLJIT_ASHR | SLJIT_32)
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
 	sljit_s32 dst, sljit_sw dstw,
 	sljit_s32 src1, sljit_sw src1w,
 	sljit_s32 src2, sljit_sw src2w);
 
+/* The sljit_emit_op2u function is the same as sljit_emit_op2 except the result is discarded. */
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 src1, sljit_sw src1w,
+	sljit_s32 src2, sljit_sw src2w);
+
 /* Starting index of opcodes for sljit_emit_op_src. */
 #define SLJIT_OP_SRC_BASE		128
 
@@ -1082,35 +1114,35 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
 
 /* Flags: - (does not modify flags) */
 #define SLJIT_MOV_F64			(SLJIT_FOP1_BASE + 0)
-#define SLJIT_MOV_F32			(SLJIT_MOV_F64 | SLJIT_F32_OP)
+#define SLJIT_MOV_F32			(SLJIT_MOV_F64 | SLJIT_32)
 /* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE]
    SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int
    Rounding mode when the destination is W or I: round towards zero. */
-/* Flags: - (does not modify flags) */
+/* Flags: - (may destroy flags) */
 #define SLJIT_CONV_F64_FROM_F32		(SLJIT_FOP1_BASE + 1)
-#define SLJIT_CONV_F32_FROM_F64		(SLJIT_CONV_F64_FROM_F32 | SLJIT_F32_OP)
-/* Flags: - (does not modify flags) */
+#define SLJIT_CONV_F32_FROM_F64		(SLJIT_CONV_F64_FROM_F32 | SLJIT_32)
+/* Flags: - (may destroy flags) */
 #define SLJIT_CONV_SW_FROM_F64		(SLJIT_FOP1_BASE + 2)
-#define SLJIT_CONV_SW_FROM_F32		(SLJIT_CONV_SW_FROM_F64 | SLJIT_F32_OP)
-/* Flags: - (does not modify flags) */
+#define SLJIT_CONV_SW_FROM_F32		(SLJIT_CONV_SW_FROM_F64 | SLJIT_32)
+/* Flags: - (may destroy flags) */
 #define SLJIT_CONV_S32_FROM_F64		(SLJIT_FOP1_BASE + 3)
-#define SLJIT_CONV_S32_FROM_F32		(SLJIT_CONV_S32_FROM_F64 | SLJIT_F32_OP)
-/* Flags: - (does not modify flags) */
+#define SLJIT_CONV_S32_FROM_F32		(SLJIT_CONV_S32_FROM_F64 | SLJIT_32)
+/* Flags: - (may destroy flags) */
 #define SLJIT_CONV_F64_FROM_SW		(SLJIT_FOP1_BASE + 4)
-#define SLJIT_CONV_F32_FROM_SW		(SLJIT_CONV_F64_FROM_SW | SLJIT_F32_OP)
-/* Flags: - (does not modify flags) */
+#define SLJIT_CONV_F32_FROM_SW		(SLJIT_CONV_F64_FROM_SW | SLJIT_32)
+/* Flags: - (may destroy flags) */
 #define SLJIT_CONV_F64_FROM_S32		(SLJIT_FOP1_BASE + 5)
-#define SLJIT_CONV_F32_FROM_S32		(SLJIT_CONV_F64_FROM_S32 | SLJIT_F32_OP)
+#define SLJIT_CONV_F32_FROM_S32		(SLJIT_CONV_F64_FROM_S32 | SLJIT_32)
 /* Note: dst is the left and src is the right operand for SLJIT_CMP_F64 / SLJIT_CMP_F32.
    Flags: EQUAL_F | LESS_F | GREATER_EQUAL_F | GREATER_F | LESS_EQUAL_F */
 #define SLJIT_CMP_F64			(SLJIT_FOP1_BASE + 6)
-#define SLJIT_CMP_F32			(SLJIT_CMP_F64 | SLJIT_F32_OP)
-/* Flags: - (does not modify flags) */
+#define SLJIT_CMP_F32			(SLJIT_CMP_F64 | SLJIT_32)
+/* Flags: - (may destroy flags) */
 #define SLJIT_NEG_F64			(SLJIT_FOP1_BASE + 7)
-#define SLJIT_NEG_F32			(SLJIT_NEG_F64 | SLJIT_F32_OP)
-/* Flags: - (does not modify flags) */
+#define SLJIT_NEG_F32			(SLJIT_NEG_F64 | SLJIT_32)
+/* Flags: - (may destroy flags) */
 #define SLJIT_ABS_F64			(SLJIT_FOP1_BASE + 8)
-#define SLJIT_ABS_F32			(SLJIT_ABS_F64 | SLJIT_F32_OP)
+#define SLJIT_ABS_F32			(SLJIT_ABS_F64 | SLJIT_32)
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
 	sljit_s32 dst, sljit_sw dstw,
@@ -1119,18 +1151,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
 /* Starting index of opcodes for sljit_emit_fop2. */
 #define SLJIT_FOP2_BASE			192
 
-/* Flags: - (does not modify flags) */
+/* Flags: - (may destroy flags) */
 #define SLJIT_ADD_F64			(SLJIT_FOP2_BASE + 0)
-#define SLJIT_ADD_F32			(SLJIT_ADD_F64 | SLJIT_F32_OP)
-/* Flags: - (does not modify flags) */
+#define SLJIT_ADD_F32			(SLJIT_ADD_F64 | SLJIT_32)
+/* Flags: - (may destroy flags) */
 #define SLJIT_SUB_F64			(SLJIT_FOP2_BASE + 1)
-#define SLJIT_SUB_F32			(SLJIT_SUB_F64 | SLJIT_F32_OP)
-/* Flags: - (does not modify flags) */
+#define SLJIT_SUB_F32			(SLJIT_SUB_F64 | SLJIT_32)
+/* Flags: - (may destroy flags) */
 #define SLJIT_MUL_F64			(SLJIT_FOP2_BASE + 2)
-#define SLJIT_MUL_F32			(SLJIT_MUL_F64 | SLJIT_F32_OP)
-/* Flags: - (does not modify flags) */
+#define SLJIT_MUL_F32			(SLJIT_MUL_F64 | SLJIT_32)
+/* Flags: - (may destroy flags) */
 #define SLJIT_DIV_F64			(SLJIT_FOP2_BASE + 3)
-#define SLJIT_DIV_F32			(SLJIT_DIV_F64 | SLJIT_F32_OP)
+#define SLJIT_DIV_F32			(SLJIT_DIV_F64 | SLJIT_32)
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
 	sljit_s32 dst, sljit_sw dstw,
@@ -1170,33 +1202,35 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
 #define SLJIT_SET_OVERFLOW		SLJIT_SET(SLJIT_OVERFLOW)
 #define SLJIT_NOT_OVERFLOW		11
 
-/* There is no SLJIT_CARRY or SLJIT_NOT_CARRY. */
-#define SLJIT_SET_CARRY			SLJIT_SET(12)
+/* Unlike other flags, sljit_emit_jump may destroy this flag. */
+#define SLJIT_CARRY			12
+#define SLJIT_SET_CARRY			SLJIT_SET(SLJIT_CARRY)
+#define SLJIT_NOT_CARRY			13
 
 /* Floating point comparison types. */
 #define SLJIT_EQUAL_F64			14
-#define SLJIT_EQUAL_F32			(SLJIT_EQUAL_F64 | SLJIT_F32_OP)
+#define SLJIT_EQUAL_F32			(SLJIT_EQUAL_F64 | SLJIT_32)
 #define SLJIT_SET_EQUAL_F		SLJIT_SET(SLJIT_EQUAL_F64)
 #define SLJIT_NOT_EQUAL_F64		15
-#define SLJIT_NOT_EQUAL_F32		(SLJIT_NOT_EQUAL_F64 | SLJIT_F32_OP)
+#define SLJIT_NOT_EQUAL_F32		(SLJIT_NOT_EQUAL_F64 | SLJIT_32)
 #define SLJIT_SET_NOT_EQUAL_F		SLJIT_SET(SLJIT_NOT_EQUAL_F64)
 #define SLJIT_LESS_F64			16
-#define SLJIT_LESS_F32			(SLJIT_LESS_F64 | SLJIT_F32_OP)
+#define SLJIT_LESS_F32			(SLJIT_LESS_F64 | SLJIT_32)
 #define SLJIT_SET_LESS_F		SLJIT_SET(SLJIT_LESS_F64)
 #define SLJIT_GREATER_EQUAL_F64		17
-#define SLJIT_GREATER_EQUAL_F32		(SLJIT_GREATER_EQUAL_F64 | SLJIT_F32_OP)
+#define SLJIT_GREATER_EQUAL_F32		(SLJIT_GREATER_EQUAL_F64 | SLJIT_32)
 #define SLJIT_SET_GREATER_EQUAL_F	SLJIT_SET(SLJIT_GREATER_EQUAL_F64)
 #define SLJIT_GREATER_F64		18
-#define SLJIT_GREATER_F32		(SLJIT_GREATER_F64 | SLJIT_F32_OP)
+#define SLJIT_GREATER_F32		(SLJIT_GREATER_F64 | SLJIT_32)
 #define SLJIT_SET_GREATER_F		SLJIT_SET(SLJIT_GREATER_F64)
 #define SLJIT_LESS_EQUAL_F64		19
-#define SLJIT_LESS_EQUAL_F32		(SLJIT_LESS_EQUAL_F64 | SLJIT_F32_OP)
+#define SLJIT_LESS_EQUAL_F32		(SLJIT_LESS_EQUAL_F64 | SLJIT_32)
 #define SLJIT_SET_LESS_EQUAL_F		SLJIT_SET(SLJIT_LESS_EQUAL_F64)
 #define SLJIT_UNORDERED_F64		20
-#define SLJIT_UNORDERED_F32		(SLJIT_UNORDERED_F64 | SLJIT_F32_OP)
+#define SLJIT_UNORDERED_F32		(SLJIT_UNORDERED_F64 | SLJIT_32)
 #define SLJIT_SET_UNORDERED_F		SLJIT_SET(SLJIT_UNORDERED_F64)
 #define SLJIT_ORDERED_F64		21
-#define SLJIT_ORDERED_F32		(SLJIT_ORDERED_F64 | SLJIT_F32_OP)
+#define SLJIT_ORDERED_F32		(SLJIT_ORDERED_F64 | SLJIT_32)
 #define SLJIT_SET_ORDERED_F		SLJIT_SET(SLJIT_ORDERED_F64)
 
 /* Unconditional jump types. */
@@ -1211,6 +1245,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
 
 /* The target can be changed during runtime (see: sljit_set_jump_addr). */
 #define SLJIT_REWRITABLE_JUMP		0x1000
+/* When this flag is passed, the execution of the current function ends and
+   the called function returns to the caller of the current function. The
+   stack usage is reduced before the call, but it is not necessarily reduced
+   to zero. In the latter case the compiler needs to allocate space for some
+   arguments and the return register must be kept as well.
+
+   This feature is highly experimental and not supported on SPARC platform
+   at the moment. */
+#define SLJIT_CALL_RETURN			0x2000
 
 /* Emit a jump instruction. The destination is not set, only the type of the jump.
     type must be between SLJIT_EQUAL and SLJIT_FAST_CALL
@@ -1221,15 +1264,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile
 
 /* Emit a C compiler (ABI) compatible function call.
     type must be SLJIT_CALL or SLJIT_CALL_CDECL
-    type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP
+    type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP and SLJIT_CALL_RETURN
     arg_types is the combination of SLJIT_RET / SLJIT_ARGx (SLJIT_DEF_RET / SLJIT_DEF_ARGx) macros
 
    Flags: destroy all flags. */
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 arg_types);
 
 /* Basic arithmetic comparison. In most architectures it is implemented as
-   an SLJIT_SUB operation (with SLJIT_UNUSED destination and setting
-   appropriate flags) followed by a sljit_emit_jump. However some
+   a compare operation followed by a sljit_emit_jump. However some
    architectures (i.e: ARM64 or MIPS) may employ special optimizations here.
    It is suggested to use this comparison form when appropriate.
     type must be between SLJIT_EQUAL and SLJIT_I_SIG_LESS_EQUAL
@@ -1271,6 +1313,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
    Direct form: set src to SLJIT_IMM() and srcw to the address
    Indirect form: any other valid addressing mode
     type must be SLJIT_CALL or SLJIT_CALL_CDECL
+    type can be combined (or'ed) with SLJIT_CALL_RETURN
     arg_types is the combination of SLJIT_RET / SLJIT_ARGx (SLJIT_DEF_RET / SLJIT_DEF_ARGx) macros
 
    Flags: destroy all flags. */
@@ -1298,7 +1341,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
 
    type must be between SLJIT_EQUAL and SLJIT_ORDERED_F64
    dst_reg must be a valid register and it can be combined
-      with SLJIT_I32_OP to perform a 32 bit arithmetic operation
+      with SLJIT_32 to perform a 32 bit arithmetic operation
    src must be register or immediate (SLJIT_IMM)
 
    Flags: - (does not modify flags) */
@@ -1454,26 +1497,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_st
 
 #if !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
 
-/* Get the entry address of a given function. */
-#define SLJIT_FUNC_OFFSET(func_name)	((sljit_sw)func_name)
+/* Get the entry address of a given function (signed, unsigned result). */
+#define SLJIT_FUNC_ADDR(func_name)	((sljit_sw)func_name)
+#define SLJIT_FUNC_UADDR(func_name)	((sljit_uw)func_name)
 
 #else /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
 
 /* All JIT related code should be placed in the same context (library, binary, etc.). */
 
-#define SLJIT_FUNC_OFFSET(func_name)	(*(sljit_sw*)(void*)func_name)
+/* Get the entry address of a given function (signed, unsigned result). */
+#define SLJIT_FUNC_ADDR(func_name)	(*(sljit_sw*)(void*)func_name)
+#define SLJIT_FUNC_UADDR(func_name)	(*(sljit_uw*)(void*)func_name)
 
 /* For powerpc64, the function pointers point to a context descriptor. */
 struct sljit_function_context {
-	sljit_sw addr;
-	sljit_sw r2;
-	sljit_sw r11;
+	sljit_uw addr;
+	sljit_uw r2;
+	sljit_uw r11;
 };
 
 /* Fill the context arguments using the addr and the function.
    If func_ptr is NULL, it will not be set to the address of context.
    If addr is NULL, the function address also comes from the func pointer. */
-SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func);
+SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_uw addr, void* func);
 
 #endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */
 
@@ -1516,17 +1562,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
    Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
-	void *instruction, sljit_s32 size);
+	void *instruction, sljit_u32 size);
 
 /* Flags were set by a 32 bit operation. */
-#define SLJIT_CURRENT_FLAGS_I32_OP		SLJIT_I32_OP
+#define SLJIT_CURRENT_FLAGS_32			SLJIT_32
 
-/* Flags were set by an ADD, ADDC, SUB, SUBC, or NEG operation. */
-#define SLJIT_CURRENT_FLAGS_ADD_SUB		0x01
+/* Flags were set by an ADD or ADDC operation. */
+#define SLJIT_CURRENT_FLAGS_ADD			0x01
+/* Flags were set by a SUB, SUBC, or NEG operation. */
+#define SLJIT_CURRENT_FLAGS_SUB			0x02
 
-/* Flags were set by a SUB with unused destination.
-   Must be combined with SLJIT_CURRENT_FLAGS_ADD_SUB. */
-#define SLJIT_CURRENT_FLAGS_COMPARE		0x02
+/* Flags were set by sljit_emit_op2u with SLJIT_SUB opcode.
+   Must be combined with SLJIT_CURRENT_FLAGS_SUB. */
+#define SLJIT_CURRENT_FLAGS_COMPARE		0x04
 
 /* Define the currently available CPU status flags. It is usually used after
    an sljit_emit_label or sljit_emit_op_custom operations to define which CPU

文件差异内容过多而无法显示
+ 381 - 155
thirdparty/pcre2/src/sljit/sljitNativeARM_32.c


文件差异内容过多而无法显示
+ 309 - 219
thirdparty/pcre2/src/sljit/sljitNativeARM_64.c


文件差异内容过多而无法显示
+ 398 - 188
thirdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c


+ 133 - 146
thirdparty/pcre2/src/sljit/sljitNativeMIPS_32.c

@@ -73,50 +73,49 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 
 	switch (GET_OPCODE(op)) {
 	case SLJIT_MOV:
-	case SLJIT_MOV_U32:
-	case SLJIT_MOV_S32:
-	case SLJIT_MOV_P:
 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
 		if (dst != src2)
 			return push_inst(compiler, ADDU | S(src2) | TA(0) | D(dst), DR(dst));
 		return SLJIT_SUCCESS;
 
 	case SLJIT_MOV_U8:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
+			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
 	case SLJIT_MOV_S8:
 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-			if (op == SLJIT_MOV_S8) {
 #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
-				return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
+			return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
 #else /* SLJIT_MIPS_REV < 1 */
-				FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
-				return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
+			FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
+			return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(24), DR(dst));
 #endif /* SLJIT_MIPS_REV >= 1 */
-			}
-			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
-		}
-		else {
-			SLJIT_ASSERT(dst == src2);
 		}
+		SLJIT_ASSERT(dst == src2);
 		return SLJIT_SUCCESS;
 
 	case SLJIT_MOV_U16:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
+			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
 	case SLJIT_MOV_S16:
 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-			if (op == SLJIT_MOV_S16) {
 #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
-				return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
+			return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
 #else /* SLJIT_MIPS_REV < 1 */
-				FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
-				return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
+			FAIL_IF(push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
+			return push_inst(compiler, SRA | T(dst) | D(dst) | SH_IMM(16), DR(dst));
 #endif /* SLJIT_MIPS_REV >= 1 */
-			}
-			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
-		}
-		else {
-			SLJIT_ASSERT(dst == src2);
 		}
+		SLJIT_ASSERT(dst == src2);
 		return SLJIT_SUCCESS;
 
 	case SLJIT_NOT:
@@ -438,131 +437,120 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
 
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
-	sljit_set_jump_addr(addr, new_constant, executable_offset);
+	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
 }
 
-static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr)
+static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr, sljit_u32 *extra_space)
 {
-	sljit_s32 stack_offset = 0;
-	sljit_s32 arg_count = 0;
+	sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
+	sljit_u32 offset = 0;
 	sljit_s32 float_arg_count = 0;
 	sljit_s32 word_arg_count = 0;
 	sljit_s32 types = 0;
-	sljit_s32 arg_count_save, types_save;
 	sljit_ins prev_ins = NOP;
 	sljit_ins ins = NOP;
 	sljit_u8 offsets[4];
+	sljit_u8 *offsets_ptr = offsets;
 
 	SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
 
-	arg_types >>= SLJIT_DEF_SHIFT;
+	arg_types >>= SLJIT_ARG_SHIFT;
+
+	/* See ABI description in sljit_emit_enter. */
 
 	while (arg_types) {
-		types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
+		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
+		*offsets_ptr = (sljit_u8)offset;
 
-		switch (arg_types & SLJIT_DEF_MASK) {
-		case SLJIT_ARG_TYPE_F32:
-			offsets[arg_count] = (sljit_u8)stack_offset;
+		switch (arg_types & SLJIT_ARG_MASK) {
+		case SLJIT_ARG_TYPE_F64:
+			if (offset & 0x7) {
+				offset += sizeof(sljit_sw);
+				*offsets_ptr = (sljit_u8)offset;
+			}
 
-			if (word_arg_count == 0 && arg_count <= 1)
-				offsets[arg_count] = 254 + arg_count;
+			if (word_arg_count == 0 && float_arg_count <= 1)
+				*offsets_ptr = (sljit_u8)(254 + float_arg_count);
 
-			stack_offset += sizeof(sljit_f32);
-			arg_count++;
+			offset += sizeof(sljit_f64);
 			float_arg_count++;
 			break;
-		case SLJIT_ARG_TYPE_F64:
-			if (stack_offset & 0x7)
-				stack_offset += sizeof(sljit_sw);
-			offsets[arg_count] = (sljit_u8)stack_offset;
-
-			if (word_arg_count == 0 && arg_count <= 1)
-				offsets[arg_count] = 254 + arg_count;
+		case SLJIT_ARG_TYPE_F32:
+			if (word_arg_count == 0 && float_arg_count <= 1)
+				*offsets_ptr = (sljit_u8)(254 + float_arg_count);
 
-			stack_offset += sizeof(sljit_f64);
-			arg_count++;
+			offset += sizeof(sljit_f32);
 			float_arg_count++;
 			break;
 		default:
-			offsets[arg_count] = (sljit_u8)stack_offset;
-			stack_offset += sizeof(sljit_sw);
-			arg_count++;
+			offset += sizeof(sljit_sw);
 			word_arg_count++;
 			break;
 		}
 
-		arg_types >>= SLJIT_DEF_SHIFT;
+		arg_types >>= SLJIT_ARG_SHIFT;
+		offsets_ptr++;
 	}
 
-	/* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */
-	if (stack_offset > 16)
-		FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP)));
+	/* Stack is aligned to 16 bytes. */
+	SLJIT_ASSERT(offset <= 8 * sizeof(sljit_sw));
 
-	types_save = types;
-	arg_count_save = arg_count;
+	if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
+		if (is_tail_call) {
+			offset = (offset + sizeof(sljit_sw) + 15) & ~(sljit_uw)0xf;
+			FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset, &prev_ins));
+			*extra_space = offset;
+		} else {
+			FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-16), DR(SLJIT_SP)));
+			*extra_space = 16;
+		}
+	} else {
+		if (is_tail_call)
+			FAIL_IF(emit_stack_frame_release(compiler, 0, &prev_ins));
+		*extra_space = 0;
+	}
 
 	while (types) {
-		switch (types & SLJIT_DEF_MASK) {
-		case SLJIT_ARG_TYPE_F32:
-			arg_count--;
-			if (offsets[arg_count] < 254)
-				ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]);
-			float_arg_count--;
-			break;
-		case SLJIT_ARG_TYPE_F64:
-			arg_count--;
-			if (offsets[arg_count] < 254)
-				ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(offsets[arg_count]);
-			float_arg_count--;
-			break;
-		default:
-			if (offsets[arg_count - 1] >= 16)
-				ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(offsets[arg_count - 1]);
-			else if (arg_count != word_arg_count)
-				ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2));
-			else if (arg_count == 1)
-				ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4);
+		--offsets_ptr;
 
-			arg_count--;
-			word_arg_count--;
-			break;
-		}
-
-		if (ins != NOP) {
-			if (prev_ins != NOP)
-				FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS));
-			prev_ins = ins;
-			ins = NOP;
-		}
+		switch (types & SLJIT_ARG_MASK) {
+		case SLJIT_ARG_TYPE_F64:
+			if (*offsets_ptr < 4 * sizeof (sljit_sw)) {
+				if (prev_ins != NOP)
+					FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS));
 
-		types >>= SLJIT_DEF_SHIFT;
-	}
+				/* Must be preceded by at least one other argument,
+				 * and its starting offset must be 8 because of alignment. */
+				SLJIT_ASSERT((*offsets_ptr >> 2) == 2);
 
-	types = types_save;
-	arg_count = arg_count_save;
+				prev_ins = MFC1 | TA(6) | FS(float_arg_count) | (1 << 11);
+				ins = MFC1 | TA(7) | FS(float_arg_count);
+			} else if (*offsets_ptr < 254)
+				ins = SDC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr);
+			else if (*offsets_ptr == 254)
+				ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1);
 
-	while (types) {
-		switch (types & SLJIT_DEF_MASK) {
+			float_arg_count--;
+			break;
 		case SLJIT_ARG_TYPE_F32:
-			arg_count--;
-			if (offsets[arg_count] == 254)
+			if (*offsets_ptr < 4 * sizeof (sljit_sw))
+				ins = MFC1 | TA(4 + (*offsets_ptr >> 2)) | FS(float_arg_count);
+			else if (*offsets_ptr < 254)
+				ins = SWC1 | S(SLJIT_SP) | FT(float_arg_count) | IMM(*offsets_ptr);
+			else if (*offsets_ptr == 254)
 				ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1);
-			else if (offsets[arg_count] < 16)
-				ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]);
-			break;
-		case SLJIT_ARG_TYPE_F64:
-			arg_count--;
-			if (offsets[arg_count] == 254)
-				ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1);
-			else if (offsets[arg_count] < 16) {
-				if (prev_ins != NOP)
-					FAIL_IF(push_inst(compiler, prev_ins, MOVABLE_INS));
-				prev_ins = LW | S(SLJIT_SP) | TA(4 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count]);
-				ins = LW | S(SLJIT_SP) | TA(5 + (offsets[arg_count] >> 2)) | IMM(offsets[arg_count] + sizeof(sljit_sw));
-			}
+
+			float_arg_count--;
 			break;
 		default:
-			arg_count--;
+			if (*offsets_ptr >= 4 * sizeof (sljit_sw))
+				ins = SW | S(SLJIT_SP) | T(word_arg_count) | IMM(*offsets_ptr);
+			else if ((*offsets_ptr >> 2) != word_arg_count - 1)
+				ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (*offsets_ptr >> 2));
+			else if (*offsets_ptr == 0)
+				ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4);
+
+			word_arg_count--;
 			break;
 		}
 
@@ -573,7 +561,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 			ins = NOP;
 		}
 
-		types >>= SLJIT_DEF_SHIFT;
+		types >>= SLJIT_ARG_SHIFT;
 	}
 
 	*ins_ptr = prev_ins;
@@ -581,41 +569,11 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 	return SLJIT_SUCCESS;
 }
 
-static sljit_s32 post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
-{
-	sljit_s32 stack_offset = 0;
-
-	arg_types >>= SLJIT_DEF_SHIFT;
-
-	while (arg_types) {
-		switch (arg_types & SLJIT_DEF_MASK) {
-		case SLJIT_ARG_TYPE_F32:
-			stack_offset += sizeof(sljit_f32);
-			break;
-		case SLJIT_ARG_TYPE_F64:
-			if (stack_offset & 0x7)
-				stack_offset += sizeof(sljit_sw);
-			stack_offset += sizeof(sljit_f64);
-			break;
-		default:
-			stack_offset += sizeof(sljit_sw);
-			break;
-		}
-
-		arg_types >>= SLJIT_DEF_SHIFT;
-	}
-
-	/* Stack is aligned to 16 bytes, max two doubles can be placed on the stack. */
-	if (stack_offset > 16)
-		return push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(16), DR(SLJIT_SP));
-
-	return SLJIT_SUCCESS;
-}
-
 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
 	sljit_s32 arg_types)
 {
 	struct sljit_jump *jump;
+	sljit_u32 extra_space = (sljit_u32)type;
 	sljit_ins ins;
 
 	CHECK_ERROR_PTR();
@@ -624,21 +582,34 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 	PTR_FAIL_IF(!jump);
 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
-	type &= 0xff;
 
-	PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins));
+	PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space));
 
 	SLJIT_ASSERT(DR(PIC_ADDR_REG) == 25 && PIC_ADDR_REG == TMP_REG2);
 
 	PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0));
 
-	jump->flags |= IS_JAL | IS_CALL;
-	PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+	if (!(type & SLJIT_CALL_RETURN) || extra_space > 0) {
+		jump->flags |= IS_JAL | IS_CALL;
+		PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+	} else
+		PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
+
 	jump->addr = compiler->size;
 	PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
 
-	PTR_FAIL_IF(post_call_with_args(compiler, arg_types));
+	if (extra_space == 0)
+		return jump;
+
+	if (type & SLJIT_CALL_RETURN)
+		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG,
+			SLJIT_MEM1(SLJIT_SP), (sljit_sw)(extra_space - sizeof(sljit_sw))));
+
+	if (type & SLJIT_CALL_RETURN)
+		PTR_FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
 
+	PTR_FAIL_IF(push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(extra_space),
+		(type & SLJIT_CALL_RETURN) ? UNMOVABLE_INS : DR(SLJIT_SP)));
 	return jump;
 }
 
@@ -646,6 +617,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
 	sljit_s32 arg_types,
 	sljit_s32 src, sljit_sw srcw)
 {
+	sljit_u32 extra_space = (sljit_u32)type;
 	sljit_ins ins;
 
 	CHECK_ERROR();
@@ -662,10 +634,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
 	}
 
-	FAIL_IF(call_with_args(compiler, arg_types, &ins));
+	FAIL_IF(call_with_args(compiler, arg_types, &ins, &extra_space));
 
 	/* Register input. */
-	FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+	if (!(type & SLJIT_CALL_RETURN) || extra_space > 0)
+		FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+	else
+		FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
 	FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
-	return post_call_with_args(compiler, arg_types);
+
+	if (extra_space == 0)
+		return SLJIT_SUCCESS;
+
+	if (type & SLJIT_CALL_RETURN)
+		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG,
+			SLJIT_MEM1(SLJIT_SP), (sljit_sw)(extra_space - sizeof(sljit_sw))));
+
+	if (type & SLJIT_CALL_RETURN)
+		FAIL_IF(push_inst(compiler, JR | SA(RETURN_ADDR_REG), UNMOVABLE_INS));
+
+	return push_inst(compiler, ADDIU | S(SLJIT_SP) | T(SLJIT_SP) | IMM(extra_space),
+		(type & SLJIT_CALL_RETURN) ? UNMOVABLE_INS : DR(SLJIT_SP));
 }

+ 88 - 55
thirdparty/pcre2/src/sljit/sljitNativeMIPS_64.c

@@ -46,9 +46,9 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a
 	}
 
 	/* Zero extended number. */
-	uimm = imm;
+	uimm = (sljit_uw)imm;
 	if (imm < 0) {
-		uimm = ~imm;
+		uimm = ~(sljit_uw)imm;
 		inv = 1;
 	}
 
@@ -119,7 +119,7 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a
 }
 
 #define SELECT_OP(a, b) \
-	(!(op & SLJIT_I32_OP) ? a : b)
+	(!(op & SLJIT_32) ? a : b)
 
 #define EMIT_LOGICAL(op_imm, op_norm) \
 	if (flags & SRC2_IMM) { \
@@ -138,19 +138,19 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a
 #define EMIT_SHIFT(op_dimm, op_dimm32, op_imm, op_dv, op_v) \
 	if (flags & SRC2_IMM) { \
 		if (src2 >= 32) { \
-			SLJIT_ASSERT(!(op & SLJIT_I32_OP)); \
+			SLJIT_ASSERT(!(op & SLJIT_32)); \
 			ins = op_dimm32; \
 			src2 -= 32; \
 		} \
 		else \
-			ins = (op & SLJIT_I32_OP) ? op_imm : op_dimm; \
+			ins = (op & SLJIT_32) ? op_imm : op_dimm; \
 		if (op & SLJIT_SET_Z) \
 			FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \
 		if (!(flags & UNUSED_DEST)) \
 			FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \
 	} \
 	else { \
-		ins = (op & SLJIT_I32_OP) ? op_v : op_dv; \
+		ins = (op & SLJIT_32) ? op_v : op_dv; \
 		if (op & SLJIT_SET_Z) \
 			FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \
 		if (!(flags & UNUSED_DEST)) \
@@ -165,50 +165,71 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 
 	switch (GET_OPCODE(op)) {
 	case SLJIT_MOV:
-	case SLJIT_MOV_P:
 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
 		if (dst != src2)
 			return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(dst), DR(dst));
 		return SLJIT_SUCCESS;
 
 	case SLJIT_MOV_U8:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
+			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
 	case SLJIT_MOV_S8:
 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-			if (op == SLJIT_MOV_S8) {
-				FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
-				return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst));
-			}
-			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst));
-		}
-		else {
-			SLJIT_ASSERT(dst == src2);
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+			if (op & SLJIT_32)
+				return push_inst(compiler, SEB | T(src2) | D(dst), DR(dst));
+#endif /* SLJIT_MIPS_REV >= 1 */
+			FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(24), DR(dst)));
+			return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(24), DR(dst));
 		}
+		SLJIT_ASSERT(dst == src2);
 		return SLJIT_SUCCESS;
 
 	case SLJIT_MOV_U16:
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
+			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
+
 	case SLJIT_MOV_S16:
 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
-			if (op == SLJIT_MOV_S16) {
-				FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
-				return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst));
-			}
-			return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst));
-		}
-		else {
-			SLJIT_ASSERT(dst == src2);
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
+			if (op & SLJIT_32)
+				return push_inst(compiler, SEH | T(src2) | D(dst), DR(dst));
+#endif /* SLJIT_MIPS_REV >= 1 */
+			FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(16), DR(dst)));
+			return push_inst(compiler, DSRA32 | T(dst) | D(dst) | SH_IMM(16), DR(dst));
 		}
+		SLJIT_ASSERT(dst == src2);
 		return SLJIT_SUCCESS;
 
 	case SLJIT_MOV_U32:
-		SLJIT_ASSERT(!(op & SLJIT_I32_OP));
-		FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst)));
-		return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst));
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+#if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 2)
+			if (dst == src2)
+				return push_inst(compiler, DINSU | T(src2) | SA(0) | (31 << 11) | (0 << 11), DR(dst));
+#endif /* SLJIT_MIPS_REV >= 2 */
+			FAIL_IF(push_inst(compiler, DSLL32 | T(src2) | D(dst) | SH_IMM(0), DR(dst)));
+			return push_inst(compiler, DSRL32 | T(dst) | D(dst) | SH_IMM(0), DR(dst));
+		}
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
 
 	case SLJIT_MOV_S32:
-		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-		return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst));
+		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && !(op & SLJIT_32));
+		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
+			return push_inst(compiler, SLL | T(src2) | D(dst) | SH_IMM(0), DR(dst));
+		}
+		SLJIT_ASSERT(dst == src2);
+		return SLJIT_SUCCESS;
 
 	case SLJIT_NOT:
 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
@@ -234,7 +255,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src2) | TA(0) | D(TMP_REG1), DR(TMP_REG1)));
 		/* Check zero. */
 		FAIL_IF(push_inst(compiler, BEQ | S(TMP_REG1) | TA(0) | IMM(5), UNMOVABLE_INS));
-		FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_I32_OP) ? 32 : 64), UNMOVABLE_INS));
+		FAIL_IF(push_inst(compiler, ORI | SA(0) | T(dst) | IMM((op & SLJIT_32) ? 32 : 64), UNMOVABLE_INS));
 		FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | T(dst) | IMM(-1), DR(dst)));
 		/* Loop for searching the highest bit. */
 		FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst)));
@@ -462,7 +483,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 #if (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 6)
 			return push_inst(compiler, SELECT_OP(DMUL, MUL) | S(src1) | T(src2) | D(dst), DR(dst));
 #elif (defined SLJIT_MIPS_REV && SLJIT_MIPS_REV >= 1)
-			if (op & SLJIT_I32_OP)
+			if (op & SLJIT_32)
 				return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst));
 			FAIL_IF(push_inst(compiler, DMULT | S(src1) | T(src2), MOVABLE_INS));
 			return push_inst(compiler, MFLO | D(dst), DR(dst));
@@ -528,10 +549,10 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
 	SLJIT_UNUSED_ARG(executable_offset);
 
 	SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 0);
-	inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff);
-	inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff);
-	inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff);
-	inst[5] = (inst[5] & 0xffff0000) | (new_target & 0xffff);
+	inst[0] = (inst[0] & 0xffff0000) | ((sljit_ins)(new_target >> 48) & 0xffff);
+	inst[1] = (inst[1] & 0xffff0000) | ((sljit_ins)(new_target >> 32) & 0xffff);
+	inst[3] = (inst[3] & 0xffff0000) | ((sljit_ins)(new_target >> 16) & 0xffff);
+	inst[5] = (inst[5] & 0xffff0000) | ((sljit_ins)new_target & 0xffff);
 	SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 1);
 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
 	SLJIT_CACHE_FLUSH(inst, inst + 6);
@@ -539,7 +560,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
 
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
-	sljit_set_jump_addr(addr, new_constant, executable_offset);
+	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
 }
 
 static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_ins *ins_ptr)
@@ -548,19 +569,19 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 	sljit_s32 word_arg_count = 0;
 	sljit_s32 float_arg_count = 0;
 	sljit_s32 types = 0;
-	sljit_ins prev_ins = NOP;
+	sljit_ins prev_ins = *ins_ptr;
 	sljit_ins ins = NOP;
 
 	SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
 
-	arg_types >>= SLJIT_DEF_SHIFT;
+	arg_types >>= SLJIT_ARG_SHIFT;
 
 	while (arg_types) {
-		types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
+		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
 
-		switch (arg_types & SLJIT_DEF_MASK) {
-		case SLJIT_ARG_TYPE_F32:
+		switch (arg_types & SLJIT_ARG_MASK) {
 		case SLJIT_ARG_TYPE_F64:
+		case SLJIT_ARG_TYPE_F32:
 			arg_count++;
 			float_arg_count++;
 			break;
@@ -570,24 +591,24 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 			break;
 		}
 
-		arg_types >>= SLJIT_DEF_SHIFT;
+		arg_types >>= SLJIT_ARG_SHIFT;
 	}
 
 	while (types) {
-		switch (types & SLJIT_DEF_MASK) {
-		case SLJIT_ARG_TYPE_F32:
+		switch (types & SLJIT_ARG_MASK) {
+		case SLJIT_ARG_TYPE_F64:
 			if (arg_count != float_arg_count)
-				ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count);
+				ins = MOV_S | FMT_D | FS(float_arg_count) | FD(arg_count);
 			else if (arg_count == 1)
-				ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1);
+				ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1);
 			arg_count--;
 			float_arg_count--;
 			break;
-		case SLJIT_ARG_TYPE_F64:
+		case SLJIT_ARG_TYPE_F32:
 			if (arg_count != float_arg_count)
-				ins = MOV_S | FMT_D | FS(float_arg_count) | FD(arg_count);
+				ins = MOV_S | FMT_S | FS(float_arg_count) | FD(arg_count);
 			else if (arg_count == 1)
-				ins = MOV_S | FMT_D | FS(SLJIT_FR0) | FD(TMP_FREG1);
+				ins = MOV_S | FMT_S | FS(SLJIT_FR0) | FD(TMP_FREG1);
 			arg_count--;
 			float_arg_count--;
 			break;
@@ -608,7 +629,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 			ins = NOP;
 		}
 
-		types >>= SLJIT_DEF_SHIFT;
+		types >>= SLJIT_ARG_SHIFT;
 	}
 
 	*ins_ptr = prev_ins;
@@ -620,7 +641,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 	sljit_s32 arg_types)
 {
 	struct sljit_jump *jump;
-	sljit_ins ins;
+	sljit_ins ins = NOP;
 
 	CHECK_ERROR_PTR();
 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
@@ -628,7 +649,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 	PTR_FAIL_IF(!jump);
 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
-	type &= 0xff;
+
+	if (type & SLJIT_CALL_RETURN)
+		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
 
 	PTR_FAIL_IF(call_with_args(compiler, arg_types, &ins));
 
@@ -636,8 +659,12 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
 
 	PTR_FAIL_IF(emit_const(compiler, PIC_ADDR_REG, 0));
 
-	jump->flags |= IS_JAL | IS_CALL;
-	PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+	if (!(type & SLJIT_CALL_RETURN)) {
+		jump->flags |= IS_JAL | IS_CALL;
+		PTR_FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+	} else
+		PTR_FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
+
 	jump->addr = compiler->size;
 	PTR_FAIL_IF(push_inst(compiler, ins, UNMOVABLE_INS));
 
@@ -648,7 +675,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
 	sljit_s32 arg_types,
 	sljit_s32 src, sljit_sw srcw)
 {
-	sljit_ins ins;
+	sljit_ins ins = NOP;
 
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
@@ -664,9 +691,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, DR(PIC_ADDR_REG), src, srcw));
 	}
 
+	if (type & SLJIT_CALL_RETURN)
+		FAIL_IF(emit_stack_frame_release(compiler, 0, &ins));
+
 	FAIL_IF(call_with_args(compiler, arg_types, &ins));
 
 	/* Register input. */
-	FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+	if (!(type & SLJIT_CALL_RETURN))
+		FAIL_IF(push_inst(compiler, JALR | S(PIC_ADDR_REG) | DA(RETURN_ADDR_REG), UNMOVABLE_INS));
+	else
+		FAIL_IF(push_inst(compiler, JR | S(PIC_ADDR_REG), UNMOVABLE_INS));
 	return push_inst(compiler, ins, UNMOVABLE_INS);
 }

文件差异内容过多而无法显示
+ 379 - 175
thirdparty/pcre2/src/sljit/sljitNativeMIPS_common.c


+ 12 - 9
thirdparty/pcre2/src/sljit/sljitNativePPC_32.c

@@ -86,11 +86,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		SLJIT_ASSERT(src1 == TMP_REG1);
 		return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
 
-	case SLJIT_NEG:
-		SLJIT_ASSERT(src1 == TMP_REG1);
-		/* Setting XER SO is not enough, CR SO is also needed. */
-		return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2));
-
 	case SLJIT_CLZ:
 		SLJIT_ASSERT(src1 == TMP_REG1);
 		return push_inst(compiler, CNTLZW | S(src2) | A(dst));
@@ -158,7 +153,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 
 		if (flags & ALT_FORM3) {
 			/* Setting XER SO is not enough, CR SO is also needed. */
-			return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+			if (src1 != TMP_ZERO)
+				return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+			return push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2));
 		}
 
 		if (flags & ALT_FORM4) {
@@ -167,11 +164,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 			return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
 		}
 
-		if (!(flags & ALT_SET_FLAGS))
+		if (!(flags & ALT_SET_FLAGS)) {
+			SLJIT_ASSERT(src1 != TMP_ZERO);
 			return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
+		}
+
 		if (flags & ALT_FORM5)
 			return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
-		return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1));
+
+		if (src1 != TMP_ZERO)
+			return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+		return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2));
 
 	case SLJIT_SUBC:
 		return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1));
@@ -277,5 +280,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
 
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
-	sljit_set_jump_addr(addr, new_constant, executable_offset);
+	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
 }

+ 46 - 44
thirdparty/pcre2/src/sljit/sljitNativePPC_64.c

@@ -57,20 +57,20 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
 	}
 
 	/* Count leading zeroes. */
-	tmp = (imm >= 0) ? imm : ~imm;
+	tmp = (sljit_uw)((imm >= 0) ? imm : ~imm);
 	ASM_SLJIT_CLZ(tmp, shift);
 	SLJIT_ASSERT(shift > 0);
 	shift--;
-	tmp = (imm << shift);
+	tmp = ((sljit_uw)imm << shift);
 
 	if ((tmp & ~0xffff000000000000ul) == 0) {
-		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
+		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
 		shift += 15;
 		return PUSH_RLDICR(reg, shift);
 	}
 
 	if ((tmp & ~0xffffffff00000000ul) == 0) {
-		FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(tmp >> 48)));
+		FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
 		FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp >> 32)));
 		shift += 31;
 		return PUSH_RLDICR(reg, shift);
@@ -78,18 +78,18 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
 
 	/* Cut out the 16 bit from immediate. */
 	shift += 15;
-	tmp2 = imm & ((1ul << (63 - shift)) - 1);
+	tmp2 = (sljit_uw)imm & (((sljit_uw)1 << (63 - shift)) - 1);
 
 	if (tmp2 <= 0xffff) {
-		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
+		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
 		FAIL_IF(PUSH_RLDICR(reg, shift));
-		return push_inst(compiler, ORI | S(reg) | A(reg) | tmp2);
+		return push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)tmp2);
 	}
 
 	if (tmp2 <= 0xffffffff) {
 		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
 		FAIL_IF(PUSH_RLDICR(reg, shift));
-		FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (tmp2 >> 16)));
+		FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 16)));
 		return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(tmp2)) : SLJIT_SUCCESS;
 	}
 
@@ -97,16 +97,16 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg,
 	tmp2 <<= shift2;
 
 	if ((tmp2 & ~0xffff000000000000ul) == 0) {
-		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | IMM(tmp >> 48)));
+		FAIL_IF(push_inst(compiler, ADDI | D(reg) | A(0) | (sljit_ins)(tmp >> 48)));
 		shift2 += 15;
 		shift += (63 - shift2);
 		FAIL_IF(PUSH_RLDICR(reg, shift));
-		FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (tmp2 >> 48)));
+		FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | (sljit_ins)(tmp2 >> 48)));
 		return PUSH_RLDICR(reg, shift2);
 	}
 
 	/* The general version. */
-	FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 48)));
+	FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | (sljit_ins)((sljit_uw)imm >> 48)));
 	FAIL_IF(push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm >> 32)));
 	FAIL_IF(PUSH_RLDICR(reg, 31));
 	FAIL_IF(push_inst(compiler, ORIS | S(reg) | A(reg) | IMM(imm >> 16)));
@@ -199,19 +199,6 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		UN_EXTS();
 		return push_inst(compiler, NOR | RC(flags) | S(src2) | A(dst) | B(src2));
 
-	case SLJIT_NEG:
-		SLJIT_ASSERT(src1 == TMP_REG1);
-
-		if ((flags & (ALT_FORM1 | ALT_SIGN_EXT)) == (ALT_FORM1 | ALT_SIGN_EXT)) {
-			FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
-			FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(TMP_REG2)));
-			return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
-		}
-
-		UN_EXTS();
-		/* Setting XER SO is not enough, CR SO is also needed. */
-		return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2));
-
 	case SLJIT_CLZ:
 		SLJIT_ASSERT(src1 == TMP_REG1);
 		if (flags & ALT_FORM1)
@@ -299,13 +286,22 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 
 		if (flags & ALT_FORM3) {
 			if (flags & ALT_SIGN_EXT) {
-				FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1)));
-				src1 = TMP_REG1;
-				FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
-				src2 = TMP_REG2;
+				if (src1 != TMP_ZERO) {
+					FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1)));
+					src1 = TMP_REG1;
+				}
+				if (src2 != TMP_ZERO) {
+					FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1)));
+					src2 = TMP_REG2;
+				}
 			}
+
 			/* Setting XER SO is not enough, CR SO is also needed. */
-			FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)));
+			if (src1 != TMP_ZERO)
+				FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)));
+			else
+				FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2)));
+
 			if (flags & ALT_SIGN_EXT)
 				return push_inst(compiler, RLDI(dst, dst, 32, 32, 0));
 			return SLJIT_SUCCESS;
@@ -317,12 +313,18 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 			return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm);
 		}
 
-		if (!(flags & ALT_SET_FLAGS))
+		if (!(flags & ALT_SET_FLAGS)) {
+			SLJIT_ASSERT(src1 != TMP_ZERO);
 			return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1));
+		}
+
 		BIN_EXTS();
 		if (flags & ALT_FORM5)
 			return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
-		return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1));
+
+		if (src1 != TMP_ZERO)
+			return push_inst(compiler, SUBF | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1));
+		return push_inst(compiler, NEG | RC(ALT_SET_FLAGS) | D(dst) | A(src2));
 
 	case SLJIT_SUBC:
 		BIN_EXTS();
@@ -432,14 +434,14 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 	if (src)
 		reg = *src & REG_MASK;
 
-	arg_types >>= SLJIT_DEF_SHIFT;
+	arg_types >>= SLJIT_ARG_SHIFT;
 
 	while (arg_types) {
-		types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
+		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
 
-		switch (arg_types & SLJIT_DEF_MASK) {
-		case SLJIT_ARG_TYPE_F32:
+		switch (arg_types & SLJIT_ARG_MASK) {
 		case SLJIT_ARG_TYPE_F64:
+		case SLJIT_ARG_TYPE_F32:
 			arg_count++;
 			break;
 		default:
@@ -453,13 +455,13 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 			break;
 		}
 
-		arg_types >>= SLJIT_DEF_SHIFT;
+		arg_types >>= SLJIT_ARG_SHIFT;
 	}
 
 	while (types) {
-		switch (types & SLJIT_DEF_MASK) {
-		case SLJIT_ARG_TYPE_F32:
+		switch (types & SLJIT_ARG_MASK) {
 		case SLJIT_ARG_TYPE_F64:
+		case SLJIT_ARG_TYPE_F32:
 			arg_count--;
 			break;
 		default:
@@ -471,7 +473,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 			break;
 		}
 
-		types >>= SLJIT_DEF_SHIFT;
+		types >>= SLJIT_ARG_SHIFT;
 	}
 
 	return SLJIT_SUCCESS;
@@ -492,10 +494,10 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
 	SLJIT_UNUSED_ARG(executable_offset);
 
 	SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 0);
-	inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff);
-	inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff);
-	inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff);
-	inst[4] = (inst[4] & 0xffff0000) | (new_target & 0xffff);
+	inst[0] = (inst[0] & 0xffff0000u) | ((sljit_ins)(new_target >> 48) & 0xffff);
+	inst[1] = (inst[1] & 0xffff0000u) | ((sljit_ins)(new_target >> 32) & 0xffff);
+	inst[3] = (inst[3] & 0xffff0000u) | ((sljit_ins)(new_target >> 16) & 0xffff);
+	inst[4] = (inst[4] & 0xffff0000u) | ((sljit_ins)new_target & 0xffff);
 	SLJIT_UPDATE_WX_FLAGS(inst, inst + 5, 1);
 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
 	SLJIT_CACHE_FLUSH(inst, inst + 5);
@@ -503,5 +505,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
 
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
-	sljit_set_jump_addr(addr, new_constant, executable_offset);
+	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
 }

文件差异内容过多而无法显示
+ 286 - 191
thirdparty/pcre2/src/sljit/sljitNativePPC_common.c


文件差异内容过多而无法显示
+ 262 - 171
thirdparty/pcre2/src/sljit/sljitNativeS390X.c


+ 52 - 55
thirdparty/pcre2/src/sljit/sljitNativeSPARC_32.c

@@ -35,16 +35,13 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst,
 
 #define ARG2(flags, src2) ((flags & SRC2_IMM) ? IMM(src2) : S2(src2))
 
-static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
+static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_u32 flags,
 	sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
 {
 	SLJIT_COMPILE_ASSERT(ICC_IS_SET == SET_FLAGS, icc_is_set_and_set_flags_must_be_the_same);
 
 	switch (op) {
 	case SLJIT_MOV:
-	case SLJIT_MOV_U32:
-	case SLJIT_MOV_S32:
-	case SLJIT_MOV_P:
 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
 		if (dst != src2)
 			return push_inst(compiler, OR | D(dst) | S1(0) | S2(src2), DR(dst));
@@ -59,8 +56,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 			FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(24), DR(dst)));
 			return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst));
 		}
-		else if (dst != src2)
-			SLJIT_UNREACHABLE();
+		SLJIT_ASSERT(dst == src2);
 		return SLJIT_SUCCESS;
 
 	case SLJIT_MOV_U16:
@@ -70,13 +66,12 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 			FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src2) | IMM(16), DR(dst)));
 			return push_inst(compiler, (op == SLJIT_MOV_S16 ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst));
 		}
-		else if (dst != src2)
-			SLJIT_UNREACHABLE();
+		SLJIT_ASSERT(dst == src2);
 		return SLJIT_SUCCESS;
 
 	case SLJIT_NOT:
 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
-		return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DR(dst) | (flags & SET_FLAGS));
+		return push_inst(compiler, XNOR | (flags & SET_FLAGS) | D(dst) | S1(0) | S2(src2), DRF(dst, flags));
 
 	case SLJIT_CLZ:
 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
@@ -89,22 +84,24 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		/* Loop. */
 		FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS));
 		FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1)));
-		FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS));
+		FAIL_IF(push_inst(compiler, BICC | DA(0xe) | ((sljit_ins)-2 & DISP_MASK), UNMOVABLE_INS));
 		return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS);
 
 	case SLJIT_ADD:
-		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB;
-		return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
+		return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
 
 	case SLJIT_ADDC:
-		return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
+		return push_inst(compiler, ADDC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
 
 	case SLJIT_SUB:
-		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD_SUB;
-		return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
+		return push_inst(compiler, SUB | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
 
 	case SLJIT_SUBC:
-		return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
+		return push_inst(compiler, SUBC | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
 
 	case SLJIT_MUL:
 		compiler->status_flags_state = 0;
@@ -116,13 +113,13 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl
 		return push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(TMP_LINK), MOVABLE_INS | SET_FLAGS);
 
 	case SLJIT_AND:
-		return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+		return push_inst(compiler, AND | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
 
 	case SLJIT_OR:
-		return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+		return push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
 
 	case SLJIT_XOR:
-		return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS));
+		return push_inst(compiler, XOR | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DRF(dst, flags));
 
 	case SLJIT_SHL:
 		FAIL_IF(push_inst(compiler, SLL | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst)));
@@ -147,7 +144,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 	sljit_s32 word_reg_index = 8;
 	sljit_s32 float_arg_index = 1;
 	sljit_s32 double_arg_count = 0;
-	sljit_s32 float_offset = (16 + 6) * sizeof(sljit_sw);
+	sljit_u32 float_offset = (16 + 6) * sizeof(sljit_sw);
 	sljit_s32 types = 0;
 	sljit_s32 reg = 0;
 	sljit_s32 move_to_tmp2 = 0;
@@ -155,18 +152,12 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 	if (src)
 		reg = reg_map[*src & REG_MASK];
 
-	arg_types >>= SLJIT_DEF_SHIFT;
+	arg_types >>= SLJIT_ARG_SHIFT;
 
 	while (arg_types) {
-		types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);
+		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
 
-		switch (arg_types & SLJIT_DEF_MASK) {
-		case SLJIT_ARG_TYPE_F32:
-			float_arg_index++;
-			if (reg_index == reg)
-				move_to_tmp2 = 1;
-			reg_index++;
-			break;
+		switch (arg_types & SLJIT_ARG_MASK) {
 		case SLJIT_ARG_TYPE_F64:
 			float_arg_index++;
 			double_arg_count++;
@@ -174,36 +165,37 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 				move_to_tmp2 = 1;
 			reg_index += 2;
 			break;
+		case SLJIT_ARG_TYPE_F32:
+			float_arg_index++;
+			if (reg_index == reg)
+				move_to_tmp2 = 1;
+			reg_index++;
+			break;
 		default:
-			if (reg_index != word_reg_index && reg_index < 14 && reg_index == reg)
+			if (reg_index != word_reg_index && reg_index == reg)
 				move_to_tmp2 = 1;
 			reg_index++;
 			word_reg_index++;
 			break;
 		}
 
-		if (move_to_tmp2) {
-			move_to_tmp2 = 0;
-			if (reg < 14)
-				FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2A(reg), DR(TMP_REG1)));
-			*src = TMP_REG1;
-		}
+		arg_types >>= SLJIT_ARG_SHIFT;
+	}
 
-		arg_types >>= SLJIT_DEF_SHIFT;
+	if (move_to_tmp2) {
+		if (reg < 14)
+			FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2A(reg), DR(TMP_REG1)));
+		*src = TMP_REG1;
 	}
 
 	arg_types = types;
 
 	while (arg_types) {
-		switch (arg_types & SLJIT_DEF_MASK) {
-		case SLJIT_ARG_TYPE_F32:
-			float_arg_index--;
-			FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
-			float_offset -= sizeof(sljit_f64);
-			break;
+		switch (arg_types & SLJIT_ARG_MASK) {
 		case SLJIT_ARG_TYPE_F64:
 			float_arg_index--;
 			if (float_arg_index == 4 && double_arg_count == 4) {
+				/* The address is not doubleword aligned, so two instructions are required to store the double. */
 				FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM((16 + 7) * sizeof(sljit_sw)), MOVABLE_INS));
 				FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | (1 << 25) | S1(SLJIT_SP) | IMM((16 + 8) * sizeof(sljit_sw)), MOVABLE_INS));
 			}
@@ -211,36 +203,41 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 				FAIL_IF(push_inst(compiler, STDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
 			float_offset -= sizeof(sljit_f64);
 			break;
+		case SLJIT_ARG_TYPE_F32:
+			float_arg_index--;
+			FAIL_IF(push_inst(compiler, STF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+			float_offset -= sizeof(sljit_f64);
+			break;
 		default:
 			break;
 		}
 
-		arg_types >>= SLJIT_DEF_SHIFT;
+		arg_types >>= SLJIT_ARG_SHIFT;
 	}
 
 	float_offset = (16 + 6) * sizeof(sljit_sw);
 
 	while (types) {
-		switch (types & SLJIT_DEF_MASK) {
-		case SLJIT_ARG_TYPE_F32:
-			reg_index--;
-			if (reg_index < 14)
-				FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index));
-			float_offset -= sizeof(sljit_f64);
-			break;
+		switch (types & SLJIT_ARG_MASK) {
 		case SLJIT_ARG_TYPE_F64:
 			reg_index -= 2;
 			if (reg_index < 14) {
 				if ((reg_index & 0x1) != 0) {
 					FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index));
-					if (reg_index < 13)
+					if (reg_index < 8 + 6 - 1)
 						FAIL_IF(push_inst(compiler, LDUW | DA(reg_index + 1) | S1(SLJIT_SP) | IMM(float_offset + sizeof(sljit_sw)), reg_index + 1));
 				}
-				else 
+				else
 					FAIL_IF(push_inst(compiler, LDD | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index));
 			}
 			float_offset -= sizeof(sljit_f64);
 			break;
+		case SLJIT_ARG_TYPE_F32:
+			reg_index--;
+			if (reg_index < 8 + 6)
+				FAIL_IF(push_inst(compiler, LDUW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), reg_index));
+			float_offset -= sizeof(sljit_f64);
+			break;
 		default:
 			reg_index--;
 			word_reg_index--;
@@ -254,7 +251,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
 			break;
 		}
 
-		types >>= SLJIT_DEF_SHIFT;
+		types >>= SLJIT_ARG_SHIFT;
 	}
 
 	return SLJIT_SUCCESS;
@@ -282,5 +279,5 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_ta
 
 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
 {
-	sljit_set_jump_addr(addr, new_constant, executable_offset);
+	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
 }

+ 201 - 86
thirdparty/pcre2/src/sljit/sljitNativeSPARC_common.c

@@ -98,36 +98,37 @@ static void sparc_cache_flush(sljit_ins *from, sljit_ins *to)
 #define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
 
 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
-	0, 8, 9, 10, 11, 29, 28, 27, 23, 22, 21, 20, 19, 18, 17, 16, 26, 25, 24, 14, 1, 12, 13, 15
+	0, 8, 9, 10, 11, 23, 22, 21, 20, 19, 18, 17, 16, 29, 28, 27, 26, 25, 24, 14, 1, 12, 13, 15
 };
 
 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
-	0, 0, 2, 4, 6, 8, 10, 12, 14
+	0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
 };
 
 /* --------------------------------------------------------------------- */
 /*  Instrucion forms                                                     */
 /* --------------------------------------------------------------------- */
 
-#define D(d)		(reg_map[d] << 25)
-#define FD(d)		(freg_map[d] << 25)
-#define FDN(d)		((freg_map[d] | 0x1) << 25)
-#define DA(d)		((d) << 25)
-#define S1(s1)		(reg_map[s1] << 14)
-#define FS1(s1)		(freg_map[s1] << 14)
-#define S1A(s1)		((s1) << 14)
-#define S2(s2)		(reg_map[s2])
-#define FS2(s2)		(freg_map[s2])
-#define FS2N(s2)	(freg_map[s2] | 0x1)
-#define S2A(s2)		(s2)
+#define D(d)		((sljit_ins)reg_map[d] << 25)
+#define FD(d)		((sljit_ins)freg_map[d] << 25)
+#define FDN(d)		(((sljit_ins)freg_map[d] | 0x1) << 25)
+#define DA(d)		((sljit_ins)(d) << 25)
+#define S1(s1)		((sljit_ins)reg_map[s1] << 14)
+#define FS1(s1)		((sljit_ins)freg_map[s1] << 14)
+#define S1A(s1)		((sljit_ins)(s1) << 14)
+#define S2(s2)		((sljit_ins)reg_map[s2])
+#define FS2(s2)		((sljit_ins)freg_map[s2])
+#define FS2N(s2)	((sljit_ins)freg_map[s2] | 0x1)
+#define S2A(s2)		((sljit_ins)(s2))
 #define IMM_ARG		0x2000
-#define DOP(op)		((op) << 5)
-#define IMM(imm)	(((imm) & 0x1fff) | IMM_ARG)
+#define DOP(op)		((sljit_ins)(op) << 5)
+#define IMM(imm)	(((sljit_ins)(imm) & 0x1fff) | IMM_ARG)
 
 #define DR(dr)		(reg_map[dr])
-#define OPC1(opcode)	((opcode) << 30)
-#define OPC2(opcode)	((opcode) << 22)
-#define OPC3(opcode)	((opcode) << 19)
+#define DRF(dr, flags)	((sljit_s32)(reg_map[dr] | ((flags) & SET_FLAGS)))
+#define OPC1(opcode)	((sljit_ins)(opcode) << 30)
+#define OPC2(opcode)	((sljit_ins)(opcode) << 22)
+#define OPC3(opcode)	((sljit_ins)(opcode) << 19)
 #define SET_FLAGS	OPC3(0x10)
 
 #define ADD		(OPC1(0x2) | OPC3(0x00))
@@ -156,6 +157,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define FSUBS		(OPC1(0x2) | OPC3(0x34) | DOP(0x45))
 #define JMPL		(OPC1(0x2) | OPC3(0x38))
 #define LDD		(OPC1(0x3) | OPC3(0x03))
+#define LDDF		(OPC1(0x3) | OPC3(0x23))
+#define LDF		(OPC1(0x3) | OPC3(0x20))
 #define LDUW		(OPC1(0x3) | OPC3(0x00))
 #define NOP		(OPC1(0x0) | OPC2(0x04))
 #define OR		(OPC1(0x2) | OPC3(0x02))
@@ -170,6 +173,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #define SRAX		(OPC1(0x2) | OPC3(0x27) | (1 << 12))
 #define SRL		(OPC1(0x2) | OPC3(0x26))
 #define SRLX		(OPC1(0x2) | OPC3(0x26) | (1 << 12))
+#define STD		(OPC1(0x3) | OPC3(0x07))
 #define STDF		(OPC1(0x3) | OPC3(0x27))
 #define STF		(OPC1(0x3) | OPC3(0x24))
 #define STW		(OPC1(0x3) | OPC3(0x04))
@@ -183,7 +187,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
 #define MAX_DISP	(0x1fffff)
 #define MIN_DISP	(-0x200000)
-#define DISP_MASK	(0x3fffff)
+#define DISP_MASK	((sljit_ins)0x3fffff)
 
 #define BICC		(OPC1(0x0) | OPC2(0x2))
 #define FBFCC		(OPC1(0x0) | OPC2(0x6))
@@ -274,7 +278,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i
 		}
 	}
 
-	diff += sizeof(sljit_ins);
+	diff += SSIZE_OF(ins);
 
 	if (diff <= MAX_DISP && diff >= MIN_DISP) {
 		jump->flags |= PATCH_B;
@@ -300,7 +304,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	sljit_uw word_count;
 	sljit_uw next_addr;
 	sljit_sw executable_offset;
-	sljit_uw addr;
+	sljit_sw addr;
 
 	struct sljit_label *label;
 	struct sljit_jump *jump;
@@ -340,7 +344,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 				if (label && label->size == word_count) {
 					/* Just recording the address. */
 					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
-					label->size = code_ptr - code;
+					label->size = (sljit_uw)(code_ptr - code);
 					label = label->next;
 				}
 				if (jump && jump->addr == word_count) {
@@ -373,7 +377,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 
 	if (label && label->size == word_count) {
 		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
-		label->size = code_ptr - code;
+		label->size = (sljit_uw)(code_ptr - code);
 		label = label->next;
 	}
 
@@ -386,27 +390,27 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 	jump = compiler->jumps;
 	while (jump) {
 		do {
-			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
+			addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
 			buf_ptr = (sljit_ins *)jump->addr;
 
 			if (jump->flags & PATCH_CALL) {
-				addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
-				SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000);
-				buf_ptr[0] = CALL | (addr & 0x3fffffff);
+				addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
+				SLJIT_ASSERT(addr <= 0x1fffffff && addr >= -0x20000000);
+				buf_ptr[0] = CALL | ((sljit_ins)addr & 0x3fffffff);
 				break;
 			}
 			if (jump->flags & PATCH_B) {
-				addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
-				SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP);
-				buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK);
+				addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
+				SLJIT_ASSERT(addr <= MAX_DISP && addr >= MIN_DISP);
+				buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | ((sljit_ins)addr & DISP_MASK);
 				break;
 			}
 
 			/* Set the fields of immediate loads. */
 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
 			SLJIT_ASSERT(((buf_ptr[0] & 0xc1cfffff) == 0x01000000) && ((buf_ptr[1] & 0xc1f83fff) == 0x80102000));
-			buf_ptr[0] |= (addr >> 10) & 0x3fffff;
-			buf_ptr[1] |= addr & 0x3ff;
+			buf_ptr[0] |= (sljit_ins)(addr >> 10) & 0x3fffff;
+			buf_ptr[1] |= (sljit_ins)addr & 0x3ff;
 #else
 #error "Implementation required"
 #endif
@@ -416,7 +420,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 
 	put_label = compiler->put_labels;
 	while (put_label) {
-		addr = put_label->label->addr;
+		addr = (sljit_sw)put_label->label->addr;
 		buf_ptr = (sljit_ins *)put_label->addr;
 
 #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
@@ -431,7 +435,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
 
 	compiler->error = SLJIT_ERR_COMPILED;
 	compiler->executable_offset = executable_offset;
-	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
+	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
 
 	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
 	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
@@ -487,13 +491,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
 #define ALT_KEEP_CACHE	0x00040
 #define CUMULATIVE_OP	0x00080
 #define IMM_OP		0x00100
-#define SRC2_IMM	0x00200
+#define MOVE_OP		0x00200
+#define SRC2_IMM	0x00400
 
-#define REG_DEST	0x00400
-#define REG2_SOURCE	0x00800
-#define SLOW_SRC1	0x01000
-#define SLOW_SRC2	0x02000
-#define SLOW_DEST	0x04000
+#define REG_DEST	0x00800
+#define REG2_SOURCE	0x01000
+#define SLOW_SRC1	0x02000
+#define SLOW_SRC2	0x04000
+#define SLOW_DEST	0x08000
 
 /* SET_FLAGS (0x10 << 19) also belong here! */
 
@@ -507,6 +512,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
 {
+	sljit_s32 reg_index, types, tmp;
+	sljit_u32 float_offset, args_offset;
+	sljit_s32 saved_arg_index, scratch_arg_index, float_arg_index;
+
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
@@ -514,7 +523,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 	local_size = (local_size + SLJIT_LOCALS_OFFSET + 7) & ~0x7;
 	compiler->local_size = local_size;
 
-	if (local_size <= SIMM_MAX) {
+	if (local_size <= -SIMM_MIN) {
 		FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | IMM(-local_size), UNMOVABLE_INS));
 	}
 	else {
@@ -522,7 +531,88 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
 		FAIL_IF(push_inst(compiler, SAVE | D(SLJIT_SP) | S1(SLJIT_SP) | S2(TMP_REG1), UNMOVABLE_INS));
 	}
 
-	/* Arguments are in their appropriate registers. */
+	arg_types >>= SLJIT_ARG_SHIFT;
+
+	types = arg_types;
+	float_offset = 16 * sizeof(sljit_sw);
+	reg_index = 24;
+
+	while (types && reg_index < 24 + 6) {
+		switch (types & SLJIT_ARG_MASK) {
+		case SLJIT_ARG_TYPE_F64:
+			if (reg_index & 0x1) {
+				FAIL_IF(push_inst(compiler, STW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+				if (reg_index >= 24 + 6 - 1)
+					break;
+				FAIL_IF(push_inst(compiler, STW | DA(reg_index + 1) | S1(SLJIT_SP) | IMM(float_offset + sizeof(sljit_sw)), MOVABLE_INS));
+			} else
+				FAIL_IF(push_inst(compiler, STD | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+
+			float_offset += sizeof(sljit_f64);
+			reg_index++;
+			break;
+		case SLJIT_ARG_TYPE_F32:
+			FAIL_IF(push_inst(compiler, STW | DA(reg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+			float_offset += sizeof(sljit_f64);
+			break;
+		}
+
+		reg_index++;
+		types >>= SLJIT_ARG_SHIFT;
+	}
+
+	args_offset = (16 + 1 + 6) * sizeof(sljit_sw);
+	float_offset = 16 * sizeof(sljit_sw);
+	reg_index = 24;
+	saved_arg_index = 24;
+	scratch_arg_index = 8 - 1;
+	float_arg_index = 1;
+
+	while (arg_types) {
+		switch (arg_types & SLJIT_ARG_MASK) {
+		case SLJIT_ARG_TYPE_F64:
+			if (reg_index < 24 + 6 - 1) {
+				FAIL_IF(push_inst(compiler, LDDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+			} else if (reg_index < 24 + 6) {
+				FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+				FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | (1 << 25) | S1A(30) | IMM(args_offset), MOVABLE_INS));
+			} else {
+				FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1A(30) | IMM(args_offset), MOVABLE_INS));
+				FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | (1 << 25) | S1A(30) | IMM(args_offset + sizeof(sljit_sw)), MOVABLE_INS));
+			}
+
+			float_arg_index++;
+			float_offset += sizeof(sljit_f64);
+			reg_index++;
+			break;
+		case SLJIT_ARG_TYPE_F32:
+			if (reg_index < 24 + 6)
+				FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1(SLJIT_SP) | IMM(float_offset), MOVABLE_INS));
+			else
+				FAIL_IF(push_inst(compiler, LDF | FD(float_arg_index) | S1A(30) | IMM(args_offset), MOVABLE_INS));
+			float_arg_index++;
+			float_offset += sizeof(sljit_f64);
+			break;
+		default:
+			scratch_arg_index++;
+
+			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
+				tmp = saved_arg_index++;
+				if (tmp == reg_index)
+					break;
+			} else
+				tmp = scratch_arg_index;
+
+			if (reg_index < 24 + 6)
+				FAIL_IF(push_inst(compiler, OR | DA(tmp) | S1(0) | S2A(reg_index), tmp));
+			else
+				FAIL_IF(push_inst(compiler, LDUW | DA(tmp) | S1A(30) | IMM(args_offset), tmp));
+			break;
+		}
+
+		reg_index++;
+		arg_types >>= SLJIT_ARG_SHIFT;
+	}
 
 	return SLJIT_SUCCESS;
 }
@@ -539,12 +629,21 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
 	return SLJIT_SUCCESS;
 }
 
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_return_void(compiler));
+
+	FAIL_IF(push_inst(compiler, JMPL | D(0) | S1A(31) | IMM(8), UNMOVABLE_INS));
+	return push_inst(compiler, RESTORE | D(SLJIT_R0) | S1(SLJIT_R0) | S2(0), UNMOVABLE_INS);
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
 {
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
 
-	if (op != SLJIT_MOV || !FAST_IS_REG(src)) {
+	if (TYPE_CAST_NEEDED(op) || !FAST_IS_REG(src)) {
 		FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
 		src = SLJIT_R0;
 	}
@@ -591,7 +690,7 @@ static const sljit_ins data_transfer_insts[16 + 4] = {
 #undef ARCH_32_64
 
 /* Can perform an operation using at most 1 instruction. */
-static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
+static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
 {
 	SLJIT_ASSERT(arg & SLJIT_MEM);
 
@@ -632,7 +731,7 @@ static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, slj
 }
 
 /* Emit the necessary instructions. See can_cache above. */
-static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
+static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
 {
 	sljit_s32 base, arg2, delay_slot;
 	sljit_ins dest;
@@ -660,7 +759,7 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
 				arg2 = reg;
 			else /* It must be a mov operation, so tmp1 must be free to use. */
 				arg2 = TMP_REG1;
-			FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | argw, DR(arg2)));
+			FAIL_IF(push_inst(compiler, SLL_W | D(arg2) | S1(OFFS_REG(arg)) | IMM_ARG | (sljit_ins)argw, DR(arg2)));
 		}
 	}
 	else {
@@ -692,7 +791,7 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
 	return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | dest | S1(base) | S2(arg2), delay_slot);
 }
 
-static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
+static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
 {
 	if (getput_arg_fast(compiler, flags, reg, arg, argw))
 		return compiler->error;
@@ -701,14 +800,14 @@ static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit
 	return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
 }
 
-static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
+static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_u32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
 {
 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
 		return compiler->error;
 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
 }
 
-static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
+static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_u32 flags,
 	sljit_s32 dst, sljit_sw dstw,
 	sljit_s32 src1, sljit_sw src1w,
 	sljit_s32 src2, sljit_sw src2w)
@@ -727,11 +826,11 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
 		compiler->cache_argw = 0;
 	}
 
-	if (dst != SLJIT_UNUSED) {
+	if (dst != TMP_REG2) {
 		if (FAST_IS_REG(dst)) {
 			dst_r = dst;
 			flags |= REG_DEST;
-			if (op >= SLJIT_MOV && op <= SLJIT_MOV_P)
+			if (flags & MOVE_OP)
 				sugg_src2_r = dst_r;
 		}
 		else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
@@ -782,7 +881,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
 	if (FAST_IS_REG(src2)) {
 		src2_r = src2;
 		flags |= REG2_SOURCE;
-		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOV_P)
+		if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
 			dst_r = src2_r;
 	}
 	else if (src2 & SLJIT_IMM) {
@@ -793,8 +892,12 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3
 			}
 			else {
 				src2_r = 0;
-				if ((op >= SLJIT_MOV && op <= SLJIT_MOV_P) && (dst & SLJIT_MEM))
-					dst_r = 0;
+				if (flags & MOVE_OP) {
+					if (dst & SLJIT_MEM)
+						dst_r = 0;
+					else
+						op = SLJIT_MOV;
+				}
 			}
 		}
 	}
@@ -888,7 +991,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
 	sljit_s32 dst, sljit_sw dstw,
 	sljit_s32 src, sljit_sw srcw)
 {
-	sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
+	sljit_u32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
 
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
@@ -898,33 +1001,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
 	op = GET_OPCODE(op);
 	switch (op) {
 	case SLJIT_MOV:
-	case SLJIT_MOV_P:
-		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
-
+#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32)
 	case SLJIT_MOV_U32:
-		return emit_op(compiler, SLJIT_MOV_U32, flags | INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
-
 	case SLJIT_MOV_S32:
-		return emit_op(compiler, SLJIT_MOV_S32, flags | INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
+	case SLJIT_MOV32:
+#endif
+	case SLJIT_MOV_P:
+		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw);
 
 	case SLJIT_MOV_U8:
-		return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
+		return emit_op(compiler, SLJIT_MOV_U8, flags | BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);
 
 	case SLJIT_MOV_S8:
-		return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
+		return emit_op(compiler, SLJIT_MOV_S8, flags | BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);
 
 	case SLJIT_MOV_U16:
-		return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
+		return emit_op(compiler, SLJIT_MOV_U16, flags | HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);
 
 	case SLJIT_MOV_S16:
-		return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
+		return emit_op(compiler, SLJIT_MOV_S16, flags | HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);
 
 	case SLJIT_NOT:
 	case SLJIT_CLZ:
 		return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
-
-	case SLJIT_NEG:
-		return emit_op(compiler, SLJIT_SUB, flags | IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
 	}
 
 	return SLJIT_SUCCESS;
@@ -935,17 +1034,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
 	sljit_s32 src1, sljit_sw src1w,
 	sljit_s32 src2, sljit_sw src2w)
 {
-	sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
+	sljit_u32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
 
 	CHECK_ERROR();
-	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
+	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
 	ADJUST_LOCAL_OFFSET(dst, dstw);
 	ADJUST_LOCAL_OFFSET(src1, src1w);
 	ADJUST_LOCAL_OFFSET(src2, src2w);
 
-	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
-		return SLJIT_SUCCESS;
-
 	op = GET_OPCODE(op);
 	switch (op) {
 	case SLJIT_ADD:
@@ -975,6 +1071,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
 	return SLJIT_SUCCESS;
 }
 
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
+	sljit_s32 src1, sljit_sw src1w,
+	sljit_s32 src2, sljit_sw src2w)
+{
+	CHECK_ERROR();
+	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
+
+#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
+		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
+	compiler->skip_checks = 1;
+#endif
+	return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
+}
+
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
 	sljit_s32 src, sljit_sw srcw)
 {
@@ -1015,7 +1125,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
 }
 
 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
-	void *instruction, sljit_s32 size)
+	void *instruction, sljit_u32 size)
 {
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
@@ -1027,8 +1137,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c
 /*  Floating point operators                                             */
 /* --------------------------------------------------------------------- */
 
-#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7))
-#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)
+#define FLOAT_DATA(op) ((sljit_ins)DOUBLE_DATA | (((sljit_ins)(op) & SLJIT_32) >> 7))
+#define SELECT_FOP(op, single, double) ((op & SLJIT_32) ? single : double)
 #define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw))
 
 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1108,11 +1218,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
 	compiler->cache_arg = 0;
 	compiler->cache_argw = 0;
 
-	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
+	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
 
 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
-		op ^= SLJIT_F32_OP;
+		op ^= SLJIT_32;
 
 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
 
@@ -1126,7 +1236,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
 		if (src != dst_r) {
 			if (dst_r != TMP_FREG1) {
 				FAIL_IF(push_inst(compiler, FMOVS | FD(dst_r) | FS2(src), MOVABLE_INS));
-				if (!(op & SLJIT_F32_OP))
+				if (!(op & SLJIT_32))
 					FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS));
 			}
 			else
@@ -1135,17 +1245,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
 		break;
 	case SLJIT_NEG_F64:
 		FAIL_IF(push_inst(compiler, FNEGS | FD(dst_r) | FS2(src), MOVABLE_INS));
-		if (dst_r != src && !(op & SLJIT_F32_OP))
+		if (dst_r != src && !(op & SLJIT_32))
 			FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS));
 		break;
 	case SLJIT_ABS_F64:
 		FAIL_IF(push_inst(compiler, FABSS | FD(dst_r) | FS2(src), MOVABLE_INS));
-		if (dst_r != src && !(op & SLJIT_F32_OP))
+		if (dst_r != src && !(op & SLJIT_32))
 			FAIL_IF(push_inst(compiler, FMOVS | FDN(dst_r) | FS2N(src), MOVABLE_INS));
 		break;
 	case SLJIT_CONV_F64_FROM_F32:
 		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOD, FDTOS) | FD(dst_r) | FS2(src), MOVABLE_INS));
-		op ^= SLJIT_F32_OP;
+		op ^= SLJIT_32;
 		break;
 	}
 
@@ -1288,10 +1398,12 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
 
 	case SLJIT_LESS:
 	case SLJIT_GREATER_F64: /* Unordered. */
+	case SLJIT_CARRY:
 		return DA(0x5);
 
 	case SLJIT_GREATER_EQUAL:
 	case SLJIT_LESS_EQUAL_F64:
+	case SLJIT_NOT_CARRY:
 		return DA(0xd);
 
 	case SLJIT_GREATER:
@@ -1315,15 +1427,17 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
 		return DA(0x2);
 
 	case SLJIT_OVERFLOW:
-		if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB))
+		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
 			return DA(0x9);
+		/* fallthrough */
 
 	case SLJIT_UNORDERED_F64:
 		return DA(0x7);
 
 	case SLJIT_NOT_OVERFLOW:
-		if (!(compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD_SUB))
+		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
 			return DA(0x1);
+		/* fallthrough */
 
 	case SLJIT_ORDERED_F64:
 		return DA(0xf);
@@ -1412,7 +1526,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
 		FAIL_IF(!jump);
 		set_jump(jump, compiler, JUMP_ADDR);
-		jump->u.target = srcw;
+		jump->u.target = (sljit_uw)srcw;
 
 		if ((compiler->delay_slot & DST_INS_MASK) != UNMOVABLE_INS)
 			jump->flags |= IS_MOVABLE;
@@ -1460,7 +1574,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
 	sljit_s32 dst, sljit_sw dstw,
 	sljit_s32 type)
 {
-	sljit_s32 reg, flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
+	sljit_s32 reg;
+	sljit_u32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
 
 	CHECK_ERROR();
 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

文件差异内容过多而无法显示
+ 471 - 355
thirdparty/pcre2/src/sljit/sljitNativeX86_32.c


文件差异内容过多而无法显示
+ 420 - 371
thirdparty/pcre2/src/sljit/sljitNativeX86_64.c


文件差异内容过多而无法显示
+ 184 - 203
thirdparty/pcre2/src/sljit/sljitNativeX86_common.c


+ 3 - 3
thirdparty/pcre2/src/sljit/sljitProtExecAllocator.c

@@ -66,7 +66,7 @@
 /* --------------------------------------------------------------------- */
 
 /* 64 KByte. */
-#define CHUNK_SIZE	0x10000
+#define CHUNK_SIZE	(sljit_uw)0x10000
 
 struct chunk_header {
 	void *executable;
@@ -194,7 +194,7 @@ static SLJIT_INLINE struct chunk_header* alloc_chunk(sljit_uw size)
 	if (fd == -1)
 		return NULL;
 
-	if (ftruncate(fd, size)) {
+	if (ftruncate(fd, (off_t)size)) {
 		close(fd);
 		return NULL;
 	}
@@ -281,7 +281,7 @@ struct free_block {
 #define AS_FREE_BLOCK(base, offset) \
 	((struct free_block*)(((sljit_u8*)base) + offset))
 #define MEM_START(base)		((void*)((base) + 1))
-#define ALIGN_SIZE(size)	(((size) + sizeof(struct block_header) + 7) & ~7)
+#define ALIGN_SIZE(size)	(((size) + sizeof(struct block_header) + 7u) & ~(sljit_uw)7)
 
 static struct free_block* free_blocks;
 static sljit_uw allocated_size;

+ 17 - 14
thirdparty/pcre2/src/sljit/sljitUtils.c

@@ -131,12 +131,12 @@ static SLJIT_INLINE int open_dev_zero(void)
 
 #ifdef _WIN32
 
-static SLJIT_INLINE sljit_sw get_page_alignment(void) {
+static SLJIT_INLINE sljit_uw get_page_alignment(void) {
 	SYSTEM_INFO si;
-	static sljit_sw sljit_page_align;
+	static sljit_uw sljit_page_align = 0;
 	if (!sljit_page_align) {
 		GetSystemInfo(&si);
-		sljit_page_align = si.dwPageSize - 1;
+		sljit_page_align = (sljit_uw)si.dwPageSize - 1;
 	}
 	return sljit_page_align;
 }
@@ -145,18 +145,21 @@ static SLJIT_INLINE sljit_sw get_page_alignment(void) {
 
 #include <unistd.h>
 
-static SLJIT_INLINE sljit_sw get_page_alignment(void) {
-	static sljit_sw sljit_page_align = -1;
-	if (sljit_page_align < 0) {
+static SLJIT_INLINE sljit_uw get_page_alignment(void) {
+	static sljit_uw sljit_page_align = 0;
+
+	sljit_sw align;
+
+	if (!sljit_page_align) {
 #ifdef _SC_PAGESIZE
-		sljit_page_align = sysconf(_SC_PAGESIZE);
+		align = sysconf(_SC_PAGESIZE);
 #else
-		sljit_page_align = getpagesize();
+		align = getpagesize();
 #endif
 		/* Should never happen. */
-		if (sljit_page_align < 0)
-			sljit_page_align = 4096;
-		sljit_page_align--;
+		if (align < 0)
+			align = 4096;
+		sljit_page_align = (sljit_uw)align - 1;
 	}
 	return sljit_page_align;
 }
@@ -227,7 +230,7 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *st
 SLJIT_API_FUNC_ATTRIBUTE void SLJIT_FUNC sljit_free_stack(struct sljit_stack *stack, void *allocator_data)
 {
 	SLJIT_UNUSED_ARG(allocator_data);
-	munmap((void*)stack->min_start, stack->end - stack->min_start);
+	munmap((void*)stack->min_start, (size_t)(stack->end - stack->min_start));
 	SLJIT_FREE(stack, allocator_data);
 }
 
@@ -237,7 +240,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_FUNC sljit_allocate_stack(slj
 {
 	struct sljit_stack *stack;
 	void *ptr;
-	sljit_sw page_align;
+	sljit_uw page_align;
 
 	SLJIT_UNUSED_ARG(allocator_data);
 
@@ -295,7 +298,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_u8 *SLJIT_FUNC sljit_stack_resize(struct sljit_st
 #if defined _WIN32 || defined(POSIX_MADV_DONTNEED)
 	sljit_uw aligned_old_start;
 	sljit_uw aligned_new_start;
-	sljit_sw page_align;
+	sljit_uw page_align;
 #endif
 
 	if ((new_start < stack->min_start) || (new_start >= stack->end))

部分文件因为文件数量过多而无法显示