Browse Source

Merge pull request #54337 from bruvzg/icu_70_1

Rémi Verschelde 3 years ago
parent
commit
0ec7763197
100 changed files with 5703 additions and 4147 deletions
  1. 3 1
      modules/text_server_adv/SCsub
  2. 8 3
      thirdparty/README.md
  3. 15 17
      thirdparty/icu4c/LICENSE
  4. 29 10
      thirdparty/icu4c/common/brkeng.cpp
  5. 8 4
      thirdparty/icu4c/common/brkeng.h
  6. 4 4
      thirdparty/icu4c/common/brkiter.cpp
  7. 3 3
      thirdparty/icu4c/common/bytestriebuilder.cpp
  8. 3 3
      thirdparty/icu4c/common/caniter.cpp
  9. 28 0
      thirdparty/icu4c/common/characterproperties.cpp
  10. 1 1
      thirdparty/icu4c/common/cstr.h
  11. 26 21
      thirdparty/icu4c/common/dictbe.cpp
  12. 133 119
      thirdparty/icu4c/common/dictbe.h
  13. 5 5
      thirdparty/icu4c/common/dictionarydata.h
  14. 1 1
      thirdparty/icu4c/common/dtintrv.cpp
  15. 220 0
      thirdparty/icu4c/common/emojiprops.cpp
  16. 90 0
      thirdparty/icu4c/common/emojiprops.h
  17. 23 24
      thirdparty/icu4c/common/filteredbrk.cpp
  18. 4 4
      thirdparty/icu4c/common/icuplug.cpp
  19. 1 1
      thirdparty/icu4c/common/loadednormalizer2impl.cpp
  20. 8 8
      thirdparty/icu4c/common/localebuilder.cpp
  21. 3 3
      thirdparty/icu4c/common/localematcher.cpp
  22. 14 2
      thirdparty/icu4c/common/locdispnames.cpp
  23. 47 21
      thirdparty/icu4c/common/locdspnm.cpp
  24. 41 65
      thirdparty/icu4c/common/locid.cpp
  25. 23 18
      thirdparty/icu4c/common/loclikely.cpp
  26. 1 1
      thirdparty/icu4c/common/lsr.cpp
  27. 2 2
      thirdparty/icu4c/common/lsr.h
  28. 855 0
      thirdparty/icu4c/common/lstmbe.cpp
  29. 87 0
      thirdparty/icu4c/common/lstmbe.h
  30. 4 4
      thirdparty/icu4c/common/messagepattern.cpp
  31. 1 1
      thirdparty/icu4c/common/msvcres.h
  32. 383 380
      thirdparty/icu4c/common/norm2_nfc_data.h
  33. 2 2
      thirdparty/icu4c/common/normalizer2impl.cpp
  34. 2 2
      thirdparty/icu4c/common/normalizer2impl.h
  35. 1 1
      thirdparty/icu4c/common/normlzr.cpp
  36. 2 2
      thirdparty/icu4c/common/pluralmap.h
  37. 956 935
      thirdparty/icu4c/common/propname_data.h
  38. 10 1
      thirdparty/icu4c/common/putil.cpp
  39. 12 11
      thirdparty/icu4c/common/rbbi.cpp
  40. 5 5
      thirdparty/icu4c/common/rbbi_cache.cpp
  41. 6 6
      thirdparty/icu4c/common/rbbidata.cpp
  42. 1 1
      thirdparty/icu4c/common/rbbidata.h
  43. 2 1
      thirdparty/icu4c/common/rbbinode.cpp
  44. 2 2
      thirdparty/icu4c/common/rbbinode.h
  45. 3 3
      thirdparty/icu4c/common/rbbirb.h
  46. 5 5
      thirdparty/icu4c/common/rbbiscan.cpp
  47. 1 1
      thirdparty/icu4c/common/rbbiscan.h
  48. 1 1
      thirdparty/icu4c/common/rbbistbl.cpp
  49. 22 7
      thirdparty/icu4c/common/rbbitblb.cpp
  50. 2 2
      thirdparty/icu4c/common/rbbitblb.h
  51. 1 1
      thirdparty/icu4c/common/resbund.cpp
  52. 1 1
      thirdparty/icu4c/common/ruleiter.h
  53. 3 3
      thirdparty/icu4c/common/schriter.cpp
  54. 45 56
      thirdparty/icu4c/common/serv.cpp
  55. 9 9
      thirdparty/icu4c/common/serv.h
  56. 19 19
      thirdparty/icu4c/common/servloc.h
  57. 6 6
      thirdparty/icu4c/common/servls.cpp
  58. 1 1
      thirdparty/icu4c/common/servnotf.cpp
  59. 1 1
      thirdparty/icu4c/common/servnotf.h
  60. 24 24
      thirdparty/icu4c/common/stringtriebuilder.cpp
  61. 24 4
      thirdparty/icu4c/common/uassert.h
  62. 3 3
      thirdparty/icu4c/common/ubidi.cpp
  63. 695 676
      thirdparty/icu4c/common/ubidi_props_data.h
  64. 2 2
      thirdparty/icu4c/common/ubidiln.cpp
  65. 1 1
      thirdparty/icu4c/common/ucase.cpp
  66. 508 497
      thirdparty/icu4c/common/ucase_props_data.h
  67. 1014 1002
      thirdparty/icu4c/common/uchar_props_data.h
  68. 3 3
      thirdparty/icu4c/common/ucharstriebuilder.cpp
  69. 3 3
      thirdparty/icu4c/common/uchriter.cpp
  70. 1 0
      thirdparty/icu4c/common/ucln_cmn.h
  71. 1 1
      thirdparty/icu4c/common/ucmndata.cpp
  72. 2 2
      thirdparty/icu4c/common/ucmndata.h
  73. 7 7
      thirdparty/icu4c/common/ucnv2022.cpp
  74. 1 1
      thirdparty/icu4c/common/ucnv_cnv.h
  75. 2 2
      thirdparty/icu4c/common/ucnv_err.cpp
  76. 1 1
      thirdparty/icu4c/common/ucnv_imp.h
  77. 4 4
      thirdparty/icu4c/common/ucnv_lmb.cpp
  78. 3 3
      thirdparty/icu4c/common/ucnv_u32.cpp
  79. 8 8
      thirdparty/icu4c/common/ucnvisci.cpp
  80. 2 2
      thirdparty/icu4c/common/ucnvmbcs.cpp
  81. 2 2
      thirdparty/icu4c/common/ucnvscsu.cpp
  82. 1 1
      thirdparty/icu4c/common/ucptrie_impl.h
  83. 4 3
      thirdparty/icu4c/common/ucurr.cpp
  84. 5 2
      thirdparty/icu4c/common/uelement.h
  85. 2 2
      thirdparty/icu4c/common/uhash.cpp
  86. 5 5
      thirdparty/icu4c/common/uhash.h
  87. 52 19
      thirdparty/icu4c/common/uloc.cpp
  88. 7 5
      thirdparty/icu4c/common/uloc_keytype.cpp
  89. 8 4
      thirdparty/icu4c/common/uloc_tag.cpp
  90. 68 0
      thirdparty/icu4c/common/ulocimp.h
  91. 1 1
      thirdparty/icu4c/common/umapfile.h
  92. 5 5
      thirdparty/icu4c/common/unicode/appendable.h
  93. 4 4
      thirdparty/icu4c/common/unicode/brkiter.h
  94. 3 3
      thirdparty/icu4c/common/unicode/bytestream.h
  95. 19 19
      thirdparty/icu4c/common/unicode/bytestriebuilder.h
  96. 2 2
      thirdparty/icu4c/common/unicode/caniter.h
  97. 4 4
      thirdparty/icu4c/common/unicode/chariter.h
  98. 1 1
      thirdparty/icu4c/common/unicode/docmain.h
  99. 4 4
      thirdparty/icu4c/common/unicode/dtintrv.h
  100. 2 4
      thirdparty/icu4c/common/unicode/localebuilder.h

+ 3 - 1
modules/text_server_adv/SCsub

@@ -270,6 +270,7 @@ if env["builtin_icu"]:
         "common/dictionarydata.cpp",
         "common/dtintrv.cpp",
         "common/edits.cpp",
+        "common/emojiprops.cpp",
         "common/errorcode.cpp",
         "common/filteredbrk.cpp",
         "common/filterednormalizer2.cpp",
@@ -291,6 +292,7 @@ if env["builtin_icu"]:
         "common/locresdata.cpp",
         "common/locutil.cpp",
         "common/lsr.cpp",
+        "common/lstmbe.cpp",
         "common/messagepattern.cpp",
         "common/normalizer2.cpp",
         "common/normalizer2impl.cpp",
@@ -448,7 +450,7 @@ if env["builtin_icu"]:
     ]
     thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
 
-    icu_data_name = "icudt69l.dat"
+    icu_data_name = "icudt70l.dat"
 
     if env_icu["tools"]:
         env_icu.Depends("#thirdparty/icu4c/icudata.gen.h", "#thirdparty/icu4c/" + icu_data_name)

+ 8 - 3
thirdparty/README.md

@@ -201,7 +201,7 @@ Files extracted from upstream source:
 ## icu4c
 
 - Upstream: https://github.com/unicode-org/icu
-- Version: 69.1 (0e7b4428866f3133b4abba2d932ee3faa708db1d, 2021)
+- Version: 70.1 (a56dde820dc35665a66f2e9ee8ba58e75049b668, 2021)
 - License: Unicode
 
 Files extracted from upstream source:
@@ -211,9 +211,14 @@ Files extracted from upstream source:
 
 Files generated from upstream source:
 
-- the `icudt69l.dat` built with the provided `godot_data.json` config file (see
+- the `icudt70l.dat` built with the provided `godot_data.json` config file (see
   https://github.com/unicode-org/icu/blob/master/docs/userguide/icu_data/buildtool.md
-  for instructions)
+  for instructions).
+
+- Step 1: Build ICU with default options - `./runConfigureICU {PLATFORM} && make`.
+- Step 2: Reconfigure ICU with custom data config - `ICU_DATA_FILTER_FILE={GODOT_SOURCE}/thirdparty/icu4c/godot_data.json ./runConfigureICU {PLATFORM} --with-data-packaging=common`.
+- Step 3: Delete the `data/out` folder and rebuild the data - `cd data && rm -rf ./out && make`.
+- Step 4: Copy `source/data/out/icudt70l.dat` to `{GODOT_SOURCE}/thirdparty/icu4c/icudt70l.dat`.
 
 
 ## jpeg-compressor

+ 15 - 17
thirdparty/icu4c/LICENSE

@@ -281,31 +281,29 @@ property of their respective owners.
 
 3. Lao Word Break Dictionary Data (laodict.txt)
 
- #  Copyright (c) 2013 International Business Machines Corporation
- #  and others. All Rights Reserved.
+ # Copyright (C) 2016 and later: Unicode, Inc. and others.
+ # License & terms of use: http://www.unicode.org/copyright.html
+ # Copyright (c) 2015 International Business Machines Corporation
+ # and others. All Rights Reserved.
  #
- # Project: https://github.com/veer66/lao-dictionary
- # Dictionary: https://github.com/veer66/lao-dictionary/blob/master/Lao-Dictionary.txt
- # License: https://github.com/veer66/lao-dictionary/blob/master/Lao-Dictionary-LICENSE.txt
- #              (copied below)
+ # Project: https://github.com/rober42539/lao-dictionary
+ # Dictionary: https://github.com/rober42539/lao-dictionary/laodict.txt
+ # License: https://github.com/rober42539/lao-dictionary/LICENSE.txt
+ #          (copied below)
  #
- #  This file is derived from the above dictionary, with slight
- #  modifications.
+ #	This file is derived from the above dictionary version of Nov 22, 2020
  #  ----------------------------------------------------------------------
  #  Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
  #  All rights reserved.
  #
  #  Redistribution and use in source and binary forms, with or without
- #  modification,
- #  are permitted provided that the following conditions are met:
- #
- #
- # Redistributions of source code must retain the above copyright notice, this
- #  list of conditions and the following disclaimer. Redistributions in
- #  binary form must reproduce the above copyright notice, this list of
- #  conditions and the following disclaimer in the documentation and/or
- #  other materials provided with the distribution.
+ #  modification, are permitted provided that the following conditions are met:
  #
+ #  Redistributions of source code must retain the above copyright notice, this
+ #  list of conditions and the following disclaimer. Redistributions in binary
+ #  form must reproduce the above copyright notice, this list of conditions and
+ #  the following disclaimer in the documentation and/or other materials
+ #  provided with the distribution.
  #
  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+ 29 - 10
thirdparty/icu4c/common/brkeng.cpp

@@ -25,6 +25,7 @@
 #include "brkeng.h"
 #include "cmemory.h"
 #include "dictbe.h"
+#include "lstmbe.h"
 #include "charstr.h"
 #include "dictionarydata.h"
 #include "mutex.h"
@@ -77,7 +78,9 @@ int32_t
 UnhandledEngine::findBreaks( UText *text,
                              int32_t /* startPos */,
                              int32_t endPos,
-                             UVector32 &/*foundBreaks*/ ) const {
+                             UVector32 &/*foundBreaks*/,
+                             UErrorCode &status) const {
+    if (U_FAILURE(status)) return 0;
     UChar32 c = utext_current32(text); 
     while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
         utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
@@ -132,14 +135,13 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) {
     static UMutex gBreakEngineMutex;
     Mutex m(&gBreakEngineMutex);
 
-    if (fEngines == NULL) {
-        UStack  *engines = new UStack(_deleteEngine, NULL, status);
-        if (U_FAILURE(status) || engines == NULL) {
+    if (fEngines == nullptr) {
+        LocalPointer<UStack>  engines(new UStack(_deleteEngine, nullptr, status), status);
+        if (U_FAILURE(status) ) {
             // Note: no way to return error code to caller.
-            delete engines;
-            return NULL;
+            return nullptr;
         }
-        fEngines = engines;
+        fEngines = engines.orphan();
     } else {
         int32_t i = fEngines->size();
         while (--i >= 0) {
@@ -152,10 +154,10 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) {
     
     // We didn't find an engine. Create one.
     lbe = loadEngineFor(c);
-    if (lbe != NULL) {
+    if (lbe != nullptr) {
         fEngines->push((void *)lbe, status);
     }
-    return lbe;
+    return U_SUCCESS(status) ? lbe : nullptr;
 }
 
 const LanguageBreakEngine *
@@ -163,9 +165,26 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
     UErrorCode status = U_ZERO_ERROR;
     UScriptCode code = uscript_getScript(c, &status);
     if (U_SUCCESS(status)) {
+        const LanguageBreakEngine *engine = nullptr;
+        // Try to use LSTM first
+        const LSTMData *data = CreateLSTMDataForScript(code, status);
+        if (U_SUCCESS(status)) {
+            if (data != nullptr) {
+                engine = CreateLSTMBreakEngine(code, data, status);
+                if (U_SUCCESS(status) && engine != nullptr) {
+                    return engine;
+                }
+                if (engine != nullptr) {
+                    delete engine;
+                    engine = nullptr;
+                } else {
+                    DeleteLSTMData(data);
+                }
+            }
+        }
+        status = U_ZERO_ERROR;  // fallback to dictionary based
         DictionaryMatcher *m = loadDictionaryMatcherFor(code);
         if (m != NULL) {
-            const LanguageBreakEngine *engine = NULL;
             switch(code) {
             case USCRIPT_THAI:
                 engine = new ThaiBreakEngine(m, status);

+ 8 - 4
thirdparty/icu4c/common/brkeng.h

@@ -68,12 +68,14 @@ class LanguageBreakEngine : public UMemory {
   * @param startPos The start of the run within the supplied text.
   * @param endPos The end of the run within the supplied text.
   * @param foundBreaks A Vector of int32_t to receive the breaks.
+  * @param status Information on any errors encountered.
   * @return The number of breaks found.
   */
   virtual int32_t findBreaks( UText *text,
                               int32_t startPos,
                               int32_t endPos,
-                              UVector32 &foundBreaks ) const = 0;
+                              UVector32 &foundBreaks,
+                              UErrorCode &status) const = 0;
 
 };
 
@@ -174,7 +176,7 @@ class UnhandledEngine : public LanguageBreakEngine {
   * @return true if this engine handles the particular character and break
   * type.
   */
-  virtual UBool handles(UChar32 c) const;
+  virtual UBool handles(UChar32 c) const override;
 
  /**
   * <p>Find any breaks within a run in the supplied text.</p>
@@ -185,12 +187,14 @@ class UnhandledEngine : public LanguageBreakEngine {
   * @param startPos The start of the run within the supplied text.
   * @param endPos The end of the run within the supplied text.
   * @param foundBreaks An allocated C array of the breaks found, if any
+  * @param status Information on any errors encountered.
   * @return The number of breaks found.
   */
   virtual int32_t findBreaks( UText *text,
                               int32_t startPos,
                               int32_t endPos,
-                              UVector32 &foundBreaks ) const;
+                              UVector32 &foundBreaks,
+                              UErrorCode &status) const override;
 
  /**
   * <p>Tell the engine to handle a particular character and break type.</p>
@@ -243,7 +247,7 @@ class ICULanguageBreakFactory : public LanguageBreakFactory {
   * sought.
   * @return A LanguageBreakEngine with the desired characteristics, or 0.
   */
-  virtual const LanguageBreakEngine *getEngineFor(UChar32 c);
+  virtual const LanguageBreakEngine *getEngineFor(UChar32 c) override;
 
 protected:
  /**

+ 4 - 4
thirdparty/icu4c/common/brkiter.cpp

@@ -234,7 +234,7 @@ class ICUBreakIteratorFactory : public ICUResourceBundleFactory {
 public:
     virtual ~ICUBreakIteratorFactory();
 protected:
-    virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const {
+    virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const override {
         return BreakIterator::makeInstance(loc, kind, status);
     }
 };
@@ -254,11 +254,11 @@ public:
 
     virtual ~ICUBreakIteratorService();
 
-    virtual UObject* cloneInstance(UObject* instance) const {
+    virtual UObject* cloneInstance(UObject* instance) const override {
         return ((BreakIterator*)instance)->clone();
     }
 
-    virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const {
+    virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const override {
         LocaleKey& lkey = (LocaleKey&)key;
         int32_t kind = lkey.kind();
         Locale loc;
@@ -266,7 +266,7 @@ public:
         return BreakIterator::makeInstance(loc, kind, status);
     }
 
-    virtual UBool isDefault() const {
+    virtual UBool isDefault() const override {
         return countFactories() == 1;
     }
 };

+ 3 - 3
thirdparty/icu4c/common/bytestriebuilder.cpp

@@ -343,13 +343,13 @@ BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_
         static_cast<uint32_t>(hash)*37u + static_cast<uint32_t>(ustr_hashCharsN(bytes, len)));
 }
 
-UBool
+bool
 BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
     if(this==&other) {
-        return TRUE;
+        return true;
     }
     if(!LinearMatchNode::operator==(other)) {
-        return FALSE;
+        return false;
     }
     const BTLinearMatchNode &o=(const BTLinearMatchNode &)other;
     return 0==uprv_memcmp(s, o.s, length);

+ 3 - 3
thirdparty/icu4c/common/caniter.cpp

@@ -208,10 +208,10 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
         goto CleanPartialInitialization;
     }
 
-    // i should initialy be the number of code units at the 
+    // i should initially be the number of code units at the 
     // start of the string
     i = U16_LENGTH(source.char32At(0));
-    //int32_t i = 1;
+    // int32_t i = 1;
     // find the segments
     // This code iterates through the source string and 
     // extracts segments that end up on a codepoint that
@@ -494,7 +494,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh
 
 /**
  * See if the decomposition of cp2 is at segment starting at segmentPos 
- * (with canonical rearrangment!)
+ * (with canonical rearrangement!)
  * If so, take the remainder, and return the equivalents 
  */
 Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {

+ 28 - 0
thirdparty/icu4c/common/characterproperties.cpp

@@ -14,6 +14,7 @@
 #include "unicode/uscript.h"
 #include "unicode/uset.h"
 #include "cmemory.h"
+#include "emojiprops.h"
 #include "mutex.h"
 #include "normalizer2impl.h"
 #include "uassert.h"
@@ -170,6 +171,13 @@ void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
     case UPROPS_SRC_VO:
         uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode);
         break;
+    case UPROPS_SRC_EMOJI: {
+        const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode);
+        if (U_SUCCESS(errorCode)) {
+            ep->addPropertyStarts(&sa, errorCode);
+        }
+        break;
+    }
     default:
         errorCode = U_INTERNAL_PROGRAM_ERROR;
         break;
@@ -268,6 +276,26 @@ UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
         errorCode = U_MEMORY_ALLOCATION_ERROR;
         return nullptr;
     }
+    if (UCHAR_BASIC_EMOJI <= property && property <= UCHAR_RGI_EMOJI) {
+        // property of strings
+        const icu::EmojiProps *ep = icu::EmojiProps::getSingleton(errorCode);
+        if (U_FAILURE(errorCode)) { return nullptr; }
+        USetAdder sa = {
+            (USet *)set.getAlias(),
+            _set_add,
+            _set_addRange,
+            _set_addString,
+            nullptr, // don't need remove()
+            nullptr // don't need removeRange()
+        };
+        ep->addStrings(&sa, property, errorCode);
+        if (property != UCHAR_BASIC_EMOJI && property != UCHAR_RGI_EMOJI) {
+            // property of _only_ strings
+            set->freeze();
+            return set.orphan();
+        }
+    }
+
     const UnicodeSet *inclusions =
         icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
     if (U_FAILURE(errorCode)) { return nullptr; }

+ 1 - 1
thirdparty/icu4c/common/cstr.h

@@ -28,7 +28,7 @@
  * default code page conversion, which will do the best job possible,
  * but may be lossy, depending on the platform.
  *
- * If no other conversion is available, use invariant conversion and substitue
+ * If no other conversion is available, use invariant conversion and substitute
  * '?' for non-invariant characters.
  *
  * Example Usage:

+ 26 - 21
thirdparty/icu4c/common/dictbe.cpp

@@ -47,7 +47,9 @@ int32_t
 DictionaryBreakEngine::findBreaks( UText *text,
                                  int32_t startPos,
                                  int32_t endPos,
-                                 UVector32 &foundBreaks ) const {
+                                 UVector32 &foundBreaks,
+                                 UErrorCode& status) const {
+    if (U_FAILURE(status)) return 0;
     (void)startPos;            // TODO: remove this param?
     int32_t result = 0;
 
@@ -66,7 +68,7 @@ DictionaryBreakEngine::findBreaks( UText *text,
     }
     rangeStart = start;
     rangeEnd = current;
-    result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
+    result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks, status);
     utext_setNativeIndex(text, current);
     
     return result;
@@ -179,7 +181,7 @@ static const int32_t THAI_ROOT_COMBINE_THRESHOLD = 3;
 // dictionary word, with a preceding word
 static const int32_t THAI_PREFIX_COMBINE_THRESHOLD = 3;
 
-// Ellision character
+// Elision character
 static const int32_t THAI_PAIYANNOI = 0x0E2F;
 
 // Repeat character
@@ -227,7 +229,9 @@ int32_t
 ThaiBreakEngine::divideUpDictionaryRange( UText *text,
                                                 int32_t rangeStart,
                                                 int32_t rangeEnd,
-                                                UVector32 &foundBreaks ) const {
+                                                UVector32 &foundBreaks,
+                                                UErrorCode& status) const {
+    if (U_FAILURE(status)) return 0;
     utext_setNativeIndex(text, rangeStart);
     utext_moveIndex32(text, THAI_MIN_WORD_SPAN);
     if (utext_getNativeIndex(text) >= rangeEnd) {
@@ -240,7 +244,6 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text,
     int32_t cpWordLength = 0;    // Word Length in Code Points.
     int32_t cuWordLength = 0;    // Word length in code units (UText native indexing)
     int32_t current;
-    UErrorCode status = U_ZERO_ERROR;
     PossibleWord words[THAI_LOOKAHEAD];
     
     utext_setNativeIndex(text, rangeStart);
@@ -465,7 +468,9 @@ int32_t
 LaoBreakEngine::divideUpDictionaryRange( UText *text,
                                                 int32_t rangeStart,
                                                 int32_t rangeEnd,
-                                                UVector32 &foundBreaks ) const {
+                                                UVector32 &foundBreaks,
+                                                UErrorCode& status) const {
+    if (U_FAILURE(status)) return 0;
     if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) {
         return 0;       // Not enough characters for two words
     }
@@ -474,11 +479,10 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text,
     int32_t cpWordLength = 0;
     int32_t cuWordLength = 0;
     int32_t current;
-    UErrorCode status = U_ZERO_ERROR;
     PossibleWord words[LAO_LOOKAHEAD];
-    
+
     utext_setNativeIndex(text, rangeStart);
-    
+
     while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
         cuWordLength = 0;
         cpWordLength = 0;
@@ -527,7 +531,7 @@ foundBest:
         }
         
         // We come here after having either found a word or not. We look ahead to the
-        // next word. If it's not a dictionary word, we will combine it withe the word we
+        // next word. If it's not a dictionary word, we will combine it with the word we
         // just found (if there is one), but only if the preceding word does not exceed
         // the threshold.
         // The text iterator should now be positioned at the end of the word we found.
@@ -657,7 +661,9 @@ int32_t
 BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
                                                 int32_t rangeStart,
                                                 int32_t rangeEnd,
-                                                UVector32 &foundBreaks ) const {
+                                                UVector32 &foundBreaks,
+                                                UErrorCode& status ) const {
+    if (U_FAILURE(status)) return 0;
     if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) {
         return 0;       // Not enough characters for two words
     }
@@ -666,11 +672,10 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text,
     int32_t cpWordLength = 0;
     int32_t cuWordLength = 0;
     int32_t current;
-    UErrorCode status = U_ZERO_ERROR;
     PossibleWord words[BURMESE_LOOKAHEAD];
-    
+
     utext_setNativeIndex(text, rangeStart);
-    
+
     while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
         cuWordLength = 0;
         cpWordLength = 0;
@@ -719,7 +724,7 @@ foundBest:
         }
         
         // We come here after having either found a word or not. We look ahead to the
-        // next word. If it's not a dictionary word, we will combine it withe the word we
+        // next word. If it's not a dictionary word, we will combine it with the word we
         // just found (if there is one), but only if the preceding word does not exceed
         // the threshold.
         // The text iterator should now be positioned at the end of the word we found.
@@ -861,7 +866,9 @@ int32_t
 KhmerBreakEngine::divideUpDictionaryRange( UText *text,
                                                 int32_t rangeStart,
                                                 int32_t rangeEnd,
-                                                UVector32 &foundBreaks ) const {
+                                                UVector32 &foundBreaks,
+                                                UErrorCode& status ) const {
+    if (U_FAILURE(status)) return 0;
     if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
         return 0;       // Not enough characters for two words
     }
@@ -870,7 +877,6 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text,
     int32_t cpWordLength = 0;
     int32_t cuWordLength = 0;
     int32_t current;
-    UErrorCode status = U_ZERO_ERROR;
     PossibleWord words[KHMER_LOOKAHEAD];
 
     utext_setNativeIndex(text, rangeStart);
@@ -1110,7 +1116,9 @@ int32_t
 CjkBreakEngine::divideUpDictionaryRange( UText *inText,
         int32_t rangeStart,
         int32_t rangeEnd,
-        UVector32 &foundBreaks ) const {
+        UVector32 &foundBreaks,
+        UErrorCode& status) const {
+    if (U_FAILURE(status)) return 0;
     if (rangeStart >= rangeEnd) {
         return 0;
     }
@@ -1122,9 +1130,6 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText,
     // If NULL then mapping is 1:1
     LocalPointer<UVector32>     inputMap;
 
-    UErrorCode     status      = U_ZERO_ERROR;
-
-
     // if UText has the input string as one contiguous UTF-16 chunk
     if ((inText->providerProperties & utext_i32_flag(UTEXT_PROVIDER_STABLE_CHUNKS)) &&
          inText->chunkNativeStart <= rangeStart &&

+ 133 - 119
thirdparty/icu4c/common/dictbe.h

@@ -62,23 +62,25 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
    * @return true if this engine handles the particular character and break
    * type.
    */
-  virtual UBool handles(UChar32 c) const;
+  virtual UBool handles(UChar32 c) const override;
 
   /**
    * <p>Find any breaks within a run in the supplied text.</p>
    *
    * @param text A UText representing the text. The iterator is left at
-   * the end of the run of characters which the engine is capable of handling 
+   * the end of the run of characters which the engine is capable of handling
    * that starts from the first character in the range.
    * @param startPos The start of the run within the supplied text.
    * @param endPos The end of the run within the supplied text.
    * @param foundBreaks vector of int32_t to receive the break positions
+   * @param status Information on any errors encountered.
    * @return The number of breaks found.
    */
   virtual int32_t findBreaks( UText *text,
                               int32_t startPos,
                               int32_t endPos,
-                              UVector32 &foundBreaks ) const;
+                              UVector32 &foundBreaks,
+                              UErrorCode& status ) const override;
 
  protected:
 
@@ -96,12 +98,14 @@ class DictionaryBreakEngine : public LanguageBreakEngine {
   * @param rangeStart The start of the range of dictionary characters
   * @param rangeEnd The end of the range of dictionary characters
   * @param foundBreaks Output of C array of int32_t break positions, or 0
+  * @param status Information on any errors encountered.
   * @return The number of breaks found
   */
   virtual int32_t divideUpDictionaryRange( UText *text,
                                            int32_t rangeStart,
                                            int32_t rangeEnd,
-                                           UVector32 &foundBreaks ) const = 0;
+                                           UVector32 &foundBreaks,
+                                           UErrorCode& status) const = 0;
 
 };
 
@@ -153,12 +157,14 @@ class ThaiBreakEngine : public DictionaryBreakEngine {
   * @param rangeStart The start of the range of dictionary characters
   * @param rangeEnd The end of the range of dictionary characters
   * @param foundBreaks Output of C array of int32_t break positions, or 0
+  * @param status Information on any errors encountered.
   * @return The number of breaks found
   */
   virtual int32_t divideUpDictionaryRange( UText *text,
                                            int32_t rangeStart,
                                            int32_t rangeEnd,
-                                           UVector32 &foundBreaks ) const;
+                                           UVector32 &foundBreaks,
+                                           UErrorCode& status) const override;
 
 };
 
@@ -209,127 +215,133 @@ class LaoBreakEngine : public DictionaryBreakEngine {
   * @param rangeStart The start of the range of dictionary characters
   * @param rangeEnd The end of the range of dictionary characters
   * @param foundBreaks Output of C array of int32_t break positions, or 0
+  * @param status Information on any errors encountered.
   * @return The number of breaks found
   */
   virtual int32_t divideUpDictionaryRange( UText *text,
                                            int32_t rangeStart,
                                            int32_t rangeEnd,
-                                           UVector32 &foundBreaks ) const;
+                                           UVector32 &foundBreaks,
+                                           UErrorCode& status) const override;
+
+};
+
+/*******************************************************************
+ * BurmeseBreakEngine
+ */
+
+/**
+ * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p>
+ *
+ * <p>After it is constructed a BurmeseBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class BurmeseBreakEngine : public DictionaryBreakEngine {
+ private:
+    /**
+     * Character sets (and the dictionary matcher) used by this engine
+     * @internal
+     */
+
+  UnicodeSet                fBurmeseWordSet;
+  UnicodeSet                fEndWordSet;
+  UnicodeSet                fBeginWordSet;
+  UnicodeSet                fMarkSet;
+  DictionaryMatcher  *fDictionary;  // presumably the adopted adoptDictionary — confirm in the constructor definition
+
+ public:
+
+  /**
+   * <p>Constructor.</p>
+   *
+   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
+   * engine is deleted.
+   */
+  BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);  // status: error-code in/out parameter; its handling is in the constructor definition, not visible here
+
+  /**
+   * <p>Virtual destructor.</p>
+   */
+  virtual ~BurmeseBreakEngine();
+
+ protected:
+ /**
+  * <p>Divide up a range of known dictionary characters.</p>
+  *
+  * @param text A UText representing the text
+  * @param rangeStart The start of the range of dictionary characters
+  * @param rangeEnd The end of the range of dictionary characters
+  * @param foundBreaks Output UVector32 that receives the break positions
+  * @param status Information on any errors encountered.
+  * @return The number of breaks found
+  */
+  virtual int32_t divideUpDictionaryRange( UText *text,
+                                           int32_t rangeStart,
+                                           int32_t rangeEnd,
+                                           UVector32 &foundBreaks,
+                                           UErrorCode& status) const override;  // overrides the pure virtual declared in DictionaryBreakEngine
+
+};
+
+/*******************************************************************
+ * KhmerBreakEngine
+ */
+
+/**
+ * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
+ *
+ * <p>After it is constructed a KhmerBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class KhmerBreakEngine : public DictionaryBreakEngine {
+ private:
+    /**
+     * Character sets (and the dictionary matcher) used by this engine
+     * @internal
+     */
+
+  UnicodeSet                fKhmerWordSet;
+  UnicodeSet                fEndWordSet;
+  UnicodeSet                fBeginWordSet;
+  UnicodeSet                fMarkSet;
+  DictionaryMatcher  *fDictionary;  // presumably the adopted adoptDictionary — confirm in the constructor definition
+
+ public:
+
+  /**
+   * <p>Constructor.</p>
+   *
+   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
+   * engine is deleted.
+   */
+  KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);  // status: error-code in/out parameter; its handling is in the constructor definition, not visible here
+
+  /**
+   * <p>Virtual destructor.</p>
+   */
+  virtual ~KhmerBreakEngine();
+
+ protected:
+ /**
+  * <p>Divide up a range of known dictionary characters.</p>
+  *
+  * @param text A UText representing the text
+  * @param rangeStart The start of the range of dictionary characters
+  * @param rangeEnd The end of the range of dictionary characters
+  * @param foundBreaks Output UVector32 that receives the break positions
+  * @param status Information on any errors encountered.
+  * @return The number of breaks found
+  */
+  virtual int32_t divideUpDictionaryRange( UText *text,
+                                           int32_t rangeStart,
+                                           int32_t rangeEnd,
+                                           UVector32 &foundBreaks,
+                                           UErrorCode& status) const override;  // overrides the pure virtual declared in DictionaryBreakEngine
 
 };
 
-/******************************************************************* 
- * BurmeseBreakEngine 
- */ 
- 
-/** 
- * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a 
- * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p> 
- * 
- * <p>After it is constructed a BurmeseBreakEngine may be shared between 
- * threads without synchronization.</p> 
- */ 
-class BurmeseBreakEngine : public DictionaryBreakEngine { 
- private: 
-    /** 
-     * The set of characters handled by this engine 
-     * @internal 
-     */ 
- 
-  UnicodeSet                fBurmeseWordSet; 
-  UnicodeSet                fEndWordSet; 
-  UnicodeSet                fBeginWordSet; 
-  UnicodeSet                fMarkSet; 
-  DictionaryMatcher  *fDictionary; 
- 
- public: 
- 
-  /** 
-   * <p>Default constructor.</p> 
-   * 
-   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the 
-   * engine is deleted. 
-   */ 
-  BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); 
- 
-  /** 
-   * <p>Virtual destructor.</p> 
-   */ 
-  virtual ~BurmeseBreakEngine(); 
- 
- protected: 
- /** 
-  * <p>Divide up a range of known dictionary characters.</p> 
-  * 
-  * @param text A UText representing the text 
-  * @param rangeStart The start of the range of dictionary characters 
-  * @param rangeEnd The end of the range of dictionary characters 
-  * @param foundBreaks Output of C array of int32_t break positions, or 0 
-  * @return The number of breaks found 
-  */ 
-  virtual int32_t divideUpDictionaryRange( UText *text, 
-                                           int32_t rangeStart, 
-                                           int32_t rangeEnd, 
-                                           UVector32 &foundBreaks ) const; 
- 
-}; 
- 
-/******************************************************************* 
- * KhmerBreakEngine 
- */ 
- 
-/** 
- * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a 
- * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p> 
- * 
- * <p>After it is constructed a KhmerBreakEngine may be shared between 
- * threads without synchronization.</p> 
- */ 
-class KhmerBreakEngine : public DictionaryBreakEngine { 
- private: 
-    /** 
-     * The set of characters handled by this engine 
-     * @internal 
-     */ 
- 
-  UnicodeSet                fKhmerWordSet; 
-  UnicodeSet                fEndWordSet; 
-  UnicodeSet                fBeginWordSet; 
-  UnicodeSet                fMarkSet; 
-  DictionaryMatcher  *fDictionary; 
- 
- public: 
- 
-  /** 
-   * <p>Default constructor.</p> 
-   * 
-   * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the 
-   * engine is deleted. 
-   */ 
-  KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); 
- 
-  /** 
-   * <p>Virtual destructor.</p> 
-   */ 
-  virtual ~KhmerBreakEngine(); 
- 
- protected: 
- /** 
-  * <p>Divide up a range of known dictionary characters.</p> 
-  * 
-  * @param text A UText representing the text 
-  * @param rangeStart The start of the range of dictionary characters 
-  * @param rangeEnd The end of the range of dictionary characters 
-  * @param foundBreaks Output of C array of int32_t break positions, or 0 
-  * @return The number of breaks found 
-  */ 
-  virtual int32_t divideUpDictionaryRange( UText *text, 
-                                           int32_t rangeStart, 
-                                           int32_t rangeEnd, 
-                                           UVector32 &foundBreaks ) const; 
- 
-}; 
- 
 #if !UCONFIG_NO_NORMALIZATION
 
 /*******************************************************************
@@ -385,12 +397,14 @@ class CjkBreakEngine : public DictionaryBreakEngine {
      * @param rangeStart The start of the range of dictionary characters
      * @param rangeEnd The end of the range of dictionary characters
      * @param foundBreaks Output of C array of int32_t break positions, or 0
+     * @param status Information on any errors encountered.
      * @return The number of breaks found
      */
   virtual int32_t divideUpDictionaryRange( UText *text,
           int32_t rangeStart,
           int32_t rangeEnd,
-          UVector32 &foundBreaks ) const;
+          UVector32 &foundBreaks,
+          UErrorCode& status) const override;
 
 };
 

+ 5 - 5
thirdparty/icu4c/common/dictionarydata.h

@@ -107,8 +107,8 @@ public:
     virtual ~UCharsDictionaryMatcher();
     virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
-                            int32_t *prefix) const;
-    virtual int32_t getType() const;
+                            int32_t *prefix) const override;
+    virtual int32_t getType() const override;
 private:
     const UChar *characters;
     UDataMemory *file;
@@ -125,8 +125,8 @@ public:
     virtual ~BytesDictionaryMatcher();
     virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
-                            int32_t *prefix) const;
-    virtual int32_t getType() const;
+                            int32_t *prefix) const override;
+    virtual int32_t getType() const override;
 private:
     UChar32 transform(UChar32 c) const;
 
@@ -159,7 +159,7 @@ udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *out
  * Constants are defined in the DictionaryData class.
  *
  * For the data structure of BytesTrie & UCharsTrie see
- * http://site.icu-project.org/design/struct/tries
+ * https://icu.unicode.org/design/struct/tries
  * and the bytestrie.h and ucharstrie.h header files.
  *
  * int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_STRING_TRIE_OFFSET]/4;

+ 1 - 1
thirdparty/icu4c/common/dtintrv.cpp

@@ -53,7 +53,7 @@ DateInterval::clone() const {
 }
 
 
-UBool 
+bool
 DateInterval::operator==(const DateInterval& other) const { // Two intervals are equal when both endpoints match.
     return ( fromDate == other.fromDate && toDate == other.toDate );
 }

+ 220 - 0
thirdparty/icu4c/common/emojiprops.cpp

@@ -0,0 +1,220 @@
+// © 2021 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+// emojiprops.cpp
+// created: 2021sep04 Markus W. Scherer
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#include "unicode/ucharstrie.h"
+#include "unicode/ucptrie.h"
+#include "unicode/udata.h"
+#include "unicode/ustringtrie.h"
+#include "unicode/utf16.h"
+#include "emojiprops.h"
+#include "ucln.h"
+#include "ucln_cmn.h"
+#include "umutex.h"
+#include "uset_imp.h"
+
+U_NAMESPACE_BEGIN
+
+namespace {
+
+EmojiProps *singleton = nullptr;  // Shared instance: created in initSingleton(), deleted in emojiprops_cleanup().
+icu::UInitOnce emojiInitOnce = U_INITONCE_INITIALIZER;  // Init-once state passed to umtx_initOnce() in getSingleton().
+
+UBool U_CALLCONV emojiprops_cleanup() {  // Cleanup callback that initSingleton() registers via ucln_common_registerCleanup().
+    delete singleton;
+    singleton = nullptr;
+    emojiInitOnce.reset();  // presumably lets a later getSingleton() run initSingleton() again — confirm UInitOnce::reset() semantics
+    return true;
+}
+
+void U_CALLCONV initSingleton(UErrorCode &errorCode) {  // Creates the EmojiProps singleton; passed to umtx_initOnce() by getSingleton().
+    if (U_FAILURE(errorCode)) { return; }
+    singleton = new EmojiProps(errorCode);  // The EmojiProps constructor calls load(errorCode).
+    if (singleton == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+    } else if (U_FAILURE(errorCode)) {
+        delete singleton;  // Loading failed: discard the object so callers see nullptr.
+        singleton = nullptr;
+    }
+    ucln_common_registerCleanup(UCLN_COMMON_EMOJIPROPS, emojiprops_cleanup);  // Registered on both the success and the failure path.
+}
+
+// TODO: turn this into a shared helper function
+// Returns true only if all checks below pass; the format major version must match exactly and the minor version must be at least `minor`.
+UBool udata_isAcceptableMajorMinor(
+        const UDataInfo &info, const UChar *dataFormat, uint8_t major, uint8_t minor) {
+    return
+        info.size >= 20 &&  // NOTE(review): 20 is presumably the UDataInfo size covering the fields read below — confirm
+        info.isBigEndian == U_IS_BIG_ENDIAN &&  // data endianness must equal this build's
+        info.charsetFamily == U_CHARSET_FAMILY &&  // data charset family must equal this build's
+        info.dataFormat[0] == dataFormat[0] &&  // all four dataFormat units must match the caller's
+        info.dataFormat[1] == dataFormat[1] &&
+        info.dataFormat[2] == dataFormat[2] &&
+        info.dataFormat[3] == dataFormat[3] &&
+        info.formatVersion[0] == major &&  // exact major version
+        info.formatVersion[1] >= minor;  // minimum minor version
+}
+
+}  // namespace
+
+EmojiProps::~EmojiProps() {  // Releases the data memory and code point trie that load() opened.
+    udata_close(memory);
+    ucptrie_close(cpTrie);  // NOTE(review): either member can still be nullptr if load() returned early — assumes both close calls accept nullptr; confirm
+}
+
+const EmojiProps *
+EmojiProps::getSingleton(UErrorCode &errorCode) {  // Returns the shared instance, creating it through initSingleton() guarded by emojiInitOnce.
+    if (U_FAILURE(errorCode)) { return nullptr; }
+    umtx_initOnce(emojiInitOnce, &initSingleton, errorCode);
+    return singleton;  // nullptr when initSingleton() failed (it leaves singleton null on failure)
+}
+
+UBool U_CALLCONV
+EmojiProps::isAcceptable(void * /*context*/, const char * /*type*/, const char * /*name*/,
+                         const UDataInfo *pInfo) {  // Acceptance callback that load() hands to udata_openChoice(); only pInfo is used.
+    return udata_isAcceptableMajorMinor(*pInfo, u"Emoj", 1, 0);  // data format "Emoj", format version major 1, minor >= 0
+}
+
+void
+EmojiProps::load(UErrorCode &errorCode) {  // Opens the "uemoji" data item and fills in memory, cpTrie and stringTries[].
+    memory = udata_openChoice(nullptr, "icu", "uemoji", isAcceptable, this, &errorCode);
+    if (U_FAILURE(errorCode)) { return; }
+    const uint8_t *inBytes = (const uint8_t *)udata_getMemory(memory);
+    const int32_t *inIndexes = (const int32_t *)inBytes;  // the data starts with an array of int32_t indexes
+    int32_t indexesLength = inIndexes[IX_CPTRIE_OFFSET] / 4;  // first index = byte offset of the trie that follows the indexes, so /4 yields the index count
+    if (indexesLength <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET) {
+        errorCode = U_INVALID_FORMAT_ERROR;  // Not enough indexes.
+        return;
+    }
+
+    int32_t i = IX_CPTRIE_OFFSET;
+    int32_t offset = inIndexes[i++];
+    int32_t nextOffset = inIndexes[i];  // the following index entry bounds the code point trie bytes
+    cpTrie = ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_8,
+                                    inBytes + offset, nextOffset - offset, nullptr, &errorCode);
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+
+    for (i = IX_BASIC_EMOJI_TRIE_OFFSET; i <= IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET; ++i) {  // NOTE(review): the last pass reads inIndexes[i + 1], one entry beyond what the length check above guarantees — confirm real data always has more indexes
+        offset = inIndexes[i];
+        nextOffset = inIndexes[i + 1];
+        // Set/leave nullptr if there is no UCharsTrie.
+        const UChar *p = nextOffset > offset ? (const UChar *)(inBytes + offset) : nullptr;
+        stringTries[getStringTrieIndex(i)] = p;  // slot number = i - IX_BASIC_EMOJI_TRIE_OFFSET
+    }
+}
+
+void
+EmojiProps::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const {  // errorCode is accepted but not used here
+    // Add the start code point of each same-value range of the trie.
+    UChar32 start = 0, end;
+    uint32_t value;  // filled in by ucptrie_getRange(); not read afterwards
+    while ((end = ucptrie_getRange(cpTrie, start, UCPMAP_RANGE_NORMAL, 0,
+                                   nullptr, nullptr, &value)) >= 0) {  // stops once ucptrie_getRange() returns a negative end
+        sa->add(sa->set, start);
+        start = end + 1;  // next range begins right after this one
+    }
+}
+
+UBool
+EmojiProps::hasBinaryProperty(UChar32 c, UProperty which) {  // Code point query answered by the singleton's hasBinaryPropertyImpl().
+    UErrorCode errorCode = U_ZERO_ERROR;
+    const EmojiProps *ep = getSingleton(errorCode);
+    return U_SUCCESS(errorCode) && ep->hasBinaryPropertyImpl(c, which);  // false if the singleton could not be obtained; the error code is not reported to the caller
+}
+
+UBool
+EmojiProps::hasBinaryPropertyImpl(UChar32 c, UProperty which) const {  // Tests the cpTrie bit that corresponds to `which` for code point c.
+    if (which < UCHAR_EMOJI || UCHAR_RGI_EMOJI < which) {
+        return false;  // outside the property range that bitFlags[] covers
+    }
+    // Note: UCHAR_REGIONAL_INDICATOR is a single, hardcoded range implemented elsewhere.
+    static constexpr int8_t bitFlags[] = {  // indexed by (which - UCHAR_EMOJI); -1 = not answered by this function
+        BIT_EMOJI,                  // UCHAR_EMOJI=57
+        BIT_EMOJI_PRESENTATION,     // UCHAR_EMOJI_PRESENTATION=58
+        BIT_EMOJI_MODIFIER,         // UCHAR_EMOJI_MODIFIER=59
+        BIT_EMOJI_MODIFIER_BASE,    // UCHAR_EMOJI_MODIFIER_BASE=60
+        BIT_EMOJI_COMPONENT,        // UCHAR_EMOJI_COMPONENT=61
+        -1,                         // UCHAR_REGIONAL_INDICATOR=62
+        -1,                         // UCHAR_PREPENDED_CONCATENATION_MARK=63
+        BIT_EXTENDED_PICTOGRAPHIC,  // UCHAR_EXTENDED_PICTOGRAPHIC=64
+        BIT_BASIC_EMOJI,            // UCHAR_BASIC_EMOJI=65
+        -1,                         // UCHAR_EMOJI_KEYCAP_SEQUENCE=66
+        -1,                         // UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67
+        -1,                         // UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68
+        -1,                         // UCHAR_RGI_EMOJI_TAG_SEQUENCE=69
+        -1,                         // UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70
+        BIT_BASIC_EMOJI,            // UCHAR_RGI_EMOJI=71 (same bit as UCHAR_BASIC_EMOJI for a single code point)
+    };
+    int32_t bit = bitFlags[which - UCHAR_EMOJI];
+    if (bit < 0) {
+        return false;  // not a property that we support in this function
+    }
+    uint8_t bits = UCPTRIE_FAST_GET(cpTrie, UCPTRIE_8, c);  // 8-bit trie value for c, holding the BIT_... flags
+    return (bits >> bit) & 1;
+}
+
+UBool
+EmojiProps::hasBinaryProperty(const UChar *s, int32_t length, UProperty which) {  // String query answered by the singleton's hasBinaryPropertyImpl().
+    UErrorCode errorCode = U_ZERO_ERROR;
+    const EmojiProps *ep = getSingleton(errorCode);
+    return U_SUCCESS(errorCode) && ep->hasBinaryPropertyImpl(s, length, which);  // false if the singleton could not be obtained; the error code is not reported to the caller
+}
+
+UBool
+EmojiProps::hasBinaryPropertyImpl(const UChar *s, int32_t length, UProperty which) const {  // Tests whether string s has a value in the string trie(s) for `which`.
+    if (s == nullptr && length != 0) { return false; }
+    if (length <= 0 && (length == 0 || *s == 0)) { return false; }  // empty string
+    // The caller should have delegated single code points to hasBinaryProperty(c, which).
+    if (which < UCHAR_BASIC_EMOJI || UCHAR_RGI_EMOJI < which) {
+        return false;  // not one of the properties of strings handled here
+    }
+    UProperty firstProp = which, lastProp = which;
+    if (which == UCHAR_RGI_EMOJI) {
+        // RGI_Emoji is the union of the other emoji properties of strings.
+        firstProp = UCHAR_BASIC_EMOJI;
+        lastProp = UCHAR_RGI_EMOJI_ZWJ_SEQUENCE;
+    }
+    for (int32_t prop = firstProp; prop <= lastProp; ++prop) {
+        const UChar *trieUChars = stringTries[prop - UCHAR_BASIC_EMOJI];  // nullptr when load() found no trie for this property
+        if (trieUChars != nullptr) {
+            UCharsTrie trie(trieUChars);
+            UStringTrieResult result = trie.next(s, length);  // a negative length is passed through unchanged; presumably means NUL-terminated — confirm against UCharsTrie::next()
+            if (USTRINGTRIE_HAS_VALUE(result)) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+void
+EmojiProps::addStrings(const USetAdder *sa, UProperty which, UErrorCode &errorCode) const {  // Adds every string found in the trie(s) for `which` through sa->addString().
+    if (U_FAILURE(errorCode)) { return; }
+    if (which < UCHAR_BASIC_EMOJI || UCHAR_RGI_EMOJI < which) {
+        return;  // not one of the properties of strings handled here
+    }
+    UProperty firstProp = which, lastProp = which;
+    if (which == UCHAR_RGI_EMOJI) {
+        // RGI_Emoji is the union of the other emoji properties of strings.
+        firstProp = UCHAR_BASIC_EMOJI;
+        lastProp = UCHAR_RGI_EMOJI_ZWJ_SEQUENCE;
+    }
+    for (int32_t prop = firstProp; prop <= lastProp; ++prop) {
+        const UChar *trieUChars = stringTries[prop - UCHAR_BASIC_EMOJI];  // nullptr when load() found no trie for this property
+        if (trieUChars != nullptr) {
+            UCharsTrie::Iterator iter(trieUChars, 0, errorCode);
+            while (iter.next(errorCode)) {  // one pass per string the iterator yields
+                const UnicodeString &s = iter.getString();
+                sa->addString(sa->set, s.getBuffer(), s.length());
+            }
+        }
+    }
+}
+
+U_NAMESPACE_END

+ 90 - 0
thirdparty/icu4c/common/emojiprops.h

@@ -0,0 +1,90 @@
+// © 2021 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+// emojiprops.h
+// created: 2021sep03 Markus W. Scherer
+
+#ifndef __EMOJIPROPS_H__
+#define __EMOJIPROPS_H__
+
+#include "unicode/utypes.h"
+#include "unicode/ucptrie.h"
+#include "unicode/udata.h"
+#include "unicode/uobject.h"
+#include "uset_imp.h"
+
+U_NAMESPACE_BEGIN
+
+class EmojiProps : public UMemory {  // Emoji property data: per-code-point flag bits in a UCPTrie plus one string trie per property of strings.
+public:
+    // @internal
+    EmojiProps(UErrorCode &errorCode) { load(errorCode); }  // loads the data immediately; failures are reported through errorCode
+    ~EmojiProps();
+
+    static const EmojiProps *getSingleton(UErrorCode &errorCode);
+    static UBool hasBinaryProperty(UChar32 c, UProperty which);  // single code point query
+    static UBool hasBinaryProperty(const UChar *s, int32_t length, UProperty which);  // string query
+
+    void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
+    void addStrings(const USetAdder *sa, UProperty which, UErrorCode &errorCode) const;
+
+    enum {  // positions in the int32_t indexes[] array at the start of the data
+        // Byte offsets from the start of the data, after the generic header,
+        // in ascending order.
+        // UCPTrie=CodePointTrie, follows the indexes
+        IX_CPTRIE_OFFSET,
+        IX_RESERVED1,
+        IX_RESERVED2,
+        IX_RESERVED3,
+
+        // UCharsTrie=CharsTrie
+        IX_BASIC_EMOJI_TRIE_OFFSET,
+        IX_EMOJI_KEYCAP_SEQUENCE_TRIE_OFFSET,
+        IX_RGI_EMOJI_MODIFIER_SEQUENCE_TRIE_OFFSET,
+        IX_RGI_EMOJI_FLAG_SEQUENCE_TRIE_OFFSET,
+        IX_RGI_EMOJI_TAG_SEQUENCE_TRIE_OFFSET,
+        IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET,
+        IX_RESERVED10,
+        IX_RESERVED11,
+        IX_RESERVED12,
+        IX_TOTAL_SIZE,
+
+        // Not initially byte offsets.
+        IX_RESERVED14,
+        IX_RESERVED15,
+        IX_COUNT  // 16
+    };
+
+    // Properties in the code point trie.
+    enum {  // bit numbers within the trie value, tested as (bits >> bit) & 1
+        // https://www.unicode.org/reports/tr51/#Emoji_Properties
+        BIT_EMOJI,
+        BIT_EMOJI_PRESENTATION,
+        BIT_EMOJI_MODIFIER,
+        BIT_EMOJI_MODIFIER_BASE,
+        BIT_EMOJI_COMPONENT,
+        BIT_EXTENDED_PICTOGRAPHIC,
+        // https://www.unicode.org/reports/tr51/#Emoji_Sets
+        BIT_BASIC_EMOJI
+    };
+
+private:
+    static UBool U_CALLCONV
+    isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
+    /** Input i: One of the IX_..._TRIE_OFFSET indexes into the data file indexes[] array. */
+    static int32_t getStringTrieIndex(int32_t i) {
+        return i - IX_BASIC_EMOJI_TRIE_OFFSET;  // 0 for IX_BASIC_EMOJI_TRIE_OFFSET, counting up from there
+    }
+
+    void load(UErrorCode &errorCode);
+    UBool hasBinaryPropertyImpl(UChar32 c, UProperty which) const;
+    UBool hasBinaryPropertyImpl(const UChar *s, int32_t length, UProperty which) const;
+
+    UDataMemory *memory = nullptr;  // handle returned by udata_openChoice() in load()
+    UCPTrie *cpTrie = nullptr;  // opened by ucptrie_openFromBinary() in load()
+    const UChar *stringTries[6] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };  // one slot per IX_..._TRIE_OFFSET index above; nullptr where the data has no trie
+};
+
+U_NAMESPACE_END
+
+#endif  // __EMOJIPROPS_H__

+ 23 - 24
thirdparty/icu4c/common/filteredbrk.cpp

@@ -49,7 +49,7 @@ static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d,
 /**
  * Used with sortedInsert()
  */
-static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
+static int32_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
     const UnicodeString &a = *(const UnicodeString*)t1.pointer;
     const UnicodeString &b = *(const UnicodeString*)t2.pointer;
     return a.compare(b);
@@ -90,7 +90,6 @@ class U_COMMON_API UStringSet : public UVector {
     } else {
       sortedInsert(str, compareUnicodeString, status);
       if(U_FAILURE(status)) {
-        delete str;
         return false;
       }
       return true;
@@ -186,37 +185,37 @@ public:
   /* -- cloning and other subclass stuff -- */
   virtual BreakIterator *  createBufferClone(void * /*stackBuffer*/,
                                              int32_t &/*BufferSize*/,
-                                             UErrorCode &status) {
+                                             UErrorCode &status) override {
     // for now - always deep clone
     status = U_SAFECLONE_ALLOCATED_WARNING;
     return clone();
   }
-  virtual SimpleFilteredSentenceBreakIterator* clone() const { return new SimpleFilteredSentenceBreakIterator(*this); }
-  virtual UClassID getDynamicClassID(void) const { return NULL; }
-  virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; }
+  virtual SimpleFilteredSentenceBreakIterator* clone() const override { return new SimpleFilteredSentenceBreakIterator(*this); }
+  virtual UClassID getDynamicClassID(void) const override { return NULL; }
+  virtual bool operator==(const BreakIterator& o) const override { if(this==&o) return true; return false; }
 
   /* -- text modifying -- */
-  virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); }
-  virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; }
-  virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
-  virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }
+  virtual void setText(UText *text, UErrorCode &status) override { fDelegate->setText(text,status); }
+  virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) override { fDelegate->refreshInputText(input,status); return *this; }
+  virtual void adoptText(CharacterIterator* it) override { fDelegate->adoptText(it); }
+  virtual void setText(const UnicodeString &text) override { fDelegate->setText(text); }
 
   /* -- other functions that are just delegated -- */
-  virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); }
-  virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }
+  virtual UText *getUText(UText *fillIn, UErrorCode &status) const override { return fDelegate->getUText(fillIn,status); }
+  virtual CharacterIterator& getText(void) const override { return fDelegate->getText(); }
 
   /* -- ITERATION -- */
-  virtual int32_t first(void);
-  virtual int32_t preceding(int32_t offset);
-  virtual int32_t previous(void);
-  virtual UBool isBoundary(int32_t offset);
-  virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
+  virtual int32_t first(void) override;
+  virtual int32_t preceding(int32_t offset) override;
+  virtual int32_t previous(void) override;
+  virtual UBool isBoundary(int32_t offset) override;
+  virtual int32_t current(void) const override { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
 
-  virtual int32_t next(void);
+  virtual int32_t next(void) override;
 
-  virtual int32_t next(int32_t n);
-  virtual int32_t following(int32_t offset);
-  virtual int32_t last(void);
+  virtual int32_t next(int32_t n) override;
+  virtual int32_t following(int32_t offset) override;
+  virtual int32_t last(void) override;
 
 private:
     /**
@@ -488,9 +487,9 @@ public:
   virtual ~SimpleFilteredBreakIteratorBuilder();
   SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status);
   SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
-  virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
-  virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
-  virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status);
+  virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status) override;
+  virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status) override;
+  virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) override;
 private:
   UStringSet fSet;
 };

+ 4 - 4
thirdparty/icu4c/common/icuplug.cpp

@@ -284,7 +284,7 @@ static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *stat
 
 
 static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
-  if(plug->awaitingLoad) {  /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
+  if(plug->awaitingLoad) {  /* shouldn't happen. Plugin hasn't been loaded yet.*/
     *status = U_INTERNAL_PROGRAM_ERROR;
     return; 
   }
@@ -295,7 +295,7 @@ static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) {
 }
 
 static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) {
-  if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) {  /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
+  if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) {  /* shouldn't happen. Plugin hasn't been loaded yet.*/
     *status = U_INTERNAL_PROGRAM_ERROR;
     return; 
   }
@@ -317,7 +317,7 @@ static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) {
   if(U_FAILURE(*status)) {
     return;
   }
-  if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) {  /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/
+  if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) {  /* shouldn't happen. Plugin hasn't been loaded yet.*/
     *status = U_INTERNAL_PROGRAM_ERROR;
     return;
   }
@@ -526,7 +526,7 @@ uplug_getPlugLoadStatus(UPlugData *plug) {
 
 
 /**
- * Initialize a plugin fron an entrypoint and library - but don't load it.
+ * Initialize a plugin from an entrypoint and library - but don't load it.
  */
 static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *sym,
                                                          UErrorCode *status) {

+ 1 - 1
thirdparty/icu4c/common/loadednormalizer2impl.cpp

@@ -157,7 +157,7 @@ static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
     } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
         nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
     } else {
-        UPRV_UNREACHABLE;   // Unknown singleton
+        UPRV_UNREACHABLE_EXIT;   // Unknown singleton
     }
     ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
 }

+ 8 - 8
thirdparty/icu4c/common/localebuilder.cpp

@@ -228,7 +228,7 @@ LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
         return *this;
     }
     if (extensions_ == nullptr) {
-        extensions_ = new Locale();
+        extensions_ = Locale::getRoot().clone();
         if (extensions_ == nullptr) {
             status_ = U_MEMORY_ALLOCATION_ERROR;
             return *this;
@@ -259,11 +259,11 @@ LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
       return *this;
     }
     if (extensions_ == nullptr) {
-        extensions_ = new Locale();
-    }
-    if (extensions_ == nullptr) {
-        status_ = U_MEMORY_ALLOCATION_ERROR;
-        return *this;
+        extensions_ = Locale::getRoot().clone();
+        if (extensions_ == nullptr) {
+            status_ = U_MEMORY_ALLOCATION_ERROR;
+            return *this;
+        }
     }
     extensions_->setUnicodeKeywordValue(key, type, status_);
     return *this;
@@ -280,7 +280,7 @@ LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
         return *this;
     }
     if (extensions_ == nullptr) {
-        extensions_ = new Locale();
+        extensions_ = Locale::getRoot().clone();
         if (extensions_ == nullptr) {
             status_ = U_MEMORY_ALLOCATION_ERROR;
             return *this;
@@ -415,7 +415,7 @@ void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
         return;
     }
     if (extensions_ == nullptr) {
-        extensions_ = new Locale();
+        extensions_ = Locale::getRoot().clone();
         if (extensions_ == nullptr) {
             status_ = U_MEMORY_ALLOCATION_ERROR;
             return;

+ 3 - 3
thirdparty/icu4c/common/localematcher.cpp

@@ -187,7 +187,7 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListStrin
     for (int32_t i = 0; i < length; ++i) {
         Locale *locale = list.orphanLocaleAt(i);
         if (locale == nullptr) { continue; }
-        supportedLocales_->addElement(locale, errorCode_);
+        supportedLocales_->addElementX(locale, errorCode_);
         if (U_FAILURE(errorCode_)) {
             delete locale;
             break;
@@ -207,7 +207,7 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iter
             errorCode_ = U_MEMORY_ALLOCATION_ERROR;
             break;
         }
-        supportedLocales_->addElement(clone, errorCode_);
+        supportedLocales_->addElementX(clone, errorCode_);
         if (U_FAILURE(errorCode_)) {
             delete clone;
             break;
@@ -223,7 +223,7 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale
         errorCode_ = U_MEMORY_ALLOCATION_ERROR;
         return *this;
     }
-    supportedLocales_->addElement(clone, errorCode_);
+    supportedLocales_->addElementX(clone, errorCode_);
     if (U_FAILURE(errorCode_)) {
         delete clone;
     }

+ 14 - 2
thirdparty/icu4c/common/locdispnames.cpp

@@ -316,17 +316,29 @@ _getStringOrCopyKey(const char *path, const char *locale,
             /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
         }
     } else {
+        bool isLanguageCode = (uprv_strncmp(tableKey, _kLanguages, 9) == 0);
         /* Language code should not be a number. If it is, set the error code. */
-        if (!uprv_strncmp(tableKey, "Languages", 9) && uprv_strtol(itemKey, NULL, 10)) {
+        if (isLanguageCode && uprv_strtol(itemKey, NULL, 10)) {
             *pErrorCode = U_MISSING_RESOURCE_ERROR;
         } else {
             /* second-level item, use special fallback */
             s=uloc_getTableStringWithFallback(path, locale,
-                                               tableKey, 
+                                               tableKey,
                                                subTableKey,
                                                itemKey,
                                                &length,
                                                pErrorCode);
+            if (U_FAILURE(*pErrorCode) && isLanguageCode && itemKey != nullptr) {
+                // convert itemKey locale code to canonical form and try again, ICU-20870
+                *pErrorCode = U_ZERO_ERROR;
+                Locale canonKey = Locale::createCanonical(itemKey);
+                s=uloc_getTableStringWithFallback(path, locale,
+                                                    tableKey,
+                                                    subTableKey,
+                                                    canonKey.getName(),
+                                                    &length,
+                                                    pErrorCode);
+            }
         }
     }
 

+ 47 - 21
thirdparty/icu4c/common/locdspnm.cpp

@@ -313,29 +313,29 @@ public:
     LocaleDisplayNamesImpl(const Locale& locale, UDisplayContext *contexts, int32_t length);
     virtual ~LocaleDisplayNamesImpl();
 
-    virtual const Locale& getLocale() const;
-    virtual UDialectHandling getDialectHandling() const;
-    virtual UDisplayContext getContext(UDisplayContextType type) const;
+    virtual const Locale& getLocale() const override;
+    virtual UDialectHandling getDialectHandling() const override;
+    virtual UDisplayContext getContext(UDisplayContextType type) const override;
 
     virtual UnicodeString& localeDisplayName(const Locale& locale,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
     virtual UnicodeString& localeDisplayName(const char* localeId,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
     virtual UnicodeString& languageDisplayName(const char* lang,
-                                               UnicodeString& result) const;
+                                               UnicodeString& result) const override;
     virtual UnicodeString& scriptDisplayName(const char* script,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
     virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
     virtual UnicodeString& regionDisplayName(const char* region,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
     virtual UnicodeString& variantDisplayName(const char* variant,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
     virtual UnicodeString& keyDisplayName(const char* key,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
     virtual UnicodeString& keyValueDisplayName(const char* key,
                                                 const char* value,
-                                                UnicodeString& result) const;
+                                                UnicodeString& result) const override;
 private:
     UnicodeString& localeIdName(const char* localeId,
                                 UnicodeString& result, bool substitute) const;
@@ -407,7 +407,7 @@ struct LocaleDisplayNamesImpl::CapitalizationContextSink : public ResourceSink {
     virtual ~CapitalizationContextSink();
 
     virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/,
-            UErrorCode &errorCode) {
+            UErrorCode &errorCode) override {
         ResourceTable contexts = value.getTable(errorCode);
         if (U_FAILURE(errorCode)) { return; }
         for (int i = 0; contexts.getKeyAndValue(i, key, value); ++i) {
@@ -723,11 +723,25 @@ LocaleDisplayNamesImpl::localeIdName(const char* localeId,
             return result;
         }
     }
-    if (substitute) {
-        return langData.get("Languages", localeId, result);
-    } else {
-        return langData.getNoFallback("Languages", localeId, result);
+    langData.getNoFallback("Languages", localeId, result);
+    if (result.isBogus() && uprv_strchr(localeId, '_') == NULL) {
+        // Canonicalize lang and try again, ICU-20870
+        // (only for language codes without script or region)
+        Locale canonLocale = Locale::createCanonical(localeId);
+        const char* canonLocId = canonLocale.getName();
+        if (nameLength == UDISPCTX_LENGTH_SHORT) {
+            langData.getNoFallback("Languages%short", canonLocId, result);
+            if (!result.isBogus()) {
+                return result;
+            }
+        }
+        langData.getNoFallback("Languages", canonLocId, result);
     }
+    if (result.isBogus() && substitute) {
+        // use key, this is what langData.get (with fallback) falls back to.
+        result.setTo(UnicodeString(localeId, -1, US_INV)); // use key (
+    }
+    return result;
 }
 
 UnicodeString&
@@ -742,10 +756,22 @@ LocaleDisplayNamesImpl::languageDisplayName(const char* lang,
             return adjustForUsageAndContext(kCapContextUsageLanguage, result);
         }
     }
-    if (substitute == UDISPCTX_SUBSTITUTE) {
-        langData.get("Languages", lang, result);
-    } else {
-        langData.getNoFallback("Languages", lang, result);
+    langData.getNoFallback("Languages", lang, result);
+    if (result.isBogus()) {
+        // Canonicalize lang and try again, ICU-20870
+        Locale canonLocale = Locale::createCanonical(lang);
+        const char* canonLocId = canonLocale.getName();
+        if (nameLength == UDISPCTX_LENGTH_SHORT) {
+            langData.getNoFallback("Languages%short", canonLocId, result);
+            if (!result.isBogus()) {
+                return adjustForUsageAndContext(kCapContextUsageLanguage, result);
+            }
+        }
+        langData.getNoFallback("Languages", canonLocId, result);
+    }
+    if (result.isBogus() && substitute == UDISPCTX_SUBSTITUTE) {
+        // use key, this is what langData.get (with fallback) falls back to.
+        result.setTo(UnicodeString(lang, -1, US_INV)); // use key (
     }
     return adjustForUsageAndContext(kCapContextUsageLanguage, result);
 }

+ 41 - 65
thirdparty/icu4c/common/locid.cpp

@@ -297,13 +297,12 @@ Locale::Locale( const   char * newLanguage,
     else
     {
         UErrorCode status = U_ZERO_ERROR;
-        int32_t size = 0;
         int32_t lsize = 0;
         int32_t csize = 0;
         int32_t vsize = 0;
         int32_t ksize = 0;
 
-        // Calculate the size of the resulting string.
+        // Check the sizes of the input strings.
 
         // Language
         if ( newLanguage != NULL )
@@ -313,7 +312,6 @@ Locale::Locale( const   char * newLanguage,
                 setToBogus();
                 return;
             }
-            size = lsize;
         }
 
         CharString togo(newLanguage, lsize, status); // start with newLanguage
@@ -326,7 +324,6 @@ Locale::Locale( const   char * newLanguage,
                 setToBogus();
                 return;
             }
-            size += csize;
         }
 
         // _Variant
@@ -350,21 +347,6 @@ Locale::Locale( const   char * newLanguage,
             }
         }
 
-        if( vsize > 0 )
-        {
-            size += vsize;
-        }
-
-        // Separator rules:
-        if ( vsize > 0 )
-        {
-            size += 2;  // at least: __v
-        }
-        else if ( csize > 0 )
-        {
-            size += 1;  // at least: _v
-        }
-
         if ( newKeywords != NULL)
         {
             ksize = (int32_t)uprv_strlen(newKeywords);
@@ -372,11 +354,9 @@ Locale::Locale( const   char * newLanguage,
               setToBogus();
               return;
             }
-            size += ksize + 1;
         }
 
-        //  NOW we have the full locale string..
-        // Now, copy it back.
+        // We've checked the input sizes, now build up the full locale string..
 
         // newLanguage is already copied
 
@@ -469,14 +449,18 @@ Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
     if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName);
     if (fullName != fullNameBuffer) uprv_free(fullName);
 
-    if (other.fullName == other.fullNameBuffer) {
+    if (other.fullName == other.fullNameBuffer || other.baseName == other.fullNameBuffer) {
         uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
+    }
+    if (other.fullName == other.fullNameBuffer) {
         fullName = fullNameBuffer;
     } else {
         fullName = other.fullName;
     }
 
-    if (other.baseName == other.fullName) {
+    if (other.baseName == other.fullNameBuffer) {
+        baseName = fullNameBuffer;
+    } else if (other.baseName == other.fullName) {
         baseName = fullName;
     } else {
         baseName = other.baseName;
@@ -499,7 +483,7 @@ Locale::clone() const {
     return new Locale(*this);
 }
 
-UBool
+bool
 Locale::operator==( const   Locale& other) const
 {
     return (uprv_strcmp(other.fullName, fullName) == 0);
@@ -768,7 +752,7 @@ AliasDataBuilder::readLanguageAlias(
         alias, strings, types, replacementIndexes, length,
 #if U_DEBUG
         [](const char* type) {
-            // Assert the aliasFrom only contains the following possibilties
+            // Assert the aliasFrom only contains the following possibilities
             // language_REGION_variant
             // language_REGION
             // language_variant
@@ -1227,7 +1211,7 @@ AliasReplacer::parseLanguageReplacement(
         status = U_MEMORY_ALLOCATION_ERROR;
         return;
     }
-    toBeFreed.addElement(str, status);
+    toBeFreed.addElementX(str, status);
     char* data = str->data();
     replacedLanguage = (const char*) data;
     char* endOfField = uprv_strchr(data, '_');
@@ -1367,7 +1351,7 @@ AliasReplacer::replaceLanguage(
         }
         if (replacedExtensions != nullptr) {
             // DO NOTHING
-            // UTS35 does not specifiy what should we do if we have extensions in the
+            // UTS35 does not specify what we should do if we have extensions in the
             // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
             // extensions in them languageAlias:
             //  i_default => en_x_i_default
@@ -1441,7 +1425,7 @@ AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
             return false;
         }
         replacedRegion = item->data();
-        toBeFreed.addElement(item.orphan(), status);
+        toBeFreed.addElementX(item.orphan(), status);
     }
     U_ASSERT(!same(region, replacedRegion));
     region = replacedRegion;
@@ -1567,6 +1551,7 @@ AliasReplacer::replaceTransformedExtensions(
             const char* tvalue = uprv_strchr(tkey, '-');
             if (tvalue == nullptr) {
                 status = U_ILLEGAL_ARGUMENT_ERROR;
+                return false;
             }
             const char* nextTKey = ultag_getTKeyStart(tvalue);
             if (nextTKey != nullptr) {
@@ -1578,13 +1563,8 @@ AliasReplacer::replaceTransformedExtensions(
             }
             tkey = nextTKey;
         } while (tkey != nullptr);
-        tfields.sort([](UElement e1, UElement e2) -> int8_t {
-            // uprv_strcmp return int and in some platform, such as arm64-v8a,
-            // it may return positive values > 127 which cause the casted value
-            // of int8_t negative.
-            int res = uprv_strcmp(
-                (const char*)e1.pointer, (const char*)e2.pointer);
-            return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
+        tfields.sort([](UElement e1, UElement e2) -> int32_t {
+            return uprv_strcmp((const char*)e1.pointer, (const char*)e2.pointer);
         }, status);
         for (int32_t i = 0; i < tfields.size(); i++) {
              if (output.length() > 0) {
@@ -1592,8 +1572,11 @@ AliasReplacer::replaceTransformedExtensions(
              }
              const char* tfield = (const char*) tfields.elementAt(i);
              const char* tvalue = uprv_strchr(tfield, '-');
+             if (tvalue == nullptr) {
+                 status = U_ILLEGAL_ARGUMENT_ERROR;
+                 return false;
+             }
              // Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
-             U_ASSERT(tvalue != nullptr);
              *((char*)tvalue++) = '\0'; // NULL terminate tkey
              output.append(tfield, status).append('-', status);
              const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue, nullptr, nullptr);
@@ -1623,13 +1606,8 @@ AliasReplacer::outputToString(
         if (!notEmpty(script) && !notEmpty(region)) {
           out.append(SEP_CHAR, status);
         }
-        variants.sort([](UElement e1, UElement e2) -> int8_t {
-            // uprv_strcmp return int and in some platform, such as arm64-v8a,
-            // it may return positive values > 127 which cause the casted value
-            // of int8_t negative.
-            int res = uprv_strcmp(
-                (const char*)e1.pointer, (const char*)e2.pointer);
-            return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
+        variants.sort([](UElement e1, UElement e2) -> int32_t {
+            return uprv_strcmp((const char*)e1.pointer, (const char*)e2.pointer);
         }, status);
         int32_t variantsStart = out.length();
         for (int32_t i = 0; i < variants.size(); i++) {
@@ -1681,21 +1659,16 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
         while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr &&
                U_SUCCESS(status)) {
             *end = NULL_CHAR;  // null terminate inside variantsBuff
-            variants.addElement(start, status);
+            variants.addElementX(start, status);
             start = end + 1;
         }
-        variants.addElement(start, status);
+        variants.addElementX(start, status);
     }
     if (U_FAILURE(status)) { return false; }
 
     // Sort the variants
-    variants.sort([](UElement e1, UElement e2) -> int8_t {
-        // uprv_strcmp return int and in some platform, such as arm64-v8a,
-        // it may return positive values > 127 which cause the casted value
-        // of int8_t negative.
-        int res = uprv_strcmp(
-            (const char*)e1.pointer, (const char*)e2.pointer);
-        return (res == 0) ? 0 : ((res > 0) ? 1 : -1);
+    variants.sort([](UElement e1, UElement e2) -> int32_t {
+        return uprv_strcmp((const char*)e1.pointer, (const char*)e2.pointer);
     }, status);
 
     // A changed count to assert when loop too many times.
@@ -1737,7 +1710,7 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
     }  // while(1)
 
     if (U_FAILURE(status)) { return false; }
-    // Nothing changed and we know the order of the vaiants are not change
+    // Nothing changed, and we know the order of the variants has not changed
     // because we have no variant or only one.
     const char* extensionsStr = locale_getKeywordsStart(locale.getName());
     if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
@@ -2445,7 +2418,7 @@ private:
 
 public:
     static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
-    virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
+    virtual UClassID getDynamicClassID(void) const override { return getStaticClassID(); }
 public:
     KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
         : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) {
@@ -2469,13 +2442,13 @@ public:
 
     virtual ~KeywordEnumeration();
 
-    virtual StringEnumeration * clone() const
+    virtual StringEnumeration * clone() const override
     {
         UErrorCode status = U_ZERO_ERROR;
         return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status);
     }
 
-    virtual int32_t count(UErrorCode &/*status*/) const {
+    virtual int32_t count(UErrorCode &/*status*/) const override {
         char *kw = keywords;
         int32_t result = 0;
         while(*kw) {
@@ -2485,7 +2458,7 @@ public:
         return result;
     }
 
-    virtual const char* next(int32_t* resultLength, UErrorCode& status) {
+    virtual const char* next(int32_t* resultLength, UErrorCode& status) override {
         const char* result;
         int32_t len;
         if(U_SUCCESS(status) && *current != 0) {
@@ -2504,13 +2477,13 @@ public:
         return result;
     }
 
-    virtual const UnicodeString* snext(UErrorCode& status) {
+    virtual const UnicodeString* snext(UErrorCode& status) override {
         int32_t resultLength = 0;
         const char *s = next(&resultLength, status);
         return setChars(s, resultLength, status);
     }
 
-    virtual void reset(UErrorCode& /*status*/) {
+    virtual void reset(UErrorCode& /*status*/) override {
         current = keywords;
     }
 };
@@ -2528,18 +2501,18 @@ public:
     using KeywordEnumeration::KeywordEnumeration;
     virtual ~UnicodeKeywordEnumeration();
 
-    virtual const char* next(int32_t* resultLength, UErrorCode& status) {
+    virtual const char* next(int32_t* resultLength, UErrorCode& status) override {
         const char* legacy_key = KeywordEnumeration::next(nullptr, status);
-        if (U_SUCCESS(status) && legacy_key != nullptr) {
+        while (U_SUCCESS(status) && legacy_key != nullptr) {
             const char* key = uloc_toUnicodeLocaleKey(legacy_key);
-            if (key == nullptr) {
-                status = U_ILLEGAL_ARGUMENT_ERROR;
-            } else {
+            if (key != nullptr) {
                 if (resultLength != nullptr) {
                     *resultLength = static_cast<int32_t>(uprv_strlen(key));
                 }
                 return key;
             }
+            // Not a Unicode keyword, could be a t, x or other, continue to look at the next one.
+            legacy_key = KeywordEnumeration::next(nullptr, status);
         }
         if (resultLength != nullptr) *resultLength = 0;
         return nullptr;
@@ -2696,6 +2669,9 @@ Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErro
         if (fullName != fullNameBuffer) {
             // if full Name is already on the heap, need to free it.
             uprv_free(fullName);
+            if (baseName == fullName) {
+                baseName = newFullName; // baseName should not point to freed memory.
+            }
         }
         fullName = newFullName;
         status = U_ZERO_ERROR;

+ 23 - 18
thirdparty/icu4c/common/loclikely.cpp

@@ -115,7 +115,7 @@ findLikelySubtags(const char* localeID,
  * @param tag The tag to add.
  * @param tagLength The length of the tag.
  * @param buffer The output buffer.
- * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
+ * @param bufferLength The length of the output buffer.  This is an input/output parameter.
  **/
 static void U_CALLCONV
 appendTag(
@@ -1181,13 +1181,13 @@ error:
     }
 }
 
-static UBool
+static int32_t
 do_canonicalize(const char*    localeID,
          char* buffer,
          int32_t bufferCapacity,
          UErrorCode* err)
 {
-    uloc_canonicalize(
+    int32_t canonicalizedSize = uloc_canonicalize(
         localeID,
         buffer,
         bufferCapacity,
@@ -1195,16 +1195,14 @@ do_canonicalize(const char*    localeID,
 
     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
         *err == U_BUFFER_OVERFLOW_ERROR) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
-
-        return FALSE;
+        return canonicalizedSize;
     }
     else if (U_FAILURE(*err)) {
 
-        return FALSE;
+        return -1;
     }
     else {
-        return TRUE;
+        return canonicalizedSize;
     }
 }
 
@@ -1241,12 +1239,17 @@ static UBool
 _ulocimp_addLikelySubtags(const char* localeID,
                           icu::ByteSink& sink,
                           UErrorCode* status) {
-    char localeBuffer[ULOC_FULLNAME_CAPACITY];
-
-    if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
-        return _uloc_addLikelySubtags(localeBuffer, sink, status);
+    PreflightingLocaleIDBuffer localeBuffer;
+    do {
+        localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
+            localeBuffer.getCapacity(), status);
+    } while (localeBuffer.needToTryAgain(status));
+    
+    if (U_SUCCESS(*status)) {
+        return _uloc_addLikelySubtags(localeBuffer.getBuffer(), sink, status);
+    } else {
+        return FALSE;
     }
-    return FALSE;
 }
 
 U_CAPI void U_EXPORT2
@@ -1289,11 +1292,13 @@ U_CAPI void U_EXPORT2
 ulocimp_minimizeSubtags(const char* localeID,
                         icu::ByteSink& sink,
                         UErrorCode* status) {
-    char localeBuffer[ULOC_FULLNAME_CAPACITY];
-
-    if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
-        _uloc_minimizeSubtags(localeBuffer, sink, status);
-    }
+    PreflightingLocaleIDBuffer localeBuffer;
+    do {
+        localeBuffer.requestedCapacity = do_canonicalize(localeID, localeBuffer.getBuffer(),
+            localeBuffer.getCapacity(), status);
+    } while (localeBuffer.needToTryAgain(status));
+    
+    _uloc_minimizeSubtags(localeBuffer.getBuffer(), sink, status);
 }
 
 // Pairs of (language subtag, + or -) for finding out fast if common languages

+ 1 - 1
thirdparty/icu4c/common/lsr.cpp

@@ -72,7 +72,7 @@ UBool LSR::isEquivalentTo(const LSR &other) const {
         (regionIndex > 0 || uprv_strcmp(region, other.region) == 0);
 }
 
-UBool LSR::operator==(const LSR &other) const {
+bool LSR::operator==(const LSR &other) const {
     return
         uprv_strcmp(language, other.language) == 0 &&
         uprv_strcmp(script, other.script) == 0 &&

+ 2 - 2
thirdparty/icu4c/common/lsr.h

@@ -65,9 +65,9 @@ struct LSR final : public UMemory {
     static int32_t indexForRegion(const char *region);
 
     UBool isEquivalentTo(const LSR &other) const;
-    UBool operator==(const LSR &other) const;
+    bool operator==(const LSR &other) const;
 
-    inline UBool operator!=(const LSR &other) const {
+    inline bool operator!=(const LSR &other) const {
         return !operator==(other);
     }
 

+ 855 - 0
thirdparty/icu4c/common/lstmbe.cpp

@@ -0,0 +1,855 @@
+// © 2021 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <utility>
+#include <ctgmath>
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "brkeng.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "lstmbe.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include "ubrkimpl.h"
+#include "uresimp.h"
+#include "uvectr32.h"
+#include "uvector.h"
+
+#include "unicode/brkiter.h"
+#include "unicode/resbund.h"
+#include "unicode/ubrk.h"
+#include "unicode/uniset.h"
+#include "unicode/ustring.h"
+#include "unicode/utf.h"
+
+U_NAMESPACE_BEGIN
+
+// Uncomment the following #define to debug.
+// #define LSTM_DEBUG 1
+// #define LSTM_VECTORIZER_DEBUG 1
+
+/**
+ * Read-only view of a one-dimensional array of floats.
+ * d1() reports the number of elements and get() returns a single element.
+ */
+class ReadArray1D {
+public:
+    virtual ~ReadArray1D();
+
+    // Number of elements in the array.
+    virtual int32_t d1() const = 0;
+
+    // Value stored at index i.
+    virtual float get(int32_t i) const = 0;
+
+#ifdef LSTM_DEBUG
+    // Debug helper: writes all elements to stdout between square brackets,
+    // starting a new line after every fourth value.
+    void print() const {
+        printf("\n[");
+        int32_t pos = 0;
+        while (pos < d1()) {
+            printf("%0.8e ", get(pos));
+            const bool endOfGroup = (pos % 4 == 3);
+            if (endOfGroup) {
+                printf("\n");
+            }
+            ++pos;
+        }
+        printf("]\n");
+    }
+#endif
+};
+
+// The destructor is defined out of line.
+ReadArray1D::~ReadArray1D() {}
+
+/**
+ * Read-only view of a two-dimensional array of floats.
+ * d1() and d2() report the two dimensions; get(i, j) returns one element.
+ */
+class ReadArray2D {
+public:
+    virtual ~ReadArray2D();
+
+    // Size of the first dimension.
+    virtual int32_t d1() const = 0;
+
+    // Size of the second dimension.
+    virtual int32_t d2() const = 0;
+
+    // Value stored at position (i, j).
+    virtual float get(int32_t i, int32_t j) const = 0;
+};
+
+// The destructor is defined out of line.
+ReadArray2D::~ReadArray2D() {}
+
+/**
+ * Presents an existing float buffer as a read-only 1D array.
+ * The object neither owns the buffer nor copies it; it only stores the
+ * pointer and the element count it was given.
+ */
+class ConstArray1D : public ReadArray1D {
+public:
+    // Empty view: null data pointer, zero elements.
+    ConstArray1D() : data_(nullptr), d1_(0) {}
+
+    // View over d1 floats starting at data.
+    ConstArray1D(const float* data, int32_t d1) : data_(data), d1_(d1) {}
+
+    virtual ~ConstArray1D();
+
+    // Points this view at d1 values starting at data. The 32-bit integers
+    // are reinterpreted as floats in place (nothing is copied or owned),
+    // which is intended for data coming directly from memory mapped
+    // resources; the assertion documents the IEEE 754 requirement.
+    void init(const int32_t* data, int32_t d1) {
+        U_ASSERT(IEEE_754 == 1);
+        d1_ = d1;
+        data_ = reinterpret_cast<const float*>(data);
+    }
+
+    // ReadArray1D methods.
+    virtual int32_t d1() const override { return d1_; }
+    virtual float get(int32_t i) const override {
+        U_ASSERT(i < d1_);
+        const float* element = data_ + i;
+        return *element;
+    }
+
+private:
+    const float* data_;  // not owned
+    int32_t d1_;         // number of elements reachable through data_
+};
+
+ConstArray1D::~ConstArray1D() {}
+
+/**
+ * Presents an existing float buffer as a read-only 2D array.
+ * The object neither owns the buffer nor copies it. Element (i, j) is read
+ * from offset i * d2 + j of the buffer.
+ */
+class ConstArray2D : public ReadArray2D {
+public:
+    // Empty view: null data pointer, both dimensions zero.
+    ConstArray2D() : data_(nullptr), d1_(0), d2_(0) {}
+
+    // View over a d1 x d2 block of floats starting at data.
+    ConstArray2D(const float* data, int32_t d1, int32_t d2)
+        : data_(data), d1_(d1), d2_(d2) {}
+
+    virtual ~ConstArray2D();
+
+    // Points this view at a d1 x d2 block starting at data. The 32-bit
+    // integers are reinterpreted as floats in place (nothing is copied or
+    // owned), which is intended for data coming directly from memory mapped
+    // resources; the assertion documents the IEEE 754 requirement.
+    void init(const int32_t* data, int32_t d1, int32_t d2) {
+        U_ASSERT(IEEE_754 == 1);
+        d1_ = d1;
+        d2_ = d2;
+        data_ = reinterpret_cast<const float*>(data);
+    }
+
+    // ReadArray2D methods.
+    inline int32_t d1() const override { return d1_; }
+    inline int32_t d2() const override { return d2_; }
+    float get(int32_t i, int32_t j) const override {
+        U_ASSERT(i < d1_);
+        U_ASSERT(j < d2_);
+        const int32_t offset = i * d2_ + j;
+        return data_[offset];
+    }
+
+    // Expose the ith row as a ConstArray1D that shares this object's buffer.
+    inline ConstArray1D row(int32_t i) const {
+        U_ASSERT(i < d1_);
+        const float* rowStart = data_ + i * d2_;
+        return ConstArray1D(rowStart, d2_);
+    }
+
+private:
+    const float* data_;  // not owned
+    int32_t d1_;         // size of the first dimension
+    int32_t d2_;         // size of the second dimension (length of one row)
+};
+
+ConstArray2D::~ConstArray2D() {}
+
+/**
+ * A class to allocate data as a writable 1D array.
+ * This is the main class implement matrix operation.
+ */
+class Array1D : public ReadArray1D {
+public:
+    Array1D() : memory_(nullptr), data_(nullptr), d1_(0) {}
+    Array1D(int32_t d1, UErrorCode &status)
+        : memory_(uprv_malloc(d1 * sizeof(float))),
+          data_((float*)memory_), d1_(d1) {
+        if (U_SUCCESS(status)) {
+            if (memory_ == nullptr) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            clear();
+        }
+    }
+
+    virtual ~Array1D();
+
+    // A special constructor which does not own the memory but writeable
+    // as a slice of an array.
+    Array1D(float* data, int32_t d1)
+        : memory_(nullptr), data_(data), d1_(d1) {}
+
+    // ReadArray1D methods.
+    virtual int32_t d1() const override { return d1_; }
+    virtual float get(int32_t i) const override {
+        U_ASSERT(i < d1_);
+        return data_[i];
+    }
+
+    // Return the index which point to the max data in the array.
+    inline int32_t maxIndex() const {
+        int32_t index = 0;
+        float max = data_[0];
+        for (int32_t i = 1; i < d1_; i++) {
+            if (data_[i] > max) {
+                max = data_[i];
+                index = i;
+            }
+        }
+        return index;
+    }
+
+    // Slice part of the array to a new one.
+    inline Array1D slice(int32_t from, int32_t size) const {
+        U_ASSERT(from >= 0);
+        U_ASSERT(from < d1_);
+        U_ASSERT(from + size <= d1_);
+        return Array1D(data_ + from, size);
+    }
+
+    // Add dot product of a 1D array and a 2D array into this one.
+    inline Array1D& addDotProduct(const ReadArray1D& a, const ReadArray2D& b) {
+        U_ASSERT(a.d1() == b.d1());
+        U_ASSERT(b.d2() == d1());
+        for (int32_t i = 0; i < d1(); i++) {
+            for (int32_t j = 0; j < a.d1(); j++) {
+                data_[i] += a.get(j) * b.get(j, i);
+            }
+        }
+        return *this;
+    }
+
+    // Hadamard Product the values of another array of the same size into this one.
+    inline Array1D& hadamardProduct(const ReadArray1D& a) {
+        U_ASSERT(a.d1() == d1());
+        for (int32_t i = 0; i < d1(); i++) {
+            data_[i] *= a.get(i);
+        }
+        return *this;
+    }
+
+    // Add the Hadamard Product of two arrays of the same size into this one.
+    inline Array1D& addHadamardProduct(const ReadArray1D& a, const ReadArray1D& b) {
+        U_ASSERT(a.d1() == d1());
+        U_ASSERT(b.d1() == d1());
+        for (int32_t i = 0; i < d1(); i++) {
+            data_[i] += a.get(i) * b.get(i);
+        }
+        return *this;
+    }
+
+    // Add the values of another array of the same size into this one.
+    inline Array1D& add(const ReadArray1D& a) {
+        U_ASSERT(a.d1() == d1());
+        for (int32_t i = 0; i < d1(); i++) {
+            data_[i] += a.get(i);
+        }
+        return *this;
+    }
+
+    // Assign the values of another array of the same size into this one.
+    inline Array1D& assign(const ReadArray1D& a) {
+        U_ASSERT(a.d1() == d1());
+        for (int32_t i = 0; i < d1(); i++) {
+            data_[i] = a.get(i);
+        }
+        return *this;
+    }
+
+    // Apply tanh to all the elements in the array.
+    inline Array1D& tanh() {
+        return tanh(*this);
+    }
+
+    // Apply tanh of a and store into this array.
+    inline Array1D& tanh(const Array1D& a) {
+        U_ASSERT(a.d1() == d1());
+        for (int32_t i = 0; i < d1_; i++) {
+            data_[i] = std::tanh(a.get(i));
+        }
+        return *this;
+    }
+
+    // Apply sigmoid to all the elements in the array.
+    inline Array1D& sigmoid() {
+        for (int32_t i = 0; i < d1_; i++) {
+            data_[i] = 1.0f/(1.0f + expf(-data_[i]));
+        }
+        return *this;
+    }
+
+    // Zero-fills the d1_ floats of this array. Returns *this for chaining.
+    inline Array1D& clear() {
+        const size_t byteCount = d1_ * sizeof(float);
+        uprv_memset(data_, 0, byteCount);
+        return *this;
+    }
+
+private:
+    void* memory_;
+    float* data_;
+    int32_t d1_;
+};
+
+// Hands memory_ back to uprv_free(); data_ is never freed separately.
+Array1D::~Array1D() {
+    uprv_free(memory_);
+}
+
+// A d1 x d2 matrix of floats stored row by row in a single uprv_malloc'ed
+// block that is released by the destructor.
+//
+// All byte counts and element offsets are computed in size_t: multiplying two
+// int32_t dimensions directly overflows once d1 * d2 exceeds INT32_MAX, which
+// would under-allocate the buffer for very long inputs.
+class Array2D : public ReadArray2D {
+public:
+    // Creates an empty 0 x 0 matrix that owns no memory.
+    Array2D() : memory_(nullptr), data_(nullptr), d1_(0), d2_(0) {}
+
+    // Allocates a d1 x d2 matrix. When status is a success code on entry the
+    // matrix is zero-filled, or status is set to U_MEMORY_ALLOCATION_ERROR if
+    // the block could not be allocated (including when the requested size is
+    // negative or does not fit in size_t). When status already indicates a
+    // failure the contents are left uninitialized.
+    Array2D(int32_t d1, int32_t d2, UErrorCode &status)
+        : memory_(allocate(d1, d2)),
+          data_(static_cast<float*>(memory_)), d1_(d1), d2_(d2) {
+        if (U_SUCCESS(status)) {
+            if (memory_ == nullptr) {
+                status = U_MEMORY_ALLOCATION_ERROR;
+                return;
+            }
+            clear();
+        }
+    }
+    virtual ~Array2D();
+
+    // ReadArray2D methods.
+    virtual int32_t d1() const override { return d1_; }
+    virtual int32_t d2() const override { return d2_; }
+    // Returns the element at row i, column j.
+    virtual float get(int32_t i, int32_t j) const override {
+        U_ASSERT(i < d1_);
+        U_ASSERT(j < d2_);
+        return data_[static_cast<size_t>(i) * static_cast<size_t>(d2_) + static_cast<size_t>(j)];
+    }
+
+    // Returns a d2-element Array1D built directly on the storage of row i, so
+    // writes through the result modify this matrix.
+    inline Array1D row(int32_t i) const {
+        U_ASSERT(i < d1_);
+        return Array1D(data_ + static_cast<size_t>(i) * static_cast<size_t>(d2_), d2_);
+    }
+
+    // Zero-fills all d1 * d2 floats. Returns *this for chaining.
+    inline Array2D& clear() {
+        uprv_memset(data_, 0,
+                    static_cast<size_t>(d1_) * static_cast<size_t>(d2_) * sizeof(float));
+        return *this;
+    }
+
+private:
+    // Allocates room for d1 * d2 floats, or returns nullptr when a dimension
+    // is negative, the byte count would overflow size_t, or uprv_malloc fails.
+    static void* allocate(int32_t d1, int32_t d2) {
+        if (d1 < 0 || d2 < 0) {
+            return nullptr;
+        }
+        const size_t rows = static_cast<size_t>(d1);
+        const size_t cols = static_cast<size_t>(d2);
+        const size_t maxElements = static_cast<size_t>(-1) / sizeof(float);
+        if (cols != 0 && rows > maxElements / cols) {
+            return nullptr;
+        }
+        return uprv_malloc(rows * cols * sizeof(float));
+    }
+
+    void* memory_;
+    float* data_;
+    int32_t d1_;
+    int32_t d2_;
+};
+
+// Hands memory_ back to uprv_free(); memory_ is nullptr for a
+// default-constructed matrix.
+Array2D::~Array2D() {
+    uprv_free(memory_);
+}
+
+// Classes predicted for each input element ("BIES"). The numeric values are
+// used as indices into the 4-element output vector: the result of the argmax
+// over that vector is cast directly to LSTMClass.
+enum LSTMClass {
+    BEGIN,   // 0
+    INSIDE,  // 1
+    END,     // 2
+    SINGLE   // 3
+};
+
+// How the input text is split into elements before the embedding lookup.
+// LSTMData selects the value from the "type" string of the model resource.
+enum EmbeddingType {
+    UNKNOWN,           // "type" matched none of the known names
+    CODE_POINTS,       // "codepoints"
+    GRAPHEME_CLUSTER,  // "graphclust"
+};
+
+// Model data of one LSTM break engine, read from a resource bundle by the
+// constructor. The destructor closes fDict and the bundle passed to the
+// constructor. The matrix dimensions listed below are the ones passed to
+// init() by the constructor, with E = "embeddings" and H = "hunits".
+struct LSTMData : public UMemory {
+    LSTMData(UResourceBundle* rb, UErrorCode &status);
+    ~LSTMData();
+    UHashtable* fDict;          // "dict" string -> index of that string in the "dict" array
+    EmbeddingType fType;        // from "type"; UNKNOWN when the string is not recognized
+    const UChar* fName;         // value of the "model" string
+    ConstArray2D fEmbedding;    // (number of dict entries + 1) x E
+    ConstArray2D fForwardW;     // E x 4H
+    ConstArray2D fForwardU;     // H x 4H
+    ConstArray1D fForwardB;     // 4H
+    ConstArray2D fBackwardW;    // E x 4H
+    ConstArray2D fBackwardU;    // H x 4H
+    ConstArray1D fBackwardB;    // 4H
+    ConstArray2D fOutputW;      // 2H x 4
+    ConstArray1D fOutputB;      // 4
+
+private:
+    UResourceBundle* fBundle;   // bundle given to the constructor; closed by the destructor
+};
+
+// Reads an LSTM model from the resource bundle rb.
+//
+// Keys read from rb: "embeddings" (int), "hunits" (int), "type" (string),
+// "model" (string), "data" (int vector) and "dict" (array of strings).
+// rb is stored in fBundle (and closed by the destructor) even when status
+// already indicates a failure on entry. On any failure the function returns
+// early and leaves the remaining members in their initial state.
+//
+// @param rb     the bundle holding the model
+// @param status in/out error code; set to U_UNSUPPORTED_ERROR when IEEE_754 != 1
+LSTMData::LSTMData(UResourceBundle* rb, UErrorCode &status)
+    : fDict(nullptr), fType(UNKNOWN), fName(nullptr),
+      fBundle(rb)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // NOTE(review): presumably required because the int32 "data" vector is
+    // reinterpreted as float by the ConstArray classes -- confirm there.
+    if (IEEE_754 != 1) {
+        status = U_UNSUPPORTED_ERROR;
+        return;
+    }
+    LocalUResourceBundlePointer embeddings_res(
+        ures_getByKey(rb, "embeddings", nullptr, &status));
+    int32_t embedding_size = ures_getInt(embeddings_res.getAlias(), &status);
+    LocalUResourceBundlePointer hunits_res(
+        ures_getByKey(rb, "hunits", nullptr, &status));
+    if (U_FAILURE(status)) return;
+    int32_t hunits = ures_getInt(hunits_res.getAlias(), &status);
+    const UChar* type = ures_getStringByKey(rb, "type", nullptr, &status);
+    if (U_FAILURE(status)) return;
+    // Any other type string leaves fType at UNKNOWN.
+    if (u_strCompare(type, -1, u"codepoints", -1, false) == 0) {
+        fType = CODE_POINTS;
+    } else if (u_strCompare(type, -1, u"graphclust", -1, false) == 0) {
+        fType = GRAPHEME_CLUSTER;
+    }
+    fName = ures_getStringByKey(rb, "model", nullptr, &status);
+    LocalUResourceBundlePointer dataRes(ures_getByKey(rb, "data", nullptr, &status));
+    if (U_FAILURE(status)) return;
+    int32_t data_len = 0;
+    const int32_t* data = ures_getIntVector(dataRes.getAlias(), &data_len, &status);
+    fDict = uhash_open(uhash_hashUChars, uhash_compareUChars, nullptr, &status);
+
+    StackUResourceBundle stackTempBundle;
+    ResourceDataValue value;
+    ures_getValueWithFallback(rb, "dict", stackTempBundle.getAlias(), value, status);
+    ResourceArray stringArray = value.getArray(status);
+    int32_t num_index = stringArray.getSize();
+    if (U_FAILURE(status)) { return; }
+
+    // put dict into hash
+    // The key stored is the string pointer returned by getString() (no copy is
+    // made here); the value is the position of the string in the array.
+    int32_t stringLength;
+    for (int32_t idx = 0; idx < num_index; idx++) {
+        stringArray.getValue(idx, value);
+        const UChar* str = value.getString(stringLength, status);
+        uhash_putiAllowZero(fDict, (void*)str, idx, &status);
+        if (U_FAILURE(status)) return;
+#ifdef LSTM_VECTORIZER_DEBUG
+        printf("Assign [");
+        while (*str != 0x0000) {
+            printf("U+%04x ", *str);
+            str++;
+        }
+        // NOTE(review): this prints idx-1 although the value stored above is idx.
+        printf("] map to %d\n", idx-1);
+#endif
+    }
+    // Element counts of the nine consecutive matrices/vectors inside "data".
+    int32_t mat1_size = (num_index + 1) * embedding_size;
+    int32_t mat2_size = embedding_size * 4 * hunits;
+    int32_t mat3_size = hunits * 4 * hunits;
+    int32_t mat4_size = 4 * hunits;
+    int32_t mat5_size = mat2_size;
+    int32_t mat6_size = mat3_size;
+    int32_t mat7_size = mat4_size;
+    int32_t mat8_size = 2 * hunits * 4;
+    // The total length of "data" is only checked when U_DEBUG is set.
+#if U_DEBUG
+    int32_t mat9_size = 4;
+    U_ASSERT(data_len == mat1_size + mat2_size + mat3_size + mat4_size + mat5_size +
+        mat6_size + mat7_size + mat8_size + mat9_size);
+#endif
+
+    // Carve "data" into the individual arrays, advancing past each one in turn.
+    fEmbedding.init(data, (num_index + 1), embedding_size);
+    data += mat1_size;
+    fForwardW.init(data, embedding_size, 4 * hunits);
+    data += mat2_size;
+    fForwardU.init(data, hunits, 4 * hunits);
+    data += mat3_size;
+    fForwardB.init(data, 4 * hunits);
+    data += mat4_size;
+    fBackwardW.init(data, embedding_size, 4 * hunits);
+    data += mat5_size;
+    fBackwardU.init(data, hunits, 4 * hunits);
+    data += mat6_size;
+    fBackwardB.init(data, 4 * hunits);
+    data += mat7_size;
+    fOutputW.init(data, 2 * hunits, 4);
+    data += mat8_size;
+    fOutputB.init(data, 4);
+}
+
+// Closes the dictionary hash table first and then the resource bundle that
+// was handed to the constructor.
+LSTMData::~LSTMData()
+{
+    uhash_close(fDict);
+    ures_close(fBundle);
+}
+
+// Abstract base of the strategies that turn a range of text into the list of
+// dictionary indices fed to the LSTM. The hash table is borrowed, not owned.
+class Vectorizer : public UMemory {
+public:
+    Vectorizer(UHashtable* dict) : fDict(dict) {}
+    virtual ~Vectorizer();
+
+    // Splits [startPos, endPos) of text into elements. For every element its
+    // start offset is appended to `offsets` and its dictionary index to
+    // `indices`.
+    virtual void vectorize(UText *text, int32_t startPos, int32_t endPos,
+                           UVector32 &offsets, UVector32 &indices,
+                           UErrorCode &status) const = 0;
+protected:
+    // Looks up the NUL-terminated string str in the dictionary. Returns the
+    // stored index, or the number of dictionary entries when str is unknown.
+    int32_t stringToIndex(const UChar* str) const {
+        UBool known = false;
+        const int32_t mapped = uhash_getiAndFound(fDict, (const void*)str, &known);
+        const int32_t ret = known ? mapped : fDict->count;
+#ifdef LSTM_VECTORIZER_DEBUG
+        printf("[");
+        while (*str != 0x0000) {
+            printf("U+%04x ", *str);
+            str++;
+        }
+        printf("] map to %d\n", ret);
+#endif
+        return ret;
+    }
+
+private:
+    UHashtable* fDict;
+};
+
+// Nothing to release: fDict is not owned by the vectorizer.
+Vectorizer::~Vectorizer() = default;
+
+// Vectorizer that treats every code point of the text as one element.
+class CodePointsVectorizer : public Vectorizer {
+public:
+    CodePointsVectorizer(UHashtable* dict) : Vectorizer(dict) {}
+    ~CodePointsVectorizer() override;
+
+    void vectorize(UText *text, int32_t startPos, int32_t endPos,
+                   UVector32 &offsets, UVector32 &indices,
+                   UErrorCode &status) const override;
+};
+
+// Nothing to release beyond what the base class handles.
+CodePointsVectorizer::~CodePointsVectorizer() = default;
+
+// Walks [startPos, endPos) one code point at a time. For each code point the
+// native index where it starts is appended to `offsets` and the dictionary
+// index of the one-character string is appended to `indices`. Nothing is
+// appended when capacity cannot be reserved or status indicates a failure.
+void CodePointsVectorizer::vectorize(
+    UText *text, int32_t startPos, int32_t endPos,
+    UVector32 &offsets, UVector32 &indices, UErrorCode &status) const
+{
+    if (!offsets.ensureCapacity(endPos - startPos, status) ||
+            !indices.ensureCapacity(endPos - startPos, status)) {
+        return;
+    }
+    if (U_FAILURE(status)) return;
+    utext_setNativeIndex(text, startPos);
+    // A single UChar followed by the NUL terminator that the lookup relies on.
+    UChar unit[2] = {0, 0};
+    while (U_SUCCESS(status)) {
+        const int32_t position = (int32_t)utext_getNativeIndex(text);
+        if (position >= endPos) {
+            break;
+        }
+        // The LSTMBreakEngine currently only accepts characters in the BMP,
+        // so a supplementary code point is not expected here.
+        unit[0] = (UChar) utext_next32(text);
+        U_ASSERT(!U_IS_SURROGATE(unit[0]));
+        offsets.addElement(position, status);
+        indices.addElement(stringToIndex(unit), status);
+    }
+}
+
+// Vectorizer that treats every grapheme cluster of the text as one element.
+class GraphemeClusterVectorizer : public Vectorizer {
+public:
+    GraphemeClusterVectorizer(UHashtable* dict) : Vectorizer(dict) {}
+    ~GraphemeClusterVectorizer() override;
+
+    void vectorize(UText *text, int32_t startPos, int32_t endPos,
+                   UVector32 &offsets, UVector32 &indices,
+                   UErrorCode &status) const override;
+};
+
+// Nothing to release beyond what the base class handles.
+GraphemeClusterVectorizer::~GraphemeClusterVectorizer() = default;
+
+// Capacity, in UChars, passed to utext_extract() for a single grapheme
+// cluster. A longer cluster makes utext_extract() fail with a buffer
+// overflow error, which aborts vectorization.
+constexpr int32_t MAX_GRAPHEME_CLSTER_LENGTH = 10;
+
+// Splits [startPos, endPos) of text into grapheme clusters using a character
+// BreakIterator. For each cluster the offset where it starts is appended to
+// `offsets` and the dictionary index of the cluster string is appended to
+// `indices`. Returns early, leaving status set, on any failure.
+void GraphemeClusterVectorizer::vectorize(
+    UText *text, int32_t startPos, int32_t endPos,
+    UVector32 &offsets, UVector32 &indices, UErrorCode &status) const
+{
+    if (U_FAILURE(status)) return;
+    if (!offsets.ensureCapacity(endPos - startPos, status) ||
+            !indices.ensureCapacity(endPos - startPos, status)) {
+        return;
+    }
+    if (U_FAILURE(status)) return;
+    LocalPointer<BreakIterator> graphemeIter(BreakIterator::createCharacterInstance(Locale(), status));
+    if (U_FAILURE(status)) return;
+    graphemeIter->setText(text, status);
+    if (U_FAILURE(status)) return;
+
+    // Position the iterator on the last boundary before startPos so that the
+    // first next() below steps into the requested range.
+    if (startPos != 0) {
+        graphemeIter->preceding(startPos);
+    }
+    int32_t last = startPos;
+    int32_t current = startPos;
+    // The extra last element is a NUL sentinel that utext_extract() never
+    // overwrites (it is told the capacity is MAX_GRAPHEME_CLSTER_LENGTH).
+    // When a cluster fills the whole capacity, utext_extract() does not
+    // terminate the output and only reports a warning, which U_FAILURE() does
+    // not catch; without the sentinel stringToIndex() would then read past
+    // the end of the buffer.
+    UChar str[MAX_GRAPHEME_CLSTER_LENGTH + 1];
+    str[MAX_GRAPHEME_CLSTER_LENGTH] = 0;
+    while ((current = graphemeIter->next()) != BreakIterator::DONE) {
+        if (current >= endPos) {
+            break;
+        }
+        if (current > startPos) {
+            utext_extract(text, last, current, str, MAX_GRAPHEME_CLSTER_LENGTH, &status);
+            if (U_FAILURE(status)) return;
+            offsets.addElement(last, status);
+            indices.addElement(stringToIndex(str), status);
+            if (U_FAILURE(status)) return;
+        }
+        last = current;
+    }
+    if (U_FAILURE(status) || last >= endPos) {
+        return;
+    }
+    // Trailing piece between the last boundary inside the range and endPos.
+    utext_extract(text, last, endPos, str, MAX_GRAPHEME_CLSTER_LENGTH, &status);
+    if (U_SUCCESS(status)) {
+        offsets.addElement(last, status);
+        indices.addElement(stringToIndex(str), status);
+    }
+}
+
+// One LSTM step, following
+// https://en.wikipedia.org/wiki/Long_short-term_memory#LSTM_with_a_forget_gate
+//
+// x is the input; h and c are updated in place. ifco is a scratch array of
+// 4 * hunits elements supplied by the caller so that no memory has to be
+// allocated or freed per step; its contents on entry are ignored.
+void compute(
+    int32_t hunits,
+    const ReadArray2D& W, const ReadArray2D& U, const ReadArray1D& b,
+    const ReadArray1D& x, Array1D& h, Array1D& c,
+    Array1D& ifco)
+{
+    // ifco = x * W + h * U + b
+    ifco.assign(b);
+    ifco.addDotProduct(x, W);
+    ifco.addDotProduct(h, U);
+
+    // The four consecutive hunits-sized slices of ifco, each activated in place.
+    Array1D inputGate = ifco.slice(0, hunits);
+    Array1D forgetGate = ifco.slice(hunits, hunits);
+    Array1D candidate = ifco.slice(2*hunits, hunits);
+    Array1D outputGate = ifco.slice(3*hunits, hunits);
+    inputGate.sigmoid();   // i: sigmoid
+    forgetGate.sigmoid();  // f: sigmoid
+    candidate.tanh();      // c_: tanh
+    outputGate.sigmoid();  // o: sigmoid
+
+    // c = f (*) c + i (*) c_
+    c.hadamardProduct(forgetGate);
+    c.addHadamardProduct(inputGate, candidate);
+
+    // h = tanh(c) (*) o
+    h.tanh(c);
+    h.hadamardProduct(outputGate);
+}
+
+// Smallest size of a single word.
+static constexpr int32_t MIN_WORD = 2;
+
+// Smallest number of characters that can hold two words.
+static constexpr int32_t MIN_WORD_SPAN = MIN_WORD * 2;
+
+// Finds word breaks inside [startPos, endPos) of text with the LSTM model and
+// appends them to foundBreaks.
+//
+// The range is vectorized into elements, a backward LSTM pass stores its
+// hidden state for every element, and a forward pass is then combined with the
+// output layer to classify each element. A break is recorded at the offset of
+// every element, other than the first, whose class is BEGIN or SINGLE.
+//
+// @return the number of breaks appended to foundBreaks; 0 on failure or when
+//         the range is too short to hold two words.
+int32_t
+LSTMBreakEngine::divideUpDictionaryRange( UText *text,
+                                                int32_t startPos,
+                                                int32_t endPos,
+                                                UVector32 &foundBreaks,
+                                                UErrorCode& status) const {
+    if (U_FAILURE(status)) return 0;
+    int32_t beginFoundBreakSize = foundBreaks.size();
+    // Advance MIN_WORD_SPAN code points from startPos; reaching endPos means
+    // the range cannot contain two words.
+    utext_setNativeIndex(text, startPos);
+    utext_moveIndex32(text, MIN_WORD_SPAN);
+    if (utext_getNativeIndex(text) >= endPos) {
+        return 0;       // Not enough characters for two words
+    }
+    utext_setNativeIndex(text, startPos);
+
+    // offsets[i] is the text offset of element i, indices[i] its dictionary index.
+    UVector32 offsets(status);
+    UVector32 indices(status);
+    if (U_FAILURE(status)) return 0;
+    fVectorizer->vectorize(text, startPos, endPos, offsets, indices, status);
+    if (U_FAILURE(status)) return 0;
+    int32_t* offsetsBuf = offsets.getBuffer();
+    int32_t* indicesBuf = indices.getBuffer();
+
+    int32_t input_seq_len = indices.size();
+    int32_t hunits = fData->fForwardU.d1();
+
+    // ----- Begin of all the Array memory allocation needed for this function
+    // Allocate temp array used inside compute()
+    Array1D ifco(4 * hunits, status);
+
+    Array1D c(hunits, status);
+    Array1D logp(4, status);
+
+    // TODO: limit size of hBackward. If input_seq_len is too big, we could
+    // run out of memory.
+    // Backward LSTM
+    Array2D hBackward(input_seq_len, hunits, status);
+
+    // Allocate fbRow and slice the internal array in two.
+    Array1D fbRow(2 * hunits, status);
+
+    // ----- End of all the Array memory allocation needed for this function
+    if (U_FAILURE(status)) return 0;
+
+    // To save the needed memory usage, the following is different from the
+    // Python or ICU4X implementation. We first perform the Backward LSTM
+    // and then merge the iteration of the forward LSTM and the output layer
+    // together because we only need to remember the h[t-1] for Forward LSTM.
+    for (int32_t i = input_seq_len - 1; i >= 0; i--) {
+        Array1D hRow = hBackward.row(i);
+        // Start from the hidden state of the element processed just before
+        // (i+1); the last element keeps its freshly allocated row.
+        if (i != input_seq_len - 1) {
+            hRow.assign(hBackward.row(i+1));
+        }
+#ifdef LSTM_DEBUG
+        printf("hRow %d\n", i);
+        hRow.print();
+        printf("indicesBuf[%d] = %d\n", i, indicesBuf[i]);
+        printf("fData->fEmbedding.row(indicesBuf[%d]):\n", i);
+        fData->fEmbedding.row(indicesBuf[i]).print();
+#endif  // LSTM_DEBUG
+        compute(hunits,
+                fData->fBackwardW, fData->fBackwardU, fData->fBackwardB,
+                fData->fEmbedding.row(indicesBuf[i]),
+                hRow, c, ifco);
+    }
+
+
+    Array1D forwardRow = fbRow.slice(0, hunits);  // point to first half of data in fbRow.
+    Array1D backwardRow = fbRow.slice(hunits, hunits);  // point to second half of data in fbRow.
+
+    // The following iteration merges the forward LSTM and the output layer
+    // together.
+    c.clear();  // reuse c since it is the same size.
+    for (int32_t i = 0; i < input_seq_len; i++) {
+#ifdef LSTM_DEBUG
+        printf("forwardRow %d\n", i);
+        forwardRow.print();
+#endif  // LSTM_DEBUG
+        // Forward LSTM
+        // Calculate the result into forwardRow, which points to the data in the first half
+        // of fbRow.
+        compute(hunits,
+                fData->fForwardW, fData->fForwardU, fData->fForwardB,
+                fData->fEmbedding.row(indicesBuf[i]),
+                forwardRow, c, ifco);
+
+        // assign the data from hBackward.row(i) to second half of fbRow.
+        backwardRow.assign(hBackward.row(i));
+
+        // Output layer: logp = fbRow * OutputW + OutputB
+        logp.assign(fData->fOutputB).addDotProduct(fbRow, fData->fOutputW);
+#ifdef LSTM_DEBUG
+        printf("backwardRow %d\n", i);
+        backwardRow.print();
+        printf("logp %d\n", i);
+        logp.print();
+#endif  // LSTM_DEBUG
+
+        // current = argmax(logp)
+        LSTMClass current = (LSTMClass)logp.maxIndex();
+        // BIES logic.
+        // An element that begins a word (or is a word by itself) marks a break
+        // in front of it, except for the very first element of the range.
+        if (current == BEGIN || current == SINGLE) {
+            if (i != 0) {
+                foundBreaks.addElement(offsetsBuf[i], status);
+                if (U_FAILURE(status)) return 0;
+            }
+        }
+    }
+    return foundBreaks.size() - beginFoundBreakSize;
+}
+
+// Creates the vectorizer matching data->fType. The caller owns the result.
+//
+// Returns nullptr when status already indicates a failure, or when the
+// allocation fails, in which case status is set to U_MEMORY_ALLOCATION_ERROR
+// so that the caller does not end up with a null vectorizer and a success
+// status. Any other embedding type is treated as unreachable.
+Vectorizer* createVectorizer(const LSTMData* data, UErrorCode &status) {
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+    Vectorizer* vectorizer = nullptr;
+    switch (data->fType) {
+        case CODE_POINTS:
+            vectorizer = new CodePointsVectorizer(data->fDict);
+            break;
+        case GRAPHEME_CLUSTER:
+            vectorizer = new GraphemeClusterVectorizer(data->fDict);
+            break;
+        default:
+            UPRV_UNREACHABLE_EXIT;
+    }
+    if (vectorizer == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
+    return vectorizer;
+}
+
+// Builds an engine that handles the characters in `set` using `data`.
+// On success the engine takes ownership of `data`; on failure it does not.
+LSTMBreakEngine::LSTMBreakEngine(const LSTMData* data, const UnicodeSet& set, UErrorCode &status)
+    : DictionaryBreakEngine(), fData(data), fVectorizer(createVectorizer(fData, status))
+{
+    if (U_SUCCESS(status)) {
+        setCharacters(set);
+        return;
+    }
+    // Failure: drop the pointer so that the destructor does not delete the
+    // data, because the caller will do so.
+    fData = nullptr;
+}
+
+// Deletes the model data (nullptr after a failed construction) and then the
+// vectorizer.
+LSTMBreakEngine::~LSTMBreakEngine()
+{
+    delete fData;
+    delete fVectorizer;
+}
+
+// Returns the model name stored in the engine's LSTMData.
+const UChar* LSTMBreakEngine::name() const
+{
+    const UChar* modelName = fData->fName;
+    return modelName;
+}
+
+// Returns the string stored under the short name of `script` in the "lstm"
+// table of the root bundle of the brkitr tree.
+UnicodeString defaultLSTM(UScriptCode script, UErrorCode& status) {
+    // Open the root bundle of the brkitr tree.
+    UResourceBundle *bundle = ures_open(U_ICUDATA_BRKITR, "", &status);
+    // The same bundle object is reused as fill-in for the "lstm" table.
+    bundle = ures_getByKeyWithFallback(bundle, "lstm", bundle, &status);
+    const char *scriptKey = uscript_getShortName(script);
+    UnicodeString modelName = ures_getUnicodeStringByKey(bundle, scriptKey, &status);
+    ures_close(bundle);
+    return modelName;
+}
+
+// Loads the default LSTM model for `script`.
+//
+// Returns nullptr without touching status for any script other than Khmer,
+// Lao, Myanmar and Thai, and nullptr with a failure status when the model
+// name or its bundle cannot be loaded. Otherwise returns the result of
+// CreateLSTMData() on the opened bundle.
+U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript(UScriptCode script, UErrorCode& status)
+{
+    if (script != USCRIPT_KHMER && script != USCRIPT_LAO && script != USCRIPT_MYANMAR && script != USCRIPT_THAI) {
+        return nullptr;
+    }
+    UnicodeString name = defaultLSTM(script, status);
+    if (U_FAILURE(status)) return nullptr;
+    // Strip everything from the last '.' of the model name to get the bundle
+    // name. Append and truncate are separate statements on purpose: in a
+    // single chained expression the order in which the append and the
+    // lastIndexOf() argument are evaluated is unspecified before C++17, and
+    // evaluating lastIndexOf() on the still-empty buffer would truncate the
+    // name to nothing.
+    CharString namebuf;
+    namebuf.appendInvariantChars(name, status);
+    namebuf.truncate(namebuf.lastIndexOf('.'));
+
+    LocalUResourceBundlePointer rb(
+        ures_openDirect(U_ICUDATA_BRKITR, namebuf.data(), &status));
+    if (U_FAILURE(status)) return nullptr;
+
+    // Ownership of the bundle moves to the LSTMData.
+    return CreateLSTMData(rb.orphan(), status);
+}
+
+// Creates an LSTMData from the resource bundle rb and takes ownership of rb:
+// the bundle is closed when the returned object is deleted, or right here
+// when no object is returned.
+//
+// Returns nullptr with a failure status when the model cannot be read or the
+// object cannot be allocated (U_MEMORY_ALLOCATION_ERROR). A half-initialized
+// object is never handed out, so a caller that only checks status and drops
+// the pointer no longer leaks the data and its bundle.
+U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData(UResourceBundle* rb, UErrorCode& status)
+{
+    LSTMData* data = new LSTMData(rb, status);
+    if (data == nullptr) {
+        // Nobody else owns rb at this point.
+        ures_close(rb);
+        if (U_SUCCESS(status)) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+        return nullptr;
+    }
+    if (U_FAILURE(status)) {
+        delete data;  // also closes rb
+        return nullptr;
+    }
+    return data;
+}
+
+// Creates an LSTM break engine for Thai or Myanmar that handles the
+// characters of that script having LineBreak=SA.
+//
+// For any other script `data` is deleted here and nullptr is returned with
+// status untouched. When the engine cannot be created, nullptr is returned
+// with a failure status and `data` is not deleted by this function.
+U_CAPI const LanguageBreakEngine* U_EXPORT2
+CreateLSTMBreakEngine(UScriptCode script, const LSTMData* data, UErrorCode& status)
+{
+    UnicodeString pattern;
+    if (script == USCRIPT_THAI) {
+        pattern = UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]]");
+    } else if (script == USCRIPT_MYANMAR) {
+        pattern = UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]]");
+    } else {
+        delete data;
+        return nullptr;
+    }
+    UnicodeSet handled;
+    handled.applyPattern(pattern, status);
+    const LanguageBreakEngine* engine = new LSTMBreakEngine(data, handled, status);
+    if (engine == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    if (U_FAILURE(status)) {
+        delete engine;
+        return nullptr;
+    }
+    return engine;
+}
+
+// Destroys an LSTMData object with `delete`.
+U_CAPI void U_EXPORT2 DeleteLSTMData(const LSTMData* data) {
+    delete data;
+}
+
+// Returns the model name (fName) stored in `data`; `data` must not be null.
+U_CAPI const UChar* U_EXPORT2 LSTMDataName(const LSTMData* data) {
+    const UChar* modelName = data->fName;
+    return modelName;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

+ 87 - 0
thirdparty/icu4c/common/lstmbe.h

@@ -0,0 +1,87 @@
+// © 2021 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef LSTMBE_H
+#define LSTMBE_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/uniset.h"
+#include "unicode/ures.h"
+#include "unicode/utext.h"
+#include "unicode/utypes.h"
+
+#include "brkeng.h"
+#include "dictbe.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+class Vectorizer;
+struct LSTMData;
+
+/*******************************************************************
+ * LSTMBreakEngine
+ */
+
+/**
+ * <p>LSTMBreakEngine is a kind of DictionaryBreakEngine that uses a
+ * LSTM to determine language-specific breaks.</p>
+ *
+ * <p>After it is constructed a LSTMBreakEngine may be shared between
+ * threads without synchronization.</p>
+ */
+class LSTMBreakEngine : public DictionaryBreakEngine {
+public:
+    /**
+     * <p>Constructor.</p>
+     *
+     * @param data   The LSTM model to use.
+     * @param set    The set of characters handled by this engine.
+     * @param status Information on any errors encountered.
+     */
+    LSTMBreakEngine(const LSTMData* data, const UnicodeSet& set, UErrorCode &status);
+
+    /**
+     * <p>Virtual destructor.</p>
+     */
+    virtual ~LSTMBreakEngine();
+
+    /**
+     * @return The name of the LSTM model used by this engine.
+     */
+    virtual const UChar* name() const;
+
+protected:
+    /**
+     * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
+     *
+     * @param text A UText representing the text
+     * @param rangeStart The start of the range of dictionary characters
+     * @param rangeEnd The end of the range of dictionary characters
+     * @param foundBreaks Output vector to which the break positions found are appended
+     * @param status Information on any errors encountered.
+     * @return The number of breaks found
+     */
+     virtual int32_t divideUpDictionaryRange(UText *text,
+                                             int32_t rangeStart,
+                                             int32_t rangeEnd,
+                                             UVector32 &foundBreaks,
+                                             UErrorCode& status) const override;
+private:
+    /** The LSTM model; defined in lstmbe.cpp. */
+    const LSTMData* fData;
+    /** Converts a range of text into the input of the model; defined in lstmbe.cpp. */
+    const Vectorizer* fVectorizer;
+};
+
+/**
+ * Creates an LSTM break engine for the given script from the given model.
+ * Callers must check status before using the result.
+ */
+U_CAPI const LanguageBreakEngine* U_EXPORT2 CreateLSTMBreakEngine(
+    UScriptCode script, const LSTMData* data, UErrorCode& status);
+
+/**
+ * Creates LSTM model data from a resource bundle. Callers must check status
+ * before using the result.
+ */
+U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData(
+    UResourceBundle* rb, UErrorCode& status);
+
+/**
+ * Creates the default LSTM model data for the given script. Callers must
+ * check status before using the result; the result may be null.
+ */
+U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript(
+    UScriptCode script, UErrorCode& status);
+
+/** Deletes LSTM model data. */
+U_CAPI void U_EXPORT2 DeleteLSTMData(const LSTMData* data);
+/** Returns the name of the model stored in the given LSTM model data. */
+U_CAPI const UChar* U_EXPORT2 LSTMDataName(const LSTMData* data);
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif  /* LSTMBE_H */

+ 4 - 4
thirdparty/icu4c/common/messagepattern.cpp

@@ -309,10 +309,10 @@ MessagePattern::clear() {
     numericValuesLength=0;
 }
 
-UBool
+bool
 MessagePattern::operator==(const MessagePattern &other) const {
     if(this==&other) {
-        return TRUE;
+        return true;
     }
     return
         aposMode==other.aposMode &&
@@ -387,10 +387,10 @@ MessagePattern::getPluralOffset(int32_t pluralStart) const {
 
 // MessagePattern::Part ---------------------------------------------------- ***
 
-UBool
+bool
 MessagePattern::Part::operator==(const Part &other) const {
     if(this==&other) {
-        return TRUE;
+        return true;
     }
     return
         type==other.type &&

+ 1 - 1
thirdparty/icu4c/common/msvcres.h

@@ -19,7 +19,7 @@ STLPort's broken stddef.h from being used when rc.exe parses this file.
 
 #include "unicode/uversion.h"
 
-#define ICU_WEBSITE "http://icu-project.org"
+#define ICU_WEBSITE "https://icu.unicode.org/"
 #define ICU_COMPANY "The ICU Project"
 #define ICU_PRODUCT_PREFIX "ICU"
 #define ICU_PRODUCT "International Components for Unicode"

+ 383 - 380
thirdparty/icu4c/common/norm2_nfc_data.h

@@ -1,6 +1,5 @@
-// © 2016 and later: Unicode, Inc. and others.
+// Copyright (C) 2016 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
-//
 // Copyright (C) 1999-2016, International Business Machines
 // Corporation and others.  All Rights Reserved.
 //
@@ -8,31 +7,30 @@
 //
 // machine-generated by: icu/source/tools/gennorm2/n2builder.cpp
 
-
 #ifdef INCLUDED_FROM_NORMALIZER2_CPP
 
 static const UVersionInfo norm2_nfc_data_formatVersion={4,0,0,0};
-static const UVersionInfo norm2_nfc_data_dataVersion={0xd,0,0,0};
+static const UVersionInfo norm2_nfc_data_dataVersion={0xe,0,0,0};
 
 static const int32_t norm2_nfc_data_indexes[Normalizer2Impl::IX_COUNT]={
-0x50,0x4bac,0x8814,0x8914,0x8914,0x8914,0x8914,0x8914,0xc0,0x300,0xae2,0x29e0,0x3c66,0xfc00,0x1288,0x3b9c,
+0x50,0x4c54,0x88bc,0x89bc,0x89bc,0x89bc,0x89bc,0x89bc,0xc0,0x300,0xae2,0x29e0,0x3c66,0xfc00,0x1288,0x3b9c,
 0x3c34,0x3c66,0x300,0
 };
 
-static const uint16_t norm2_nfc_data_trieIndex[1746]={
+static const uint16_t norm2_nfc_data_trieIndex[1748]={
 0,0x40,0x7b,0xbb,0xfb,0x13a,0x17a,0x1b2,0x1f2,0x226,0x254,0x226,0x294,0x2d4,0x313,0x353,
 0x393,0x3d2,0x40f,0x44e,0x226,0x226,0x488,0x4c8,0x4f8,0x530,0x226,0x570,0x59f,0x5de,0x226,0x5f3,
-0x631,0x65f,0x226,0x68c,0x6cc,0x709,0x729,0x768,0x7a7,0x7e4,0x803,0x840,0x729,0x879,0x8a7,0x8e6,
-0x226,0x920,0x937,0x977,0x98e,0x9cd,0x226,0xa03,0xa23,0xa5e,0xa6a,0xaa5,0xacd,0xb0a,0xb4a,0xb84,
-0xb9f,0x226,0xbda,0x226,0xc1a,0xc39,0xc6f,0xcac,0x226,0x226,0x226,0x226,0x226,0xccf,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xcfb,0x226,0x226,0xd30,
-0x226,0x226,0xd4e,0x226,0xd78,0x226,0x226,0x226,0xdb4,0xdd4,0xe14,0xe53,0xe8e,0xece,0xf02,0xf2e,
-0x808,0x226,0x226,0xf62,0x226,0x226,0x226,0xfa2,0xfe2,0x1022,0x1062,0x10a2,0x10e2,0x1122,0x1162,0x11a2,
-0x11e2,0x226,0x226,0x1212,0x1243,0x226,0x1273,0x12a6,0x12e3,0x1322,0x1362,0x1398,0x13c6,0x226,0x226,0x226,
+0x631,0x65f,0x687,0x6bd,0x6fd,0x73a,0x75a,0x799,0x7d8,0x815,0x834,0x871,0x75a,0x8aa,0x8d8,0x917,
+0x834,0x951,0x968,0x9a8,0x9bf,0x9fe,0x226,0xa34,0xa54,0xa8f,0xa9b,0xad6,0xafe,0xb3b,0xb7b,0xbb5,
+0xbd0,0x226,0xc0b,0x226,0xc4b,0xc6a,0xca0,0xcdd,0x226,0x226,0x226,0x226,0x226,0xd00,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xd2c,0x226,0x226,0xd61,
+0x226,0x226,0xd7f,0x226,0xda9,0x226,0x226,0x226,0xde5,0xe05,0xe45,0xe84,0xebf,0xeff,0xf33,0xf5f,
+0x839,0x226,0x226,0xf93,0x226,0x226,0x226,0xfd3,0x1013,0x1053,0x1093,0x10d3,0x1113,0x1153,0x1193,0x11d3,
+0x1213,0x226,0x226,0x1243,0x1274,0x226,0x12a4,0x12d7,0x1314,0x1353,0x1393,0x13c9,0x13f7,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x13f1,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0xcbd,0x226,0x140e,0x226,0x144e,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x148e,0x14c8,0x1506,0x1546,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1422,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0xcee,0x226,0x143f,0x226,0x147f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x14bf,0x14f9,0x1537,0x1577,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
@@ -61,20 +59,20 @@ static const uint16_t norm2_nfc_data_trieIndex[1746]={
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1585,0x15c3,0x15e3,0x226,0x226,0x226,0x226,
-0x161d,0x226,0x226,0x1645,0x1677,0x16a5,0x80c,0x16b8,0x226,0x226,0x16c8,0x1708,0x226,0x226,0x226,0x1420,
-0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,
-0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,
-0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,
-0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,
-0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,
-0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,
-0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,
-0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,
-0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,
-0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,
-0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x1794,0x226,
-0x17d4,0x180f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x15b6,0x15f4,0x1614,0x226,0x226,0x226,0x226,
+0x164e,0x226,0x226,0x1676,0x16a8,0x16d6,0x83d,0x16e9,0x226,0x226,0x16f9,0x1739,0x226,0x226,0x226,0x1451,
+0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,
+0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,
+0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,
+0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,
+0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,
+0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,
+0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,
+0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,
+0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,
+0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,
+0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x178d,0x1779,0x1781,0x1789,0x1791,0x177d,0x1785,0x17c5,0x226,
+0x1805,0x1840,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
@@ -82,57 +80,57 @@ static const uint16_t norm2_nfc_data_trieIndex[1746]={
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x184f,0x188f,0x18cf,0x190f,0x194f,0x198f,0x19cf,0x1a0f,0x1a32,0x1a72,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1a92,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x655,0x664,0x67c,0x69b,0x6b0,0x6b0,0x6b0,0x6b4,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x1880,0x18c0,0x1900,0x1940,0x1980,0x19c0,0x1a00,0x1a40,0x1a63,0x1aa3,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ac3,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x657,0x666,0x67e,0x69d,0x6b2,0x6b2,0x6b2,0x6b6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbda,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xc0b,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x40c,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ac5,0x226,0x226,0x1ad5,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0xdc6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ae5,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x15d6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x1aef,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7eb,0x226,0x226,
-0x9ba,0x226,0x1aff,0x1b0c,0x1b18,0x226,0x226,0x226,0x226,0x414,0x226,0x1b23,0x1b33,0x226,0x226,0x226,
-0x7e0,0x226,0x226,0x226,0x226,0x1b43,0x226,0x226,0x226,0x1b4e,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x1b55,0x226,0x226,0x226,0x226,0x1b60,0x1b6f,0x8f6,0x1b7d,0x412,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x1b8b,0x798,0x226,0x226,0x226,0x226,0x226,0x1b9b,0x1baa,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x8d6,0x1bb2,0x1bc2,0x226,
-0x226,0x226,0x9ba,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bcc,0x226,0x226,0x226,0x226,0x226,
-0x226,0x7e6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bc9,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bdc,
-0x7e0,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x84d,0x226,0x226,0x226,0x7ed,0x7ea,
-0x226,0x226,0x226,0x226,0x7e8,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x9ba,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbd4,0x226,0x226,0x226,
-0x226,0x7ea,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x1bec,0x226,0x226,0x226,0xefb,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1af6,0x226,0x226,0x1b06,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0xdf7,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1b16,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1607,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x1b20,0x54f,0x226,0x226,0x1b30,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x81c,0x226,0x226,
+0x1b40,0x226,0x1b50,0x1b5d,0x1b69,0x226,0x226,0x226,0x226,0x414,0x226,0x1b74,0x1b84,0x226,0x226,0x226,
+0x811,0x226,0x226,0x226,0x226,0x1b94,0x226,0x226,0x226,0x1b9f,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x1ba6,0x226,0x226,0x226,0x226,0x1bb1,0x1bc0,0x927,0x1bce,0x412,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x1bdc,0x7c9,0x226,0x226,0x226,0x226,0x226,0x1bec,0x1bfb,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x907,0x1c03,0x1c13,0x226,
+0x226,0x226,0x9eb,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c1d,0x226,0x226,0x226,0x226,0x226,
+0x226,0x817,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c1a,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c2d,
+0x811,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x87e,0x226,0x226,0x226,0x81e,0x81b,
+0x226,0x226,0x226,0x226,0x819,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x9eb,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xc05,0x226,0x226,0x226,
+0x226,0x81b,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x1c3d,0x226,0x226,0x226,0xf2c,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x1bfc,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bfe,
+0x226,0x226,0x226,0x226,0x226,0x1c4d,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c4f,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x1c0d,0x1c1d,0x1c2b,0x1c38,0x226,0x1c44,0x1c52,0x1c62,0x226,0x226,
-0x226,0x226,0xcea,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c72,0x1c7a,
-0x1c88,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0xefb,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x4fc,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x1c98,0x226,0x226,0x226,0x226,0x226,0x226,0x1ca4,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x1cb4,0x1cc4,0x1cd4,0x1ce4,0x1cf4,0x1d04,0x1d14,0x1d24,0x1d34,0x1d44,0x1d54,
-0x1d64,0x1d74,0x1d84,0x1d94,0x1da4,0x1db4,0x1dc4,0x1dd4,0x1de4,0x1df4,0x1e04,0x1e14,0x1e24,0x1e34,0x1e44,0x1e54,
-0x1e64,0x1e74,0x1e84,0x1e94,0x1ea4,0x1eb4,0x1ec4,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x1c5e,0x1c6e,0x1c7c,0x1c89,0x226,0x1c95,0x1ca3,0x1cb3,0x226,0x226,
+0x226,0x226,0xd1b,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1cc3,0x1ccb,
+0x1cd9,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0xf2c,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7c9,0x226,
+0x226,0x226,0x4fc,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x1ce9,0x226,0x226,0x226,0x226,0x226,0x226,0x1cf5,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d05,0x1d15,0x1d25,0x1d35,0x1d45,0x1d55,0x1d65,0x1d75,0x1d85,
+0x1d95,0x1da5,0x1db5,0x1dc5,0x1dd5,0x1de5,0x1df5,0x1e05,0x1e15,0x1e25,0x1e35,0x1e45,0x1e55,0x1e65,0x1e75,0x1e85,
+0x1e95,0x1ea5,0x1eb5,0x1ec5,0x1ed5,0x1ee5,0x1ef5,0x1f05,0x1f15,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x408,0x428,0xc4,0xc4,0xc4,0x448,0x457,0x46d,0x489,0x4a6,0x4c2,
-0x4df,0x4fc,0x51b,0x538,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
-0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x552,0xc4,0x566,0xc4,0xc4,0xc4,0xc4,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x408,0x428,0xc4,0xc4,0xc4,0x448,0x457,0x46d,0x489,
+0x4a6,0x4c2,0x4df,0x4fc,0x51b,0x538,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
+0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x552,0xc4,0x566,0xc4,0xc4,
 0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
-0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x586,0xc4,0xc4,0xc4,0xc4,0xc4,
-0xc4,0xc4,0xc4,0x591,0x5ae,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5ce,0x5e2,0xc4,0xc4,0x5f5,
+0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x586,0xc4,0xc4,0xc4,
+0xc4,0xc4,0xc4,0xc4,0xc4,0x591,0x5ae,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5ce,0x5e4,0xc4,
+0xc4,0x5f7,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
 0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
-0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
-0x615,0x635
+0xc4,0xc4,0x617,0x637
 };
 
-static const uint16_t norm2_nfc_data_trieData[7892]={
+static const uint16_t norm2_nfc_data_trieData[7974]={
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -238,401 +236,406 @@ static const uint16_t norm2_nfc_data_trieData[7892]={
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffb8,
-0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffb8,
-0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xfe36,0xfe38,0xfe3a,0xffcc,
-0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,
+0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,
+0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,
+0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xfe36,0xfe38,0xfe3a,
+0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0x864,0x1993,1,1,1,1,1,1,0x868,0x1999,1,0x86c,
-0x199f,1,1,1,1,1,1,1,0xfc0e,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0xfe12,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc,1,1,
-1,0x29ec,0x29f2,0x29f8,0x29fe,0x2a04,0x2a0a,0x2a10,0x2a16,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,0x864,0x1993,1,1,1,1,1,1,0x868,0x1999,1,
+0x86c,0x199f,1,1,1,1,1,1,1,0xfc0e,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0xfe12,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc,1,
+1,1,0x29ec,0x29f2,0x29f8,0x29fe,0x2a04,0x2a0a,0x2a10,0x2a16,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,0xfe0e,1,0xfc00,1,1,1,1,1,1,1,0x870,
-1,1,1,0x19a5,0x19ab,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00,
-1,1,1,1,0x2a1c,0x2a22,1,0x2a28,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0xffcc,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0xfe0e,1,0xfc00,1,1,1,1,1,1,1,
+0x870,1,1,1,0x19a5,0x19ab,0xfe12,1,1,1,1,1,1,1,1,1,
+0xfc00,1,1,1,1,0x2a1c,0x2a22,1,0x2a28,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0xffcc,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,0x2a2e,1,1,0x2a34,1,1,
-1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1,
-1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,0x2a3a,0x2a40,0x2a46,
-1,1,0x2a4c,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,0x2a2e,1,1,0x2a34,1,
+1,1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,0x2a3a,0x2a40,
+0x2a46,1,1,0x2a4c,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe0e,
-1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-0x878,0x19b1,1,1,0x19b7,0x19bd,0xfe12,1,1,1,1,1,1,1,1,0xfc00,
-0xfc00,1,1,1,1,0x2a52,0x2a58,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0x884,1,0x19c3,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,0x878,0x19b1,1,1,0x19b7,0x19bd,0xfe12,1,1,1,1,1,1,1,1,
+0xfc00,0xfc00,1,1,1,1,0x2a52,0x2a58,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,0xfc00,1,1,1,1,1,1,0x888,0x890,1,1,
-0x19c9,0x19cf,0x19d5,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00,1,1,
+1,1,1,1,1,1,1,1,1,1,0x884,1,0x19c3,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0x894,1,0x19db,1,1,1,1,0xfe12,1,1,
-1,1,1,1,1,0xfea8,0xfcb6,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0xfc00,1,1,1,1,1,1,0x888,0x890,1,
+1,0x19c9,0x19cf,0x19d5,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0x894,1,0x19db,1,1,1,1,0xfe12,1,
+1,1,1,1,1,1,0xfea8,0xfcb6,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0xfe0e,1,1,0x898,0x19e1,1,0xfc00,1,1,1,0x89c,0x19e7,0x19ed,
-1,0xdca,0x19f5,1,0xfe12,1,1,1,1,1,1,1,0xfc00,0xfc00,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0xfe0e,1,1,0x898,0x19e1,1,0xfc00,1,1,1,0x89c,0x19e7,
+0x19ed,1,0xdca,0x19f5,1,0xfe12,1,1,1,1,1,1,1,0xfc00,0xfc00,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,0xfc00,1,1,1,
-1,1,1,0x8a8,0x8b0,1,1,0x19fd,0x1a03,0x1a09,0xfe12,1,1,1,1,1,
-1,1,1,1,0xfc00,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,0xfc12,1,1,
-1,1,0xfc00,1,1,1,1,1,1,1,1,1,0x8b4,0x1a0f,1,0xdd4,
-0x1a17,0x1a1f,0xfc00,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,0xfc00,1,1,
+1,1,1,1,0x8a8,0x8b0,1,1,0x19fd,0x1a03,0x1a09,0xfe12,1,1,1,1,
+1,1,1,1,1,0xfc00,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,0xfece,0xfece,0xfe12,1,1,
-1,1,1,1,1,1,0xfed6,0xfed6,0xfed6,0xfed6,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfc12,1,
+1,1,1,0xfc00,1,1,1,1,1,1,1,1,1,0x8b4,0x1a0f,1,
+0xdd4,0x1a17,0x1a1f,0xfc00,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,0xfece,0xfece,0xfe12,1,
+1,1,1,1,1,1,1,0xfed6,0xfed6,0xfed6,0xfed6,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xfeec,0xfeec,0xfe12,1,1,1,1,1,1,1,1,0xfef4,0xfef4,0xfef4,
-0xfef4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,
+1,1,1,0xfeec,0xfeec,0xfe12,1,1,1,1,1,1,1,1,0xfef4,0xfef4,
+0xfef4,0xfef4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffb8,1,0xffb8,1,0xffb0,1,1,1,1,1,1,0x2a5f,1,1,
-1,1,1,1,1,1,1,0x2a65,1,1,1,1,0x2a6b,1,1,1,
-1,0x2a71,1,1,1,1,0x2a77,1,1,1,1,1,1,1,1,1,
-1,1,1,0x2a7d,1,1,1,1,1,1,1,0xff02,0xff04,0x3c50,0xff08,0x3c58,
-0x2a82,1,0x2a88,1,0xff04,0xff04,0xff04,0xff04,1,1,0xff04,0x3c60,0xffcc,0xffcc,0xfe12,1,
-0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,0x2a8f,1,1,
-1,1,1,1,1,1,1,0x2a95,1,1,1,1,0x2a9b,1,1,1,
-1,0x2aa1,1,1,1,1,0x2aa7,1,1,1,1,1,1,1,1,1,
-1,1,1,0x2aad,1,1,1,1,1,1,0xffb8,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0xffb8,0xffb8,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0xffb8,1,0xffb8,1,0xffb0,1,1,1,1,1,1,0x2a5f,1,
+1,1,1,1,1,1,1,1,0x2a65,1,1,1,1,0x2a6b,1,1,
+1,1,0x2a71,1,1,1,1,0x2a77,1,1,1,1,1,1,1,1,
+1,1,1,1,0x2a7d,1,1,1,1,1,1,1,0xff02,0xff04,0x3c50,0xff08,
+0x3c58,0x2a82,1,0x2a88,1,0xff04,0xff04,0xff04,0xff04,1,1,0xff04,0x3c60,0xffcc,0xffcc,0xfe12,
+1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,0x2a8f,1,
+1,1,1,1,1,1,1,1,0x2a95,1,1,1,1,0x2a9b,1,1,
+1,1,0x2aa1,1,1,1,1,0x2aa7,1,1,1,1,1,1,1,1,
+1,1,1,1,0x2aad,1,1,1,1,1,1,0xffb8,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0x8c0,0x1a25,1,1,1,1,1,1,1,0xfc00,1,1,
-1,1,1,1,1,1,0xfe0e,1,0xfe12,0xfe12,1,1,1,1,1,1,
-1,1,1,1,1,1,1,0xffb8,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,
+1,1,1,1,1,0x8c0,0x1a25,1,1,1,1,1,1,1,0xfc00,1,
+1,1,1,1,1,1,1,0xfe0e,1,0xfe12,0xfe12,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0xffb8,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,
-0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,
+1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,
-0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,
+1,1,1,1,1,1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,
+0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,
+0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,
+1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,0xffcc,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0xfe12,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,0xffc8,1,1,1,1,1,1,1,1,
+0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,0xffcc,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,0xffc8,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,0xffbc,0xffcc,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,1,1,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0xffbc,0xffcc,0xffb8,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,
-0xffcc,0xffb8,1,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,1,1,0xffb8,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,
+0xffcc,0xffcc,0xffb8,1,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,
+0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0x8c4,0x1a2b,0x8c8,0x1a31,0x8cc,0x1a37,0x8d0,0x1a3d,0x8d4,0x1a43,1,1,0x8d8,
-0x1a49,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xfe0e,0xfc00,1,1,1,1,0x8dc,0x1a4f,0x8e0,0x1a55,0x8e4,0x8e8,0x1a5b,0x1a61,
-0x8ec,0x1a67,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0x8c4,0x1a2b,0x8c8,0x1a31,0x8cc,0x1a37,0x8d0,0x1a3d,0x8d4,0x1a43,1,1,
+0x8d8,0x1a49,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0xfe0e,0xfc00,1,1,1,1,0x8dc,0x1a4f,0x8e0,0x1a55,0x8e4,0x8e8,0x1a5b,
+0x1a61,0x8ec,0x1a67,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,1,
+1,1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,
-0xfe12,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffcc,0xffcc,0xffcc,1,0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,
-0xffb8,0xffb8,0xffcc,1,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1,1,1,0xffb8,
-1,1,1,1,1,1,0xffcc,1,1,1,0xffcc,0xffcc,1,1,1,1,
-1,1,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffd4,
-0xffac,0xffb8,0xff94,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,
+1,0xfe12,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0xffcc,0xffcc,0xffcc,1,0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,0xffb8,
+0xffb8,0xffb8,0xffb8,0xffcc,1,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1,1,1,
+0xffb8,1,1,1,1,1,1,0xffcc,1,1,1,0xffcc,0xffcc,1,1,1,
+1,1,1,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,
+0xffd4,0xffac,0xffb8,0xff94,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
 0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffd0,0xffc8,0xffc8,0xffb8,1,0xffcc,0xffd2,0xffb8,
-0xffcc,0xffb8,0x1a6c,0x1a72,0x1a78,0x1a7e,0x1a85,0x1a8b,0x1a91,0x1a97,0x1a9f,0x1aa9,0x1ab0,0x1ab6,0x1abc,0x1ac2,
-0x1ac8,0x1ace,0x1ad5,0x1adb,0x1ae0,0x1ae6,0x1aee,0x1af8,0x1b02,0x1b0c,0x1b14,0x1b1a,0x1b20,0x1b26,0x1b2f,0x1b39,
-0x1b41,0x1b47,0x1b4c,0x1b52,0x1b58,0x1b5e,0x1b64,0x1b6a,0x1b70,0x1b76,0x1b7d,0x1b83,0x1b88,0x1b8e,0x1b94,0x1b9a,
-0x1ba2,0x1bac,0x1bb4,0x1bba,0x1bc0,0x1bc6,0x1bcc,0x1bd2,0xdde,0xde8,0x1bda,0x1be4,0x1bec,0x1bf2,0x1bf8,0x1bfe,
-0x1c04,0x1c0a,0x1c10,0x1c16,0x1c1d,0x1c23,0x1c28,0x1c2e,0x1c34,0x1c3a,0x1c40,0x1c46,0x1c4c,0x1c52,0x1c5a,0x1c64,
-0x1c6e,0x1c78,0x1c82,0x1c8c,0x1c96,0x1ca0,0x1ca9,0x1caf,0x1cb5,0x1cbb,0x1cc0,0x1cc6,0xdf2,0xdfc,0x1cce,0x1cd8,
-0x1ce0,0x1ce6,0x1cec,0x1cf2,0xe06,0xe10,0x1cfa,0x1d04,0x1d0e,0x1d18,0x1d22,0x1d2c,0x1d34,0x1d3a,0x1d40,0x1d46,
-0x1d4c,0x1d52,0x1d58,0x1d5e,0x1d64,0x1d6a,0x1d70,0x1d76,0x1d7c,0x1d82,0x1d8a,0x1d94,0x1d9e,0x1da8,0x1db0,0x1db6,
-0x1dbd,0x1dc3,0x1dc8,0x1dce,0x1dd4,0x1dda,0x1de0,0x1de6,0x1dec,0x1df2,0x1df9,0x1dff,0x1e05,0x1e0b,0x1e11,0x1e17,
-0x1e1c,0x1e22,0x1e28,0x1e2e,0x1e35,0x1e3b,0x1e41,0x1e47,0x1e4c,0x1e52,0x1e58,0x1e5e,1,0x1e65,1,1,
-1,1,0xe1a,0xe28,0x1e6a,0x1e70,0x1e78,0x1e82,0x1e8c,0x1e96,0x1ea0,0x1eaa,0x1eb4,0x1ebe,0x1ec8,0x1ed2,
-0x1edc,0x1ee6,0x1ef0,0x1efa,0x1f04,0x1f0e,0x1f18,0x1f22,0x1f2c,0x1f36,0xe36,0xe40,0x1f3e,0x1f44,0x1f4a,0x1f50,
-0x1f58,0x1f62,0x1f6c,0x1f76,0x1f80,0x1f8a,0x1f94,0x1f9e,0x1fa8,0x1fb2,0x1fba,0x1fc0,0x1fc6,0x1fcc,0xe4a,0xe54,
-0x1fd2,0x1fd8,0x1fe0,0x1fea,0x1ff4,0x1ffe,0x2008,0x2012,0x201c,0x2026,0x2030,0x203a,0x2044,0x204e,0x2058,0x2062,
-0x206c,0x2076,0x2080,0x208a,0x2094,0x209e,0x20a6,0x20ac,0x20b2,0x20b8,0x20c0,0x20ca,0x20d4,0x20de,0x20e8,0x20f2,
-0x20fc,0x2106,0x2110,0x211a,0x2122,0x2128,0x212f,0x2135,0x213a,0x2140,0x2146,0x214c,1,1,1,1,
-1,1,0xe5e,0xe74,0xe8c,0xe9a,0xea8,0xeb6,0xec4,0xed2,0xede,0xef4,0xf0c,0xf1a,0xf28,0xf36,
-0xf44,0xf52,0xf5e,0xf6c,0x2155,0x215f,0x2169,0x2173,1,1,0xf7a,0xf88,0x217d,0x2187,0x2191,0x219b,
-1,1,0xf96,0xfac,0xfc4,0xfd2,0xfe0,0xfee,0xffc,0x100a,0x1016,0x102c,0x1044,0x1052,0x1060,0x106e,
-0x107c,0x108a,0x1096,0x10a8,0x21a5,0x21af,0x21b9,0x21c3,0x21cd,0x21d7,0x10ba,0x10cc,0x21e1,0x21eb,0x21f5,0x21ff,
-0x2209,0x2213,0x10de,0x10ec,0x221d,0x2227,0x2231,0x223b,1,1,0x10fa,0x1108,0x2245,0x224f,0x2259,0x2263,
-1,1,0x1116,0x1128,0x226d,0x2277,0x2281,0x228b,0x2295,0x229f,1,0x113a,1,0x22a9,1,0x22b3,
-1,0x22bd,0x114c,0x1162,0x117a,0x1188,0x1196,0x11a4,0x11b2,0x11c0,0x11cc,0x11e2,0x11fa,0x1208,0x1216,0x1224,
-0x1232,0x1240,0x124c,0x3b9e,0x22c5,0x3ba6,0x1256,0x3bae,0x22cb,0x3bb6,0x22d1,0x3bbe,0x22d7,0x3bc6,0x1260,0x3bce,
-1,1,0x22de,0x22e8,0x22f7,0x2307,0x2317,0x2327,0x2337,0x2347,0x2352,0x235c,0x236b,0x237b,0x238b,0x239b,
-0x23ab,0x23bb,0x23c6,0x23d0,0x23df,0x23ef,0x23ff,0x240f,0x241f,0x242f,0x243a,0x2444,0x2453,0x2463,0x2473,0x2483,
-0x2493,0x24a3,0x24ae,0x24b8,0x24c7,0x24d7,0x24e7,0x24f7,0x2507,0x2517,0x2522,0x252c,0x253b,0x254b,0x255b,0x256b,
-0x257b,0x258b,0x2595,0x259b,0x25a3,0x25aa,0x25b3,1,0x126a,0x25bd,0x25c5,0x25cb,0x25d1,0x3bd6,0x25d6,1,
-0x2ab2,0x8f0,1,0x25dd,0x25e5,0x25ec,0x25f5,1,0x1274,0x25ff,0x2607,0x3bde,0x260d,0x3be6,0x2612,0x2619,
-0x261f,0x2625,0x262b,0x2631,0x2639,0x3bf0,1,1,0x2641,0x2649,0x2651,0x2657,0x265d,0x3bfa,1,0x2663,
-0x2669,0x266f,0x2675,0x267b,0x2683,0x3c04,0x268b,0x2691,0x2697,0x269f,0x26a7,0x26ad,0x26b3,0x3c0e,0x26b9,0x26bf,
-0x3c16,0x2ab7,1,1,0x26c7,0x26ce,0x26d7,1,0x127e,0x26e1,0x26e9,0x3c1e,0x26ef,0x3c26,0x26f4,0x2abb,
-0x8fc,1,0xfa09,0xfa09,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffcc,0xffcc,0xfe02,0xfe02,0xffcc,0xffcc,0xffcc,0xffcc,0xfe02,0xfe02,0xfe02,0xffcc,0xffcc,1,
-1,1,1,0xffcc,1,1,1,0xfe02,0xfe02,0xffcc,0xffb8,0xffcc,0xfe02,0xfe02,0xffb8,0xffb8,
-0xffb8,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0x2abe,1,1,1,0x2ac2,0x3c2e,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0x908,1,0x90c,1,0x910,1,1,1,1,1,0x26fb,0x2701,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,0x2707,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0x270d,0x2713,0x2719,0x914,1,0x918,1,0x91c,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0x920,0x271f,1,1,1,0x924,0x2725,1,0x928,0x272b,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,0x92c,0x2731,0x930,0x2737,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x934,
-1,1,1,0x273d,1,0x938,0x2743,0x93c,1,0x2749,0x940,0x274f,1,1,1,0x944,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0x2755,0x948,0x275b,1,0x94c,0x950,1,1,1,1,1,1,1,0x2761,
-0x2767,0x276d,0x2773,0x2779,0x954,0x958,0x277f,0x2785,0x95c,0x960,0x278b,0x2791,0x964,0x968,0x96c,0x970,
-1,1,0x2797,0x279d,0x974,0x978,0x27a3,0x27a9,0x97c,0x980,0x27af,0x27b5,1,1,1,1,
-1,1,1,0x984,0x988,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0x98c,1,1,1,1,1,0x990,0x994,1,0x998,0x27bb,0x27c1,
-0x27c7,0x27cd,1,1,0x99c,0x9a0,0x9a4,0x9a8,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0x27d3,0x27d9,0x27df,0x27e5,1,1,1,1,
-1,1,0x27eb,0x27f1,0x27f7,0x27fd,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x2ac7,
-0x2acb,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,0x2acf,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,
+0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffd0,0xffc8,0xffc8,0xffb8,0xffb4,0xffcc,0xffd2,
+0xffb8,0xffcc,0xffb8,0x1a6c,0x1a72,0x1a78,0x1a7e,0x1a85,0x1a8b,0x1a91,0x1a97,0x1a9f,0x1aa9,0x1ab0,0x1ab6,0x1abc,
+0x1ac2,0x1ac8,0x1ace,0x1ad5,0x1adb,0x1ae0,0x1ae6,0x1aee,0x1af8,0x1b02,0x1b0c,0x1b14,0x1b1a,0x1b20,0x1b26,0x1b2f,
+0x1b39,0x1b41,0x1b47,0x1b4c,0x1b52,0x1b58,0x1b5e,0x1b64,0x1b6a,0x1b70,0x1b76,0x1b7d,0x1b83,0x1b88,0x1b8e,0x1b94,
+0x1b9a,0x1ba2,0x1bac,0x1bb4,0x1bba,0x1bc0,0x1bc6,0x1bcc,0x1bd2,0xdde,0xde8,0x1bda,0x1be4,0x1bec,0x1bf2,0x1bf8,
+0x1bfe,0x1c04,0x1c0a,0x1c10,0x1c16,0x1c1d,0x1c23,0x1c28,0x1c2e,0x1c34,0x1c3a,0x1c40,0x1c46,0x1c4c,0x1c52,0x1c5a,
+0x1c64,0x1c6e,0x1c78,0x1c82,0x1c8c,0x1c96,0x1ca0,0x1ca9,0x1caf,0x1cb5,0x1cbb,0x1cc0,0x1cc6,0xdf2,0xdfc,0x1cce,
+0x1cd8,0x1ce0,0x1ce6,0x1cec,0x1cf2,0xe06,0xe10,0x1cfa,0x1d04,0x1d0e,0x1d18,0x1d22,0x1d2c,0x1d34,0x1d3a,0x1d40,
+0x1d46,0x1d4c,0x1d52,0x1d58,0x1d5e,0x1d64,0x1d6a,0x1d70,0x1d76,0x1d7c,0x1d82,0x1d8a,0x1d94,0x1d9e,0x1da8,0x1db0,
+0x1db6,0x1dbd,0x1dc3,0x1dc8,0x1dce,0x1dd4,0x1dda,0x1de0,0x1de6,0x1dec,0x1df2,0x1df9,0x1dff,0x1e05,0x1e0b,0x1e11,
+0x1e17,0x1e1c,0x1e22,0x1e28,0x1e2e,0x1e35,0x1e3b,0x1e41,0x1e47,0x1e4c,0x1e52,0x1e58,0x1e5e,1,0x1e65,1,
+1,1,1,0xe1a,0xe28,0x1e6a,0x1e70,0x1e78,0x1e82,0x1e8c,0x1e96,0x1ea0,0x1eaa,0x1eb4,0x1ebe,0x1ec8,
+0x1ed2,0x1edc,0x1ee6,0x1ef0,0x1efa,0x1f04,0x1f0e,0x1f18,0x1f22,0x1f2c,0x1f36,0xe36,0xe40,0x1f3e,0x1f44,0x1f4a,
+0x1f50,0x1f58,0x1f62,0x1f6c,0x1f76,0x1f80,0x1f8a,0x1f94,0x1f9e,0x1fa8,0x1fb2,0x1fba,0x1fc0,0x1fc6,0x1fcc,0xe4a,
+0xe54,0x1fd2,0x1fd8,0x1fe0,0x1fea,0x1ff4,0x1ffe,0x2008,0x2012,0x201c,0x2026,0x2030,0x203a,0x2044,0x204e,0x2058,
+0x2062,0x206c,0x2076,0x2080,0x208a,0x2094,0x209e,0x20a6,0x20ac,0x20b2,0x20b8,0x20c0,0x20ca,0x20d4,0x20de,0x20e8,
+0x20f2,0x20fc,0x2106,0x2110,0x211a,0x2122,0x2128,0x212f,0x2135,0x213a,0x2140,0x2146,0x214c,1,1,1,
+1,1,1,0xe5e,0xe74,0xe8c,0xe9a,0xea8,0xeb6,0xec4,0xed2,0xede,0xef4,0xf0c,0xf1a,0xf28,
+0xf36,0xf44,0xf52,0xf5e,0xf6c,0x2155,0x215f,0x2169,0x2173,1,1,0xf7a,0xf88,0x217d,0x2187,0x2191,
+0x219b,1,1,0xf96,0xfac,0xfc4,0xfd2,0xfe0,0xfee,0xffc,0x100a,0x1016,0x102c,0x1044,0x1052,0x1060,
+0x106e,0x107c,0x108a,0x1096,0x10a8,0x21a5,0x21af,0x21b9,0x21c3,0x21cd,0x21d7,0x10ba,0x10cc,0x21e1,0x21eb,0x21f5,
+0x21ff,0x2209,0x2213,0x10de,0x10ec,0x221d,0x2227,0x2231,0x223b,1,1,0x10fa,0x1108,0x2245,0x224f,0x2259,
+0x2263,1,1,0x1116,0x1128,0x226d,0x2277,0x2281,0x228b,0x2295,0x229f,1,0x113a,1,0x22a9,1,
+0x22b3,1,0x22bd,0x114c,0x1162,0x117a,0x1188,0x1196,0x11a4,0x11b2,0x11c0,0x11cc,0x11e2,0x11fa,0x1208,0x1216,
+0x1224,0x1232,0x1240,0x124c,0x3b9e,0x22c5,0x3ba6,0x1256,0x3bae,0x22cb,0x3bb6,0x22d1,0x3bbe,0x22d7,0x3bc6,0x1260,
+0x3bce,1,1,0x22de,0x22e8,0x22f7,0x2307,0x2317,0x2327,0x2337,0x2347,0x2352,0x235c,0x236b,0x237b,0x238b,
+0x239b,0x23ab,0x23bb,0x23c6,0x23d0,0x23df,0x23ef,0x23ff,0x240f,0x241f,0x242f,0x243a,0x2444,0x2453,0x2463,0x2473,
+0x2483,0x2493,0x24a3,0x24ae,0x24b8,0x24c7,0x24d7,0x24e7,0x24f7,0x2507,0x2517,0x2522,0x252c,0x253b,0x254b,0x255b,
+0x256b,0x257b,0x258b,0x2595,0x259b,0x25a3,0x25aa,0x25b3,1,0x126a,0x25bd,0x25c5,0x25cb,0x25d1,0x3bd6,0x25d6,
+1,0x2ab2,0x8f0,1,0x25dd,0x25e5,0x25ec,0x25f5,1,0x1274,0x25ff,0x2607,0x3bde,0x260d,0x3be6,0x2612,
+0x2619,0x261f,0x2625,0x262b,0x2631,0x2639,0x3bf0,1,1,0x2641,0x2649,0x2651,0x2657,0x265d,0x3bfa,1,
+0x2663,0x2669,0x266f,0x2675,0x267b,0x2683,0x3c04,0x268b,0x2691,0x2697,0x269f,0x26a7,0x26ad,0x26b3,0x3c0e,0x26b9,
+0x26bf,0x3c16,0x2ab7,1,1,0x26c7,0x26ce,0x26d7,1,0x127e,0x26e1,0x26e9,0x3c1e,0x26ef,0x3c26,0x26f4,
+0x2abb,0x8fc,1,0xfa09,0xfa09,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0xffcc,0xffcc,0xfe02,0xfe02,0xffcc,0xffcc,0xffcc,0xffcc,0xfe02,0xfe02,0xfe02,0xffcc,0xffcc,
+1,1,1,1,0xffcc,1,1,1,0xfe02,0xfe02,0xffcc,0xffb8,0xffcc,0xfe02,0xfe02,0xffb8,
+0xffb8,0xffb8,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,0x2abe,1,1,1,0x2ac2,0x3c2e,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0x908,1,0x90c,1,0x910,1,1,1,1,1,0x26fb,0x2701,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,0x2707,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0x270d,0x2713,0x2719,0x914,1,0x918,1,0x91c,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0x920,0x271f,1,1,1,0x924,0x2725,1,0x928,
+0x272b,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0x92c,0x2731,0x930,0x2737,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0x934,1,1,1,0x273d,1,0x938,0x2743,0x93c,1,0x2749,0x940,0x274f,1,1,1,
+0x944,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0x2755,0x948,0x275b,1,0x94c,0x950,1,1,1,1,1,1,1,
+0x2761,0x2767,0x276d,0x2773,0x2779,0x954,0x958,0x277f,0x2785,0x95c,0x960,0x278b,0x2791,0x964,0x968,0x96c,
+0x970,1,1,0x2797,0x279d,0x974,0x978,0x27a3,0x27a9,0x97c,0x980,0x27af,0x27b5,1,1,1,
+1,1,1,1,0x984,0x988,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,0x98c,1,1,1,1,1,0x990,0x994,1,0x998,0x27bb,
+0x27c1,0x27c7,0x27cd,1,1,0x99c,0x9a0,0x9a4,0x9a8,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0x27d3,0x27d9,0x27df,0x27e5,1,1,1,
+1,1,1,0x27eb,0x27f1,0x27f7,0x27fd,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0x2ac7,0x2acb,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x2acf,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,
 0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0xffb4,0xffc8,0xffd0,0xffbc,0xffc0,0xffc0,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x9ac,1,
-1,1,1,0x9b0,0x2803,0x9b4,0x2809,0x9b8,0x280f,0x9bc,0x2815,0x9c0,0x281b,0x9c4,0x2821,0x9c8,
-0x2827,0x9cc,0x282d,0x9d0,0x2833,0x9d4,0x2839,0x9d8,0x283f,0x9dc,0x2845,1,0x9e0,0x284b,0x9e4,0x2851,
-0x9e8,0x2857,1,1,1,1,1,0x9ec,0x285d,0x2863,0x9f4,0x2869,0x286f,0x9fc,0x2875,0x287b,
-0xa04,0x2881,0x2887,0xa0c,0x288d,0x2893,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,0x2899,1,1,1,1,0xfc10,
-0xfc10,1,1,0xa14,0x289f,1,1,1,1,1,1,1,0xa18,1,1,1,
-1,0xa1c,0x28a5,0xa20,0x28ab,0xa24,0x28b1,0xa28,0x28b7,0xa2c,0x28bd,0xa30,0x28c3,0xa34,0x28c9,0xa38,
-0x28cf,0xa3c,0x28d5,0xa40,0x28db,0xa44,0x28e1,0xa48,0x28e7,1,0xa4c,0x28ed,0xa50,0x28f3,0xa54,0x28f9,
-1,1,1,1,1,0xa58,0x28ff,0x2905,0xa60,0x290b,0x2911,0xa68,0x2917,0x291d,0xa70,0x2923,
-0x2929,0xa78,0x292f,0x2935,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,0xa80,0xa84,0xa88,0xa8c,1,0x293b,1,1,0x2941,0x2947,0x294d,
-0x2953,1,1,0xa90,0x2959,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0xffcc,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0xffb4,0xffc8,0xffd0,0xffbc,0xffc0,0xffc0,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x9ac,
+1,1,1,1,0x9b0,0x2803,0x9b4,0x2809,0x9b8,0x280f,0x9bc,0x2815,0x9c0,0x281b,0x9c4,0x2821,
+0x9c8,0x2827,0x9cc,0x282d,0x9d0,0x2833,0x9d4,0x2839,0x9d8,0x283f,0x9dc,0x2845,1,0x9e0,0x284b,0x9e4,
+0x2851,0x9e8,0x2857,1,1,1,1,1,0x9ec,0x285d,0x2863,0x9f4,0x2869,0x286f,0x9fc,0x2875,
+0x287b,0xa04,0x2881,0x2887,0xa0c,0x288d,0x2893,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,0x2899,1,1,1,1,
+0xfc10,0xfc10,1,1,0xa14,0x289f,1,1,1,1,1,1,1,0xa18,1,1,
+1,1,0xa1c,0x28a5,0xa20,0x28ab,0xa24,0x28b1,0xa28,0x28b7,0xa2c,0x28bd,0xa30,0x28c3,0xa34,0x28c9,
+0xa38,0x28cf,0xa3c,0x28d5,0xa40,0x28db,0xa44,0x28e1,0xa48,0x28e7,1,0xa4c,0x28ed,0xa50,0x28f3,0xa54,
+0x28f9,1,1,1,1,1,0xa58,0x28ff,0x2905,0xa60,0x290b,0x2911,0xa68,0x2917,0x291d,0xa70,
+0x2923,0x2929,0xa78,0x292f,0x2935,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0xa80,0xa84,0xa88,0xa8c,1,0x293b,1,1,0x2941,0x2947,
+0x294d,0x2953,1,1,0xa90,0x2959,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,
+1,1,1,1,1,0xffcc,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,
+1,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,1,
+1,1,1,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,0xffcc,1,0xffcc,0xffcc,0xffb8,1,1,0xffcc,
-0xffcc,1,1,1,1,1,0xffcc,0xffcc,1,0xffcc,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,
-1,1,1,1,1,1,1,1,0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,0xffcc,1,0xffcc,0xffcc,0xffb8,1,1,
+0xffcc,0xffcc,1,1,1,1,1,0xffcc,0xffcc,1,0xffcc,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,
+1,1,1,1,1,1,1,1,1,0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
 0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
-0x1289,0x1289,0x1289,0x1289,0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
+0x1289,0x1289,0x1289,0x1289,0x1289,0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
 0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
-0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
-0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0xae2,0x1289,0x1289,0x1289,
+0x1289,0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
+0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0xae2,0x1289,0x1289,
 0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,
-0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,0x3c66,1,0x3c66,0x3c66,0x3c66,0x3c66,0x3c66,0x3c66,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x3c66,0x3c66,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,0x3c66,1,1,1,1,0x3c66,1,1,1,0x3c66,1,0x3c66,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,0x3b97,1,0x2ad5,
-0x2ad9,0x2add,0x2ae1,0x2ae5,0x2ae9,0x2aed,0x2af1,0x2af1,0x2af5,0x2af9,0x2afd,0x2b01,0x2b05,0x2b09,0x2b0d,0x2b11,
-0x2b15,0x2b19,0x2b1d,0x2b21,0x2b25,0x2b29,0x2b2d,0x2b31,0x2b35,0x2b39,0x2b3d,0x2b41,0x2b45,0x2b49,0x2b4d,0x2b51,
-0x2b55,0x2b59,0x2b5d,0x2b61,0x2b65,0x2b69,0x2b6d,0x2b71,0x2b75,0x2b79,0x2b7d,0x2b81,0x2b85,0x2b89,0x2b8d,0x2b91,
-0x2b95,0x2b99,0x2b9d,0x2ba1,0x2ba5,0x2ba9,0x2bad,0x2bb1,0x2bb5,0x2bb9,0x2bbd,0x2bc1,0x2bc5,0x2bc9,0x2bcd,0x2bd1,
-0x2bd5,0x2bd9,0x2bdd,0x2be1,0x2be5,0x2be9,0x2bed,0x2bf1,0x2bf5,0x2bf9,0x2bfd,0x2c01,0x2c05,0x2c09,0x2c0d,0x2c11,
-0x2c15,0x2c19,0x2c1d,0x2c21,0x2c25,0x2c29,0x2c2d,0x2c31,0x2c35,0x2c39,0x2c3d,0x2b21,0x2c41,0x2c45,0x2c49,0x2c4d,
-0x2c51,0x2c55,0x2c59,0x2c5d,0x2c61,0x2c65,0x2c69,0x2c6d,0x2c71,0x2c75,0x2c79,0x2c7d,0x2c81,0x2c85,0x2c89,0x2c8d,
-0x2c91,0x2c95,0x2c99,0x2c9d,0x2ca1,0x2ca5,0x2ca9,0x2cad,0x2cb1,0x2cb5,0x2cb9,0x2cbd,0x2cc1,0x2cc5,0x2cc9,0x2ccd,
-0x2cd1,0x2cd5,0x2cd9,0x2cdd,0x2ce1,0x2ce5,0x2ce9,0x2ced,0x2cf1,0x2cf5,0x2cf9,0x2cfd,0x2d01,0x2d05,0x2d09,0x2d0d,
-0x2d11,0x2d15,0x2d19,0x2d1d,0x2d21,0x2d25,0x2d29,0x2d2d,0x2d31,0x2d35,0x2d39,0x2d3d,0x2d41,0x2d45,0x2d49,0x2d4d,
-0x2c89,0x2d51,0x2d55,0x2d59,0x2d5d,0x2d61,0x2d65,0x2d69,0x2d6d,0x2c49,0x2d71,0x2d75,0x2d79,0x2d7d,0x2d81,0x2d85,
-0x2d89,0x2d8d,0x2d91,0x2d95,0x2d99,0x2d9d,0x2da1,0x2da5,0x2da9,0x2dad,0x2db1,0x2db5,0x2db9,0x2dbd,0x2b21,0x2dc1,
-0x2dc5,0x2dc9,0x2dcd,0x2dd1,0x2dd5,0x2dd9,0x2ddd,0x2de1,0x2de5,0x2de9,0x2ded,0x2df1,0x2df5,0x2df9,0x2dfd,0x2e01,
-0x2e05,0x2e09,0x2e0d,0x2e11,0x2e15,0x2e19,0x2e1d,0x2e21,0x2e25,0x2e29,0x2c51,0x2e2d,0x2e31,0x2e35,0x2e39,0x2e3d,
-0x2e41,0x2e45,0x2e49,0x2e4d,0x2e51,0x2e55,0x2e59,0x2e5d,0x2e61,0x2e65,0x2e69,0x2e6d,0x2e71,0x2e75,0x2e79,0x2e7d,
-0x2e81,0x2e85,0x2e89,0x2e8d,0x2e91,0x2e95,0x2e99,0x2e9d,0x2ea1,0x2ea5,0x2ea9,0x2ead,0x2eb1,0x2eb5,0x2eb9,0x2ebd,
-0x2ec1,0x2ec5,0x2ec9,0x2ecd,0x2ed1,0x2ed5,0x2ed9,0x2edd,0x2ee1,0x2ee5,0x2ee9,0x2eed,0x2ef1,1,1,0x2ef5,
-1,0x2ef9,1,1,0x2efd,0x2f01,0x2f05,0x2f09,0x2f0d,0x2f11,0x2f15,0x2f19,0x2f1d,0x2f21,1,0x2f25,
-1,0x2f29,1,1,0x2f2d,0x2f31,1,1,1,0x2f35,0x2f39,0x2f3d,0x2f41,0x2f45,0x2f49,0x2f4d,
-0x2f51,0x2f55,0x2f59,0x2f5d,0x2f61,0x2f65,0x2f69,0x2f6d,0x2f71,0x2f75,0x2f79,0x2f7d,0x2f81,0x2f85,0x2f89,0x2f8d,
-0x2f91,0x2f95,0x2f99,0x2f9d,0x2fa1,0x2fa5,0x2fa9,0x2fad,0x2fb1,0x2fb5,0x2fb9,0x2fbd,0x2fc1,0x2fc5,0x2fc9,0x2fcd,
-0x2fd1,0x2fd5,0x2fd9,0x2fdd,0x2fe1,0x2fe5,0x2d25,0x2fe9,0x2fed,0x2ff1,0x2ff5,0x2ff9,0x2ffd,0x2ffd,0x3001,0x3005,
-0x3009,0x300d,0x3011,0x3015,0x3019,0x301d,0x2f2d,0x3021,0x3025,0x3029,0x302d,0x3031,0x3037,1,1,0x303b,
-0x303f,0x3043,0x3047,0x304b,0x304f,0x3053,0x3057,0x2f65,0x305b,0x305f,0x3063,0x2ef5,0x3067,0x306b,0x306f,0x3073,
-0x3077,0x307b,0x307f,0x3083,0x3087,0x308b,0x308f,0x3093,0x2f89,0x3097,0x2f8d,0x309b,0x309f,0x30a3,0x30a7,0x30ab,
-0x2ef9,0x2b75,0x30af,0x30b3,0x30b7,0x2c8d,0x2de9,0x30bb,0x30bf,0x2fa9,0x30c3,0x2fad,0x30c7,0x30cb,0x30cf,0x2f01,
-0x30d3,0x30d7,0x30db,0x30df,0x30e3,0x2f05,0x30e7,0x30eb,0x30ef,0x30f3,0x30f7,0x30fb,0x2fe5,0x30ff,0x3103,0x2d25,
-0x3107,0x2ff5,0x310b,0x310f,0x3113,0x3117,0x311b,0x3009,0x311f,0x2f29,0x3123,0x300d,0x2c41,0x3127,0x3011,0x312b,
-0x3019,0x312f,0x3133,0x3137,0x313b,0x313f,0x3021,0x2f19,0x3143,0x3025,0x3147,0x3029,0x314b,0x2af1,0x314f,0x3155,
-0x315b,0x3161,0x3165,0x3169,0x316d,0x3173,0x3179,0x317f,0x3183,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x3186,
-0xfe34,0x318c,1,1,1,1,1,1,1,1,1,1,0x3192,0x3198,0x31a0,0x31aa,
-0x31b2,0x31b8,0x31be,0x31c4,0x31ca,0x31d0,0x31d6,0x31dc,0x31e2,1,0x31e8,0x31ee,0x31f4,0x31fa,0x3200,1,
-0x3206,1,0x320c,0x3212,1,0x3218,0x321e,1,0x3224,0x322a,0x3230,0x3236,0x323c,0x3242,0x3248,0x324e,
-0x3254,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,
-0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffb8,1,0xffcc,1,1,1,1,1,1,1,1,0xffcc,0xfe02,0xffb8,
-1,1,1,1,0xfe12,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,
-1,1,1,1,1,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8,1,
-1,1,1,1,1,1,1,1,0xa94,0x295f,0xa9a,0x2969,1,1,1,1,
-1,0xaa0,1,1,1,1,1,0x2973,1,1,1,1,1,1,1,1,
-1,0xfe12,0xfc0e,1,1,1,1,1,1,1,0xfc00,1,1,1,1,1,
-1,0x297d,0x2987,1,0xaa6,0xaac,0xfe12,0xfe12,1,1,1,1,1,1,1,1,
-1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,0xfe0e,1,1,
-1,1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,0xfe0e,0xfe12,
-1,1,1,1,1,1,1,1,1,1,1,0xfe0e,0xfe0e,1,0xfc00,1,
-1,1,1,1,1,1,0xab2,1,1,1,0x2991,0x299b,0xfe12,1,1,1,
-1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,0xfe12,1,1,
-1,0xfe0e,1,1,1,1,1,1,1,1,1,0xfc00,1,1,1,1,
-1,1,1,1,0xabe,0xfc00,0x29a5,0x29af,0xfc00,0x29b9,1,1,0xfe12,0xfe0e,1,1,
-1,1,1,1,1,1,1,1,1,1,0xad0,0xad6,0x29c3,0x29cd,1,1,
-1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,0xfc00,1,1,1,
-1,0xadc,1,1,0x29d7,1,1,1,1,0xfe12,0xfe12,1,0xfe02,0xfe02,0xfe02,0xfe02,
-0xfe02,1,1,1,1,1,1,1,1,1,1,1,0xfe0c,0xfe0c,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,0xfe02,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,0x325a,0x3264,0x3278,0x3290,0x32a8,
-0x32c0,0x32d8,0xffb0,0xffb0,0xfe02,0xfe02,0xfe02,1,1,1,0xffc4,0xffb0,0xffb0,0xffb0,1,1,
-1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,
-0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,0x32e6,0x32f0,0x3304,
-0x331c,0x3334,0x334c,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,1,0xffcc,0xffcc,
-0xffcc,0xffcc,0xffcc,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,
-1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xfe0e,1,
-1,1,1,1,0x335b,0x335f,0x3363,0x3367,0x336d,0x2f4d,0x3371,0x3375,0x3379,0x337d,0x2f51,0x3381,
-0x3385,0x3389,0x2f55,0x338f,0x3393,0x3397,0x339b,0x33a1,0x33a5,0x33a9,0x33ad,0x33b3,0x33b7,0x33bb,0x33bf,0x303f,
-0x33c3,0x33c9,0x33cd,0x33d1,0x33d5,0x33d9,0x33dd,0x33e1,0x33e5,0x3053,0x2f59,0x2f5d,0x3057,0x33e9,0x33ed,0x2c59,
-0x33f1,0x2f61,0x33f5,0x33f9,0x33fd,0x3401,0x3401,0x3401,0x3405,0x340b,0x340f,0x3413,0x3417,0x341d,0x3421,0x3425,
-0x3429,0x342d,0x3431,0x3435,0x3439,0x343d,0x3441,0x3445,0x3449,0x344d,0x344d,0x305f,0x3451,0x3455,0x3459,0x345d,
-0x2f69,0x3461,0x3465,0x3469,0x2ebd,0x346d,0x3471,0x3475,0x3479,0x347d,0x3481,0x3485,0x3489,0x348d,0x3493,0x3497,
-0x349b,0x349f,0x34a3,0x34a7,0x34ab,0x34b1,0x34b7,0x34bb,0x34bf,0x34c3,0x34c7,0x34cb,0x34cf,0x34d3,0x34d7,0x34d7,
-0x34db,0x34e1,0x34e5,0x2c49,0x34e9,0x34ed,0x34f3,0x34f7,0x34fb,0x34ff,0x3503,0x3507,0x2f7d,0x350b,0x350f,0x3513,
-0x3519,0x351d,0x3523,0x3527,0x352b,0x352f,0x3533,0x3537,0x353b,0x353f,0x3543,0x3547,0x354b,0x354f,0x3555,0x3559,
-0x355d,0x3561,0x2b71,0x3565,0x356b,0x356f,0x356f,0x3575,0x3579,0x3579,0x357d,0x3581,0x3587,0x358d,0x3591,0x3595,
-0x3599,0x359d,0x35a1,0x35a5,0x35a9,0x35ad,0x35b1,0x2f81,0x35b5,0x35bb,0x35bf,0x35c3,0x308f,0x35c3,0x35c7,0x2f89,
-0x35cb,0x35cf,0x35d3,0x35d7,0x2f8d,0x2b05,0x35db,0x35df,0x35e3,0x35e7,0x35eb,0x35ef,0x35f3,0x35f9,0x35fd,0x3601,
-0x3605,0x3609,0x360d,0x3613,0x3617,0x361b,0x361f,0x3623,0x3627,0x362b,0x362f,0x3633,0x2f91,0x3637,0x363b,0x3641,
-0x3645,0x3649,0x364d,0x2f99,0x3651,0x3655,0x3659,0x365d,0x3661,0x3665,0x3669,0x366d,0x2b75,0x30af,0x3671,0x3675,
-0x3679,0x367d,0x3683,0x3687,0x368b,0x368f,0x2f9d,0x3693,0x3699,0x369d,0x36a1,0x3161,0x36a5,0x36a9,0x36ad,0x36b1,
-0x36b5,0x36bb,0x36bf,0x36c3,0x36c7,0x36cd,0x36d1,0x36d5,0x36d9,0x2c8d,0x36dd,0x36e1,0x36e7,0x36ed,0x36f3,0x36f7,
-0x36fd,0x3701,0x3705,0x3709,0x370d,0x2fa1,0x2de9,0x3711,0x3715,0x3719,0x371d,0x3723,0x3727,0x372b,0x372f,0x30bf,
-0x3733,0x3737,0x373d,0x3741,0x3745,0x374b,0x3751,0x3755,0x30c3,0x3759,0x375d,0x3761,0x3765,0x3769,0x376d,0x3771,
-0x3777,0x377b,0x3781,0x3785,0x378b,0x30cb,0x378f,0x3793,0x3799,0x379d,0x37a1,0x37a7,0x37ad,0x37b1,0x37b5,0x37b9,
-0x37bd,0x37bd,0x37c1,0x37c5,0x30d3,0x37c9,0x37cd,0x37d1,0x37d5,0x37d9,0x37df,0x37e3,0x2c55,0x37e9,0x37ef,0x37f3,
-0x37f9,0x37ff,0x3805,0x3809,0x30eb,0x380d,0x3813,0x3819,0x381f,0x3825,0x3829,0x3829,0x30ef,0x3169,0x382d,0x3831,
-0x3835,0x3839,0x383f,0x2bbd,0x30f7,0x3843,0x3847,0x2fcd,0x384d,0x3853,0x2f15,0x3859,0x385d,0x2fdd,0x3861,0x3865,
-0x3869,0x386f,0x386f,0x3875,0x3879,0x387d,0x3883,0x3887,0x388b,0x388f,0x3895,0x3899,0x389d,0x38a1,0x38a5,0x38a9,
-0x38af,0x38b3,0x38b7,0x38bb,0x38bf,0x38c3,0x38c7,0x38cd,0x38d3,0x38d7,0x38dd,0x38e1,0x38e7,0x38eb,0x2ff5,0x38ef,
-0x38f5,0x38fb,0x38ff,0x3905,0x3909,0x390f,0x3913,0x3917,0x391b,0x391f,0x3923,0x3927,0x392d,0x3933,0x3939,0x3575,
-0x393f,0x3943,0x3947,0x394b,0x394f,0x3953,0x3957,0x395b,0x395f,0x3963,0x3967,0x396b,0x2c9d,0x3971,0x3975,0x3979,
-0x397d,0x3981,0x3985,0x3001,0x3989,0x398d,0x3991,0x3995,0x3999,0x399f,0x39a5,0x39ab,0x39af,0x39b3,0x39b7,0x39bb,
-0x39c1,0x39c5,0x39cb,0x39cf,0x39d3,0x39d9,0x39df,0x39e3,0x2ba9,0x39e7,0x39eb,0x39ef,0x39f3,0x39f7,0x39fb,0x3113,
-0x39ff,0x3a03,0x3a07,0x3a0b,0x3a0f,0x3a13,0x3a17,0x3a1b,0x3a1f,0x3a23,0x3a29,0x3a2d,0x3a31,0x3a35,0x3a39,0x3a3d,
-0x3a43,0x3a49,0x3a4d,0x3a51,0x3127,0x312b,0x3a55,0x3a59,0x3a5f,0x3a63,0x3a67,0x3a6b,0x3a6f,0x3a75,0x3a7b,0x3a7f,
-0x3a83,0x3a87,0x3a8d,0x312f,0x3a91,0x3a97,0x3a9d,0x3aa1,0x3aa5,0x3aa9,0x3aaf,0x3ab3,0x3ab7,0x3abb,0x3abf,0x3ac3,
-0x3ac7,0x3acb,0x3ad1,0x3ad5,0x3ad9,0x3add,0x3ae3,0x3ae7,0x3aeb,0x3aef,0x3af3,0x3af9,0x3aff,0x3b03,0x3b07,0x3b0b,
-0x3b11,0x3b15,0x3147,0x3147,0x3b1b,0x3b1f,0x3b25,0x3b29,0x3b2d,0x3b31,0x3b35,0x3b39,0x3b3d,0x3b41,0x314b,0x3b47,
-0x3b4b,0x3b4f,0x3b53,0x3b57,0x3b5b,0x3b61,0x3b65,0x3b6b,0x3b71,0x3b77,0x3b7b,0x3b7f,0x3b83,0x3b87,0x3b8b,0x3b8f,
-0x3b93,0x3b97,1,1
+0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,0x3c66,1,0x3c66,0x3c66,0x3c66,0x3c66,0x3c66,0x3c66,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x3c66,
+0x3c66,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0x3c66,1,1,1,1,0x3c66,1,1,1,0x3c66,1,0x3c66,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x3b97,1,
+0x2ad5,0x2ad9,0x2add,0x2ae1,0x2ae5,0x2ae9,0x2aed,0x2af1,0x2af1,0x2af5,0x2af9,0x2afd,0x2b01,0x2b05,0x2b09,0x2b0d,
+0x2b11,0x2b15,0x2b19,0x2b1d,0x2b21,0x2b25,0x2b29,0x2b2d,0x2b31,0x2b35,0x2b39,0x2b3d,0x2b41,0x2b45,0x2b49,0x2b4d,
+0x2b51,0x2b55,0x2b59,0x2b5d,0x2b61,0x2b65,0x2b69,0x2b6d,0x2b71,0x2b75,0x2b79,0x2b7d,0x2b81,0x2b85,0x2b89,0x2b8d,
+0x2b91,0x2b95,0x2b99,0x2b9d,0x2ba1,0x2ba5,0x2ba9,0x2bad,0x2bb1,0x2bb5,0x2bb9,0x2bbd,0x2bc1,0x2bc5,0x2bc9,0x2bcd,
+0x2bd1,0x2bd5,0x2bd9,0x2bdd,0x2be1,0x2be5,0x2be9,0x2bed,0x2bf1,0x2bf5,0x2bf9,0x2bfd,0x2c01,0x2c05,0x2c09,0x2c0d,
+0x2c11,0x2c15,0x2c19,0x2c1d,0x2c21,0x2c25,0x2c29,0x2c2d,0x2c31,0x2c35,0x2c39,0x2c3d,0x2b21,0x2c41,0x2c45,0x2c49,
+0x2c4d,0x2c51,0x2c55,0x2c59,0x2c5d,0x2c61,0x2c65,0x2c69,0x2c6d,0x2c71,0x2c75,0x2c79,0x2c7d,0x2c81,0x2c85,0x2c89,
+0x2c8d,0x2c91,0x2c95,0x2c99,0x2c9d,0x2ca1,0x2ca5,0x2ca9,0x2cad,0x2cb1,0x2cb5,0x2cb9,0x2cbd,0x2cc1,0x2cc5,0x2cc9,
+0x2ccd,0x2cd1,0x2cd5,0x2cd9,0x2cdd,0x2ce1,0x2ce5,0x2ce9,0x2ced,0x2cf1,0x2cf5,0x2cf9,0x2cfd,0x2d01,0x2d05,0x2d09,
+0x2d0d,0x2d11,0x2d15,0x2d19,0x2d1d,0x2d21,0x2d25,0x2d29,0x2d2d,0x2d31,0x2d35,0x2d39,0x2d3d,0x2d41,0x2d45,0x2d49,
+0x2d4d,0x2c89,0x2d51,0x2d55,0x2d59,0x2d5d,0x2d61,0x2d65,0x2d69,0x2d6d,0x2c49,0x2d71,0x2d75,0x2d79,0x2d7d,0x2d81,
+0x2d85,0x2d89,0x2d8d,0x2d91,0x2d95,0x2d99,0x2d9d,0x2da1,0x2da5,0x2da9,0x2dad,0x2db1,0x2db5,0x2db9,0x2dbd,0x2b21,
+0x2dc1,0x2dc5,0x2dc9,0x2dcd,0x2dd1,0x2dd5,0x2dd9,0x2ddd,0x2de1,0x2de5,0x2de9,0x2ded,0x2df1,0x2df5,0x2df9,0x2dfd,
+0x2e01,0x2e05,0x2e09,0x2e0d,0x2e11,0x2e15,0x2e19,0x2e1d,0x2e21,0x2e25,0x2e29,0x2c51,0x2e2d,0x2e31,0x2e35,0x2e39,
+0x2e3d,0x2e41,0x2e45,0x2e49,0x2e4d,0x2e51,0x2e55,0x2e59,0x2e5d,0x2e61,0x2e65,0x2e69,0x2e6d,0x2e71,0x2e75,0x2e79,
+0x2e7d,0x2e81,0x2e85,0x2e89,0x2e8d,0x2e91,0x2e95,0x2e99,0x2e9d,0x2ea1,0x2ea5,0x2ea9,0x2ead,0x2eb1,0x2eb5,0x2eb9,
+0x2ebd,0x2ec1,0x2ec5,0x2ec9,0x2ecd,0x2ed1,0x2ed5,0x2ed9,0x2edd,0x2ee1,0x2ee5,0x2ee9,0x2eed,0x2ef1,1,1,
+0x2ef5,1,0x2ef9,1,1,0x2efd,0x2f01,0x2f05,0x2f09,0x2f0d,0x2f11,0x2f15,0x2f19,0x2f1d,0x2f21,1,
+0x2f25,1,0x2f29,1,1,0x2f2d,0x2f31,1,1,1,0x2f35,0x2f39,0x2f3d,0x2f41,0x2f45,0x2f49,
+0x2f4d,0x2f51,0x2f55,0x2f59,0x2f5d,0x2f61,0x2f65,0x2f69,0x2f6d,0x2f71,0x2f75,0x2f79,0x2f7d,0x2f81,0x2f85,0x2f89,
+0x2f8d,0x2f91,0x2f95,0x2f99,0x2f9d,0x2fa1,0x2fa5,0x2fa9,0x2fad,0x2fb1,0x2fb5,0x2fb9,0x2fbd,0x2fc1,0x2fc5,0x2fc9,
+0x2fcd,0x2fd1,0x2fd5,0x2fd9,0x2fdd,0x2fe1,0x2fe5,0x2d25,0x2fe9,0x2fed,0x2ff1,0x2ff5,0x2ff9,0x2ffd,0x2ffd,0x3001,
+0x3005,0x3009,0x300d,0x3011,0x3015,0x3019,0x301d,0x2f2d,0x3021,0x3025,0x3029,0x302d,0x3031,0x3037,1,1,
+0x303b,0x303f,0x3043,0x3047,0x304b,0x304f,0x3053,0x3057,0x2f65,0x305b,0x305f,0x3063,0x2ef5,0x3067,0x306b,0x306f,
+0x3073,0x3077,0x307b,0x307f,0x3083,0x3087,0x308b,0x308f,0x3093,0x2f89,0x3097,0x2f8d,0x309b,0x309f,0x30a3,0x30a7,
+0x30ab,0x2ef9,0x2b75,0x30af,0x30b3,0x30b7,0x2c8d,0x2de9,0x30bb,0x30bf,0x2fa9,0x30c3,0x2fad,0x30c7,0x30cb,0x30cf,
+0x2f01,0x30d3,0x30d7,0x30db,0x30df,0x30e3,0x2f05,0x30e7,0x30eb,0x30ef,0x30f3,0x30f7,0x30fb,0x2fe5,0x30ff,0x3103,
+0x2d25,0x3107,0x2ff5,0x310b,0x310f,0x3113,0x3117,0x311b,0x3009,0x311f,0x2f29,0x3123,0x300d,0x2c41,0x3127,0x3011,
+0x312b,0x3019,0x312f,0x3133,0x3137,0x313b,0x313f,0x3021,0x2f19,0x3143,0x3025,0x3147,0x3029,0x314b,0x2af1,0x314f,
+0x3155,0x315b,0x3161,0x3165,0x3169,0x316d,0x3173,0x3179,0x317f,0x3183,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0x3186,0xfe34,0x318c,1,1,1,1,1,1,1,1,1,1,0x3192,0x3198,0x31a0,
+0x31aa,0x31b2,0x31b8,0x31be,0x31c4,0x31ca,0x31d0,0x31d6,0x31dc,0x31e2,1,0x31e8,0x31ee,0x31f4,0x31fa,0x3200,
+1,0x3206,1,0x320c,0x3212,1,0x3218,0x321e,1,0x3224,0x322a,0x3230,0x3236,0x323c,0x3242,0x3248,
+0x324e,0x3254,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,
+0xffb8,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0xffb8,1,0xffcc,1,1,1,1,1,1,1,1,0xffcc,0xfe02,
+0xffb8,1,1,1,1,0xfe12,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1,
+1,1,1,1,1,1,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8,
+1,1,0xffcc,0xffb8,0xffcc,0xffb8,1,1,1,1,1,1,1,1,1,1,
+0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,
+1,1,1,1,1,1,1,1,1,0xa94,0x295f,0xa9a,0x2969,1,1,1,
+1,1,0xaa0,1,1,1,1,1,0x2973,1,1,1,1,1,1,1,
+1,1,0xfe12,0xfc0e,1,1,1,1,1,1,1,0xfc00,1,1,1,1,
+1,1,0x297d,0x2987,1,0xaa6,0xaac,0xfe12,0xfe12,1,1,1,1,1,1,1,
+1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,0xfe0e,1,
+1,1,1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,0xfe0e,
+0xfe12,1,1,1,1,1,1,1,1,1,1,1,0xfe0e,0xfe0e,1,0xfc00,
+1,1,1,1,1,1,1,0xab2,1,1,1,0x2991,0x299b,0xfe12,1,1,
+1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,0xfe12,1,
+1,1,0xfe0e,1,1,1,1,1,1,1,1,1,0xfc00,1,1,1,
+1,1,1,1,1,0xabe,0xfc00,0x29a5,0x29af,0xfc00,0x29b9,1,1,0xfe12,0xfe0e,1,
+1,1,1,1,1,1,1,1,1,1,1,0xad0,0xad6,0x29c3,0x29cd,1,
+1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,0xfc00,1,1,
+1,1,0xadc,1,1,0x29d7,1,1,1,1,0xfe12,0xfe12,1,0xfe02,0xfe02,0xfe02,
+0xfe02,0xfe02,1,1,1,1,1,1,1,1,1,1,1,0xfe0c,0xfe0c,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe02,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,0x325a,0x3264,0x3278,0x3290,
+0x32a8,0x32c0,0x32d8,0xffb0,0xffb0,0xfe02,0xfe02,0xfe02,1,1,1,0xffc4,0xffb0,0xffb0,0xffb0,1,
+1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,0xffcc,
+0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,0x32e6,0x32f0,
+0x3304,0x331c,0x3334,0x334c,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,1,0xffcc,
+0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,
+1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xfe0e,
+1,1,1,1,1,0x335b,0x335f,0x3363,0x3367,0x336d,0x2f4d,0x3371,0x3375,0x3379,0x337d,0x2f51,
+0x3381,0x3385,0x3389,0x2f55,0x338f,0x3393,0x3397,0x339b,0x33a1,0x33a5,0x33a9,0x33ad,0x33b3,0x33b7,0x33bb,0x33bf,
+0x303f,0x33c3,0x33c9,0x33cd,0x33d1,0x33d5,0x33d9,0x33dd,0x33e1,0x33e5,0x3053,0x2f59,0x2f5d,0x3057,0x33e9,0x33ed,
+0x2c59,0x33f1,0x2f61,0x33f5,0x33f9,0x33fd,0x3401,0x3401,0x3401,0x3405,0x340b,0x340f,0x3413,0x3417,0x341d,0x3421,
+0x3425,0x3429,0x342d,0x3431,0x3435,0x3439,0x343d,0x3441,0x3445,0x3449,0x344d,0x344d,0x305f,0x3451,0x3455,0x3459,
+0x345d,0x2f69,0x3461,0x3465,0x3469,0x2ebd,0x346d,0x3471,0x3475,0x3479,0x347d,0x3481,0x3485,0x3489,0x348d,0x3493,
+0x3497,0x349b,0x349f,0x34a3,0x34a7,0x34ab,0x34b1,0x34b7,0x34bb,0x34bf,0x34c3,0x34c7,0x34cb,0x34cf,0x34d3,0x34d7,
+0x34d7,0x34db,0x34e1,0x34e5,0x2c49,0x34e9,0x34ed,0x34f3,0x34f7,0x34fb,0x34ff,0x3503,0x3507,0x2f7d,0x350b,0x350f,
+0x3513,0x3519,0x351d,0x3523,0x3527,0x352b,0x352f,0x3533,0x3537,0x353b,0x353f,0x3543,0x3547,0x354b,0x354f,0x3555,
+0x3559,0x355d,0x3561,0x2b71,0x3565,0x356b,0x356f,0x356f,0x3575,0x3579,0x3579,0x357d,0x3581,0x3587,0x358d,0x3591,
+0x3595,0x3599,0x359d,0x35a1,0x35a5,0x35a9,0x35ad,0x35b1,0x2f81,0x35b5,0x35bb,0x35bf,0x35c3,0x308f,0x35c3,0x35c7,
+0x2f89,0x35cb,0x35cf,0x35d3,0x35d7,0x2f8d,0x2b05,0x35db,0x35df,0x35e3,0x35e7,0x35eb,0x35ef,0x35f3,0x35f9,0x35fd,
+0x3601,0x3605,0x3609,0x360d,0x3613,0x3617,0x361b,0x361f,0x3623,0x3627,0x362b,0x362f,0x3633,0x2f91,0x3637,0x363b,
+0x3641,0x3645,0x3649,0x364d,0x2f99,0x3651,0x3655,0x3659,0x365d,0x3661,0x3665,0x3669,0x366d,0x2b75,0x30af,0x3671,
+0x3675,0x3679,0x367d,0x3683,0x3687,0x368b,0x368f,0x2f9d,0x3693,0x3699,0x369d,0x36a1,0x3161,0x36a5,0x36a9,0x36ad,
+0x36b1,0x36b5,0x36bb,0x36bf,0x36c3,0x36c7,0x36cd,0x36d1,0x36d5,0x36d9,0x2c8d,0x36dd,0x36e1,0x36e7,0x36ed,0x36f3,
+0x36f7,0x36fd,0x3701,0x3705,0x3709,0x370d,0x2fa1,0x2de9,0x3711,0x3715,0x3719,0x371d,0x3723,0x3727,0x372b,0x372f,
+0x30bf,0x3733,0x3737,0x373d,0x3741,0x3745,0x374b,0x3751,0x3755,0x30c3,0x3759,0x375d,0x3761,0x3765,0x3769,0x376d,
+0x3771,0x3777,0x377b,0x3781,0x3785,0x378b,0x30cb,0x378f,0x3793,0x3799,0x379d,0x37a1,0x37a7,0x37ad,0x37b1,0x37b5,
+0x37b9,0x37bd,0x37bd,0x37c1,0x37c5,0x30d3,0x37c9,0x37cd,0x37d1,0x37d5,0x37d9,0x37df,0x37e3,0x2c55,0x37e9,0x37ef,
+0x37f3,0x37f9,0x37ff,0x3805,0x3809,0x30eb,0x380d,0x3813,0x3819,0x381f,0x3825,0x3829,0x3829,0x30ef,0x3169,0x382d,
+0x3831,0x3835,0x3839,0x383f,0x2bbd,0x30f7,0x3843,0x3847,0x2fcd,0x384d,0x3853,0x2f15,0x3859,0x385d,0x2fdd,0x3861,
+0x3865,0x3869,0x386f,0x386f,0x3875,0x3879,0x387d,0x3883,0x3887,0x388b,0x388f,0x3895,0x3899,0x389d,0x38a1,0x38a5,
+0x38a9,0x38af,0x38b3,0x38b7,0x38bb,0x38bf,0x38c3,0x38c7,0x38cd,0x38d3,0x38d7,0x38dd,0x38e1,0x38e7,0x38eb,0x2ff5,
+0x38ef,0x38f5,0x38fb,0x38ff,0x3905,0x3909,0x390f,0x3913,0x3917,0x391b,0x391f,0x3923,0x3927,0x392d,0x3933,0x3939,
+0x3575,0x393f,0x3943,0x3947,0x394b,0x394f,0x3953,0x3957,0x395b,0x395f,0x3963,0x3967,0x396b,0x2c9d,0x3971,0x3975,
+0x3979,0x397d,0x3981,0x3985,0x3001,0x3989,0x398d,0x3991,0x3995,0x3999,0x399f,0x39a5,0x39ab,0x39af,0x39b3,0x39b7,
+0x39bb,0x39c1,0x39c5,0x39cb,0x39cf,0x39d3,0x39d9,0x39df,0x39e3,0x2ba9,0x39e7,0x39eb,0x39ef,0x39f3,0x39f7,0x39fb,
+0x3113,0x39ff,0x3a03,0x3a07,0x3a0b,0x3a0f,0x3a13,0x3a17,0x3a1b,0x3a1f,0x3a23,0x3a29,0x3a2d,0x3a31,0x3a35,0x3a39,
+0x3a3d,0x3a43,0x3a49,0x3a4d,0x3a51,0x3127,0x312b,0x3a55,0x3a59,0x3a5f,0x3a63,0x3a67,0x3a6b,0x3a6f,0x3a75,0x3a7b,
+0x3a7f,0x3a83,0x3a87,0x3a8d,0x312f,0x3a91,0x3a97,0x3a9d,0x3aa1,0x3aa5,0x3aa9,0x3aaf,0x3ab3,0x3ab7,0x3abb,0x3abf,
+0x3ac3,0x3ac7,0x3acb,0x3ad1,0x3ad5,0x3ad9,0x3add,0x3ae3,0x3ae7,0x3aeb,0x3aef,0x3af3,0x3af9,0x3aff,0x3b03,0x3b07,
+0x3b0b,0x3b11,0x3b15,0x3147,0x3147,0x3b1b,0x3b1f,0x3b25,0x3b29,0x3b2d,0x3b31,0x3b35,0x3b39,0x3b3d,0x3b41,0x314b,
+0x3b47,0x3b4b,0x3b4f,0x3b53,0x3b57,0x3b5b,0x3b61,0x3b65,0x3b6b,0x3b71,0x3b77,0x3b7b,0x3b7f,0x3b83,0x3b87,0x3b8b,
+0x3b8f,0x3b93,0x3b97,1,1,1
 };
 
 static const UCPTrie norm2_nfc_data_trie={
     norm2_nfc_data_trieIndex,
     { norm2_nfc_data_trieData },
-    1746, 7892,
+    1748, 7974,
     0x2fc00, 0x30,
     0, 0,
     0, 0,
@@ -1128,7 +1131,7 @@ static const uint16_t norm2_nfc_data_extraData[7732]={
 };
 
 static const uint8_t norm2_nfc_data_smallFCD[256]={
-0xc0,0xef,3,0x7f,0xdf,0x70,0xcf,0x87,0xc7,0xe6,0x66,0x46,0x64,0x46,0x66,0x5b,
+0xc0,0xef,3,0x7f,0xdf,0x70,0xcf,0x87,0xd7,0xe6,0x66,0x46,0x66,0x46,0x66,0x5b,
 0x12,0,0,4,0,0,0,0x43,0x20,2,0x69,0xae,0xc2,0xc0,0xff,0xff,
 0xc0,0x72,0xbf,0,0,0,0,0,0,0,0x40,0,0x80,0x88,0,0,
 0xfe,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

+ 2 - 2
thirdparty/icu4c/common/normalizer2impl.cpp

@@ -86,7 +86,7 @@ UChar32 codePointFromValidUTF8(const uint8_t *cpStart, const uint8_t *cpLimit) {
     case 4:
         return ((c&7)<<18) | ((cpStart[1]&0x3f)<<12) | ((cpStart[2]&0x3f)<<6) | (cpStart[3]&0x3f);
     default:
-        UPRV_UNREACHABLE;  // Should not occur.
+        UPRV_UNREACHABLE_EXIT;  // Should not occur.
     }
 }
 
@@ -2504,7 +2504,7 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode
             UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK);
             canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size();
             umutablecptrie_set(mutableTrie, decompLead, canonValue, &errorCode);
-            canonStartSets.addElement(set, errorCode);
+            canonStartSets.addElementX(set, errorCode);
             if(firstOrigin!=0) {
                 set->add(firstOrigin);
             }

+ 2 - 2
thirdparty/icu4c/common/normalizer2impl.h

@@ -241,7 +241,7 @@ private:
  * Low-level implementation of the Unicode Normalization Algorithm.
  * For the data structure and details see the documentation at the end of
  * this normalizer2impl.h and in the design doc at
- * http://site.icu-project.org/design/normalization/custom
+ * https://icu.unicode.org/design/normalization/custom
  */
 class U_COMMON_API Normalizer2Impl : public UObject {
 public:
@@ -806,7 +806,7 @@ unorm_getFCD16(UChar32 c);
  * Constants are defined as enum values of the Normalizer2Impl class.
  *
  * Many details of the data structures are described in the design doc
- * which is at http://site.icu-project.org/design/normalization/custom
+ * which is at https://icu.unicode.org/design/normalization/custom
  *
  * int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_NORM_TRIE_OFFSET]/4;
  *

+ 1 - 1
thirdparty/icu4c/common/normlzr.cpp

@@ -108,7 +108,7 @@ int32_t Normalizer::hashCode() const
     return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex;
 }
     
-UBool Normalizer::operator==(const Normalizer& that) const
+bool Normalizer::operator==(const Normalizer& that) const
 {
     return
         this==&that ||

+ 2 - 2
thirdparty/icu4c/common/pluralmap.h

@@ -46,7 +46,7 @@ public:
 
     /**
      * Converts a category name such as "zero", "one", "two", "few", "many"
-     * or "other" to a category enum.  Returns NONE for urecongized
+     * or "other" to a category enum.  Returns NONE for unrecognized
      * category name.
      */
     static Category toCategory(const UnicodeString &categoryName);
@@ -62,7 +62,7 @@ public:
  * A Map of plural categories to values. It maintains ownership of the
  * values.
  *
- * Type T is the value type. T must provide the followng:
+ * Type T is the value type. T must provide the following:
  * 1) Default constructor
  * 2) Copy constructor
  * 3) Assignment operator

File diff suppressed because it is too large
+ 956 - 935
thirdparty/icu4c/common/propname_data.h


+ 10 - 1
thirdparty/icu4c/common/putil.cpp

@@ -727,8 +727,10 @@ static char *gTimeZoneBufferPtr = NULL;
 
 #if !U_PLATFORM_USES_ONLY_WIN32_API
 #define isNonDigit(ch) (ch < '0' || '9' < ch)
+#define isDigit(ch) ('0' <= ch && ch <= '9')
 static UBool isValidOlsonID(const char *id) {
     int32_t idx = 0;
+    int32_t idxMax = 0;
 
     /* Determine if this is something like Iceland (Olson ID)
     or AST4ADT (non-Olson ID) */
@@ -736,6 +738,13 @@ static UBool isValidOlsonID(const char *id) {
         idx++;
     }
 
+    /* Allow at maximum 2 numbers at the end of the id to support zone id's
+    like GMT+11. */
+    idxMax = idx + 2;
+    while (id[idx] && isDigit(id[idx]) && idx < idxMax) {
+        idx++;
+    }
+
     /* If we went through the whole string, then it might be okay.
     The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
     "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
@@ -918,7 +927,7 @@ static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFil
         if (sizeFile != tzInfo->defaultTZFileSize) {
             result = FALSE;
         } else {
-            /* Store the data from the files in seperate buffers and
+            /* Store the data from the files in separate buffers and
              * compare each byte to determine equality.
              */
             if (tzInfo->defaultTZBuffer == NULL) {

+ 12 - 11
thirdparty/icu4c/common/rbbi.cpp

@@ -262,7 +262,7 @@ RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) {
     fCharIter = &fSCharIter;
 
     if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) {
-        // This is a little bit tricky - it will intially appear that
+        // This is a little bit tricky - it will initially appear that
         //  this->fCharIter is adopted, even if that->fCharIter was
         //  not adopted.  That's ok.
         fCharIter = that.fCharIter->clone();
@@ -366,16 +366,16 @@ RuleBasedBreakIterator::clone() const {
 }
 
 /**
- * Equality operator.  Returns TRUE if both BreakIterators are of the
+ * Equality operator.  Returns true if both BreakIterators are of the
  * same class, have the same behavior, and iterate over the same text.
  */
-UBool
+bool
 RuleBasedBreakIterator::operator==(const BreakIterator& that) const {
     if (typeid(*this) != typeid(that)) {
-        return FALSE;
+        return false;
     }
     if (this == &that) {
-        return TRUE;
+        return true;
     }
 
     // The base class BreakIterator carries no state that participates in equality,
@@ -388,21 +388,21 @@ RuleBasedBreakIterator::operator==(const BreakIterator& that) const {
         // The two break iterators are operating on different text,
         //   or have a different iteration position.
         //   Note that fText's position is always the same as the break iterator's position.
-        return FALSE;
+        return false;
     }
 
     if (!(fPosition == that2.fPosition &&
             fRuleStatusIndex == that2.fRuleStatusIndex &&
             fDone == that2.fDone)) {
-        return FALSE;
+        return false;
     }
 
     if (that2.fData == fData ||
         (fData != NULL && that2.fData != NULL && *that2.fData == *fData)) {
             // The two break iterators are using the same rules.
-            return TRUE;
+            return true;
         }
-    return FALSE;
+    return false;
 }
 
 /**
@@ -671,7 +671,7 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) {
 }
 
 /**
- * Returns true if the specfied position is a boundary position.  As a side
+ * Returns true if the specified position is a boundary position.  As a side
  * effect, leaves the iterator pointing to the first boundary position at
  * or after "offset".
  *
@@ -1037,7 +1037,7 @@ int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) {
 
         if (state == STOP_STATE) {
             // This is the normal exit from the lookup state machine.
-            // Transistion to state zero means we have found a safe point.
+            // Transition to state zero means we have found a safe point.
             break;
         }
     }
@@ -1260,6 +1260,7 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) {
         // first.
         fLanguageBreakEngines->insertElementAt(fUnhandledBreakEngine, 0, status);
         // If we can't insert it, or creation failed, get rid of it
+        U_ASSERT(!fLanguageBreakEngines->hasDeleter());
         if (U_FAILURE(status)) {
             delete fUnhandledBreakEngine;
             fUnhandledBreakEngine = 0;

+ 5 - 5
thirdparty/icu4c/common/rbbi_cache.cpp

@@ -74,7 +74,7 @@ UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_
             return TRUE;
         }
     }
-    UPRV_UNREACHABLE;
+    UPRV_UNREACHABLE_EXIT;
 }
 
 
@@ -114,7 +114,7 @@ UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_
             return TRUE;
         }
     }
-    UPRV_UNREACHABLE;
+    UPRV_UNREACHABLE_EXIT;
 }
 
 void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
@@ -163,7 +163,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
         // Ask the language object if there are any breaks. It will add them to the cache and
         // leave the text pointer on the other side of its range, ready to search for the next one.
         if (lbe != NULL) {
-            foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
+            foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks, status);
         }
 
         // Reload the loop variables for the next go-round
@@ -201,7 +201,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo
 
 
 /*
- *   BreakCache implemetation
+ *   BreakCache implementation
  */
 
 RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
@@ -386,7 +386,7 @@ UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorC
         // Add following position(s) to the cache.
         while (fBoundaries[fEndBufIdx] < position) {
             if (!populateFollowing()) {
-                UPRV_UNREACHABLE;
+                UPRV_UNREACHABLE_EXIT;
             }
         }
         fBufIdx = fEndBufIdx;                      // Set iterator position to the end of the buffer.

+ 6 - 6
thirdparty/icu4c/common/rbbidata.cpp

@@ -170,17 +170,17 @@ RBBIDataWrapper::~RBBIDataWrapper() {
 //                  should still be ==.
 //
 //-----------------------------------------------------------------------------
-UBool RBBIDataWrapper::operator ==(const RBBIDataWrapper &other) const {
+bool RBBIDataWrapper::operator ==(const RBBIDataWrapper &other) const {
     if (fHeader == other.fHeader) {
-        return TRUE;
+        return true;
     }
     if (fHeader->fLength != other.fHeader->fLength) {
-        return FALSE;
+        return false;
     }
     if (uprv_memcmp(fHeader, other.fHeader, fHeader->fLength) == 0) {
-        return TRUE;
+        return true;
     }
-    return FALSE;
+    return false;
 }
 
 int32_t  RBBIDataWrapper::hashCode() {
@@ -283,7 +283,7 @@ void  RBBIDataWrapper::printData() {
     printTable("Forward State Transition Table", fForwardTable);
     printTable("Reverse State Transition Table", fReverseTable);
 
-    RBBIDebugPrintf("\nOrignal Rules source:\n");
+    RBBIDebugPrintf("\nOriginal Rules source:\n");
     for (int32_t c=0; fRuleSource[c] != 0; c++) {
         RBBIDebugPrintf("%c", fRuleSource[c]);
     }

+ 1 - 1
thirdparty/icu4c/common/rbbidata.h

@@ -171,7 +171,7 @@ public:
     void                  init(const RBBIDataHeader *data, UErrorCode &status);
     RBBIDataWrapper      *addReference();
     void                  removeReference();
-    UBool                 operator ==(const RBBIDataWrapper &other) const;
+    bool                  operator ==(const RBBIDataWrapper &other) const;
     int32_t               hashCode();
     const UnicodeString  &getRuleSourceString() const;
     void                  printData();

+ 2 - 1
thirdparty/icu4c/common/rbbinode.cpp

@@ -266,6 +266,7 @@ void   RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &s
     if (U_FAILURE(status)) {
         return;
     }
+    U_ASSERT(!dest->hasDeleter());
     if (fType == kind) {
         dest->addElement(this, status);
     }
@@ -351,7 +352,7 @@ void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
     printNode(node);
     if (node != NULL) {
         // Only dump the definition under a variable reference if asked to.
-        // Unconditinally dump children of all other node types.
+        // Unconditionally dump children of all other node types.
         if (node->fType != varRef) {
             if (node->fLeftChild != NULL) {
                 printTree(node->fLeftChild, FALSE);

+ 2 - 2
thirdparty/icu4c/common/rbbinode.h

@@ -71,7 +71,7 @@ class RBBINode : public UMemory {
         int           fLastPos;             //  Last position in the rule source string
                                             //    of any text associated with this node.
                                             //    If there's a right child, this will be the same
-                                            //    as that child's last postion.
+                                            //    as that child's last position.
 
         UBool         fNullable;            // See Aho.
         int32_t       fVal;                 // For leafChar nodes, the value.
@@ -108,7 +108,7 @@ class RBBINode : public UMemory {
 
     private:
         RBBINode &operator = (const RBBINode &other); // No defs.
-        UBool operator == (const RBBINode &other);    // Private, so these functions won't accidently be used.
+        bool operator == (const RBBINode &other);     // Private, so these functions won't accidentally be used.
 
 #ifdef RBBI_DEBUG
     public:

+ 3 - 3
thirdparty/icu4c/common/rbbirb.h

@@ -73,10 +73,10 @@ private:
 
 public:
     //  API inherited from class SymbolTable
-    virtual const UnicodeString*  lookup(const UnicodeString& s) const;
-    virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
+    virtual const UnicodeString*  lookup(const UnicodeString& s) const override;
+    virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const override;
     virtual UnicodeString parseReference(const UnicodeString& text,
-                                         ParsePosition& pos, int32_t limit) const;
+                                         ParsePosition& pos, int32_t limit) const override;
 
     //  Additional Functions
     RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);

+ 5 - 5
thirdparty/icu4c/common/rbbiscan.cpp

@@ -175,7 +175,7 @@ RBBIRuleScanner::~RBBIRuleScanner() {
 
     // Node Stack.
     //   Normally has one entry, which is the entire parse tree for the rules.
-    //   If errors occured, there may be additional subtrees left on the stack.
+    //   If errors occurred, there may be additional subtrees left on the stack.
     while (fNodeStackPtr > 0) {
         delete fNodeStack[fNodeStackPtr];
         fNodeStackPtr--;
@@ -375,7 +375,7 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
         RBBINode **destRules = (fReverseRule? &fRB->fSafeRevTree : fRB->fDefaultTree);
 
         if (*destRules != NULL) {
-            // This is not the first rule encounted.
+            // This is not the first rule encountered.
             // OR previous stuff  (from *destRules)
             // with the current rule expression (on the Node Stack)
             //  with the resulting OR expression going to *destRules
@@ -1223,7 +1223,7 @@ void RBBIRuleScanner::scanSet() {
         //  TODO:  Get more accurate position of the error from UnicodeSet's return info.
         //         UnicodeSet appears to not be reporting correctly at this time.
         #ifdef RBBI_DEBUG
-            RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex());
+            RBBIDebugPrintf("UnicodeSet parse position.ErrorIndex = %d\n", pos.getIndex());
         #endif
         error(localStatus);
         delete uset;
@@ -1244,7 +1244,7 @@ void RBBIRuleScanner::scanSet() {
     }
 
 
-    // Advance the RBBI parse postion over the UnicodeSet pattern.
+    // Advance the RBBI parse position over the UnicodeSet pattern.
     //   Don't just set fScanIndex because the line/char positions maintained
     //   for error reporting would be thrown off.
     i = pos.getIndex();
@@ -1267,7 +1267,7 @@ void RBBIRuleScanner::scanSet() {
         fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
         //  findSetFor() serves several purposes here:
         //     - Adopts storage for the UnicodeSet, will be responsible for deleting.
-        //     - Mantains collection of all sets in use, needed later for establishing
+        //     - Maintains collection of all sets in use, needed later for establishing
         //          character categories for run time engine.
         //     - Eliminates multiple instances of the same set.
         //     - Creates a new uset node if necessary (if this isn't a duplicate.)

+ 1 - 1
thirdparty/icu4c/common/rbbiscan.h

@@ -144,7 +144,7 @@ private:
 
     UnicodeSet                     fRuleSets[10];    // Unicode Sets that are needed during
                                                      //  the scanning of RBBI rules.  The
-                                                     //  indicies for these are assigned by the
+                                                     //  indices for these are assigned by the
                                                      //  perl script that builds the state tables.
                                                      //  See rbbirpt.h.
 

+ 1 - 1
thirdparty/icu4c/common/rbbistbl.cpp

@@ -63,7 +63,7 @@ RBBISymbolTable::~RBBISymbolTable()
 
 
 //
-//  RBBISymbolTable::lookup       This function from the abstract symbol table inteface
+//  RBBISymbolTable::lookup       This function from the abstract symbol table interface
 //                                looks up a variable name and returns a UnicodeString
 //                                containing the substitution text.
 //

+ 22 - 7
thirdparty/icu4c/common/rbbitblb.cpp

@@ -79,7 +79,7 @@ void  RBBITableBuilder::buildForwardTable() {
 
     //
     // Walk through the tree, replacing any references to $variables with a copy of the
-    //   parse tree for the substition expression.
+    //   parse tree for the substitution expression.
     //
     fTree = fTree->flattenVariables();
 #ifdef RBBI_DEBUG
@@ -390,6 +390,7 @@ void RBBITableBuilder::addRuleRootNodes(UVector *dest, RBBINode *node) {
     if (node == NULL || U_FAILURE(*fStatus)) {
         return;
     }
+    U_ASSERT(!dest->hasDeleter());
     if (node->fRuleRoot) {
         dest->addElement(node, *fStatus);
         // Note: rules cannot nest. If we found a rule start node,
@@ -694,7 +695,7 @@ void RBBITableBuilder::buildStateTable() {
         }
     }
     return;
-    // delete local pointers only if error occured.
+    // delete local pointers only if error occurred.
 ExitBuildSTdeleteall:
     delete initialState;
     delete failState;
@@ -1042,6 +1043,8 @@ void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) {
 //
 //-----------------------------------------------------------------------------
 void RBBITableBuilder::setAdd(UVector *dest, UVector *source) {
+    U_ASSERT(!dest->hasDeleter());
+    U_ASSERT(!source->hasDeleter());
     int32_t destOriginalSize = dest->size();
     int32_t sourceSize       = source->size();
     int32_t di           = 0;
@@ -1070,6 +1073,9 @@ void RBBITableBuilder::setAdd(UVector *dest, UVector *source) {
     (void) source->toArray(sourcePtr);
 
     dest->setSize(sourceSize+destOriginalSize, *fStatus);
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
 
     while (sourcePtr < sourceLim && destPtr < destLim) {
         if (*destPtr == *sourcePtr) {
@@ -1431,7 +1437,7 @@ void RBBITableBuilder::exportTable(void *where) {
 void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
     // The safe table creation has three steps:
 
-    // 1. Identifiy pairs of character classes that are "safe." Safe means that boundaries
+    // 1. Identify pairs of character classes that are "safe." Safe means that boundaries
     // following the pair do not depend on context or state before the pair. To test
     // whether a pair is safe, run it through the main forward state table, starting
     // from each state. If the final state is the same, no matter what the starting state,
@@ -1445,7 +1451,7 @@ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
     // the first of a pair. In each of these rows, the entry for the second character
     // of a safe pair is set to the stop state (0), indicating that a match was found.
     // All other table entries are set to the state corresponding to the current input
-    // character, allowing that charcter to be the of a start following pair.
+    // character, allowing that character to be the start of a following pair.
     //
     // Because the safe rules are to be run in reverse, moving backwards in the text,
     // the first and second pair categories are swapped when building the table.
@@ -1490,16 +1496,25 @@ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
     // The table as a whole is UVector<UnicodeString>
     // Each row is represented by a UnicodeString, being used as a Vector<int16>.
     // Row 0 is the stop state.
-    // Row 1 is the start sate.
+    // Row 1 is the start state.
     // Row 2 and beyond are other states, initially one per char class, but
     //   after initial construction, many of the states will be combined, compacting the table.
     // The String holds the nextState data only. The four leading fields of a row, fAccepting,
     // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.
 
     U_ASSERT(fSafeTable == nullptr);
-    fSafeTable = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, numCharClasses + 2, status);
+    LocalPointer<UVector> lpSafeTable(
+        new UVector(uprv_deleteUObject, uhash_compareUnicodeString, numCharClasses + 2, status), status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+    fSafeTable = lpSafeTable.orphan();
     for (int32_t row=0; row<numCharClasses + 2; ++row) {
-        fSafeTable->addElement(new UnicodeString(numCharClasses, 0, numCharClasses+4), status);
+        LocalPointer<UnicodeString> lpString(new UnicodeString(numCharClasses, 0, numCharClasses+4), status);
+        fSafeTable->adoptElement(lpString.orphan(), status);
+    }
+    if (U_FAILURE(status)) {
+        return;
     }
 
     // From the start state, each input char class transitions to the state for that input.

+ 2 - 2
thirdparty/icu4c/common/rbbitblb.h

@@ -69,12 +69,12 @@ public:
     bool     findDuplCharClassFrom(IntPair *categories);
 
     /** Remove a column from the state table. Used when two character categories
-     *  have been found equivalent, and merged together, to eliminate the uneeded table column.
+     *  have been found equivalent, and merged together, to eliminate the unneeded table column.
      */
     void     removeColumn(int32_t column);
 
     /**
-     * Check for, and remove dupicate states (table rows).
+     * Check for, and remove duplicate states (table rows).
      * @return the number of states removed.
      */
     int32_t  removeDuplicateStates();

+ 1 - 1
thirdparty/icu4c/common/resbund.cpp

@@ -135,7 +135,7 @@ U_NAMESPACE_BEGIN
  * so forth, until the chain is exhausted or the tag is found.
  *
  * Thread-safety is implemented around caches, both the cache that
- * stores all the resouce data, and the cache that stores flags
+ * stores all the resource data, and the cache that stores flags
  * indicating whether or not a file has been visited.  These caches
  * delete their storage at static cleanup time, when the process
  * quits.

+ 1 - 1
thirdparty/icu4c/common/ruleiter.h

@@ -94,7 +94,7 @@ public:
      * position.
      * @param text the text to be iterated
      * @param sym the symbol table, or null if there is none.  If sym is null,
-     * then variables will not be deferenced, even if the PARSE_VARIABLES
+     * then variables will not be dereferenced, even if the PARSE_VARIABLES
      * option is set.
      * @param pos upon input, the index of the next character to return.  If a
      * variable has been dereferenced, then pos will <em>not</em> increment as

+ 3 - 3
thirdparty/icu4c/common/schriter.cpp

@@ -79,10 +79,10 @@ StringCharacterIterator::operator=(const StringCharacterIterator& that) {
     return *this;
 }
 
-UBool
+bool
 StringCharacterIterator::operator==(const ForwardCharacterIterator& that) const {
     if (this == &that) {
-        return TRUE;
+        return true;
     }
 
     // do not call UCharCharacterIterator::operator==()
@@ -90,7 +90,7 @@ StringCharacterIterator::operator==(const ForwardCharacterIterator& that) const
     // while we compare UnicodeString objects
 
     if (typeid(*this) != typeid(that)) {
-        return FALSE;
+        return false;
     }
 
     StringCharacterIterator&    realThat = (StringCharacterIterator&)that;

+ 45 - 56
thirdparty/icu4c/common/serv.cpp

@@ -8,6 +8,7 @@
 */
 
 #include "unicode/utypes.h"
+#include "unicode/localpointer.h"
 
 #if !UCONFIG_NO_SERVICE
 
@@ -237,7 +238,7 @@ public:
     * you're removing pointer to this somewhere.  Management of that
     * pointer will have to know how to deal with refcounts.  Once
     * the refcount drops to zero, the resource is released.  Return
-    * false if the resouce has been released.
+    * false if the resource has been released.
     */
     CacheEntry* unref() {
         if ((--refcount) == 0) {
@@ -256,20 +257,13 @@ public:
     }
 };
 
-// UObjectDeleter for serviceCache
+// Deleter for serviceCache
 U_CDECL_BEGIN
 static void U_CALLCONV
 cacheDeleter(void* obj) {
     U_NAMESPACE_USE ((CacheEntry*)obj)->unref();
 }
 
-/**
-* Deleter for UObjects
-*/
-static void U_CALLCONV
-deleteUObject(void *obj) {
-    U_NAMESPACE_USE delete (UObject*) obj;
-}
 U_CDECL_END
 
 /*
@@ -418,12 +412,6 @@ private:
     UBool fActive;
 };
 
-struct UVectorDeleter {
-    UVector* _obj;
-    UVectorDeleter() : _obj(NULL) {}
-    ~UVectorDeleter() { delete _obj; }
-};
-
 // called only by factories, treat as private
 UObject* 
 ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const 
@@ -454,6 +442,7 @@ ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUSer
         if (serviceCache == NULL) {
             ncthis->serviceCache = new Hashtable(status);
             if (ncthis->serviceCache == NULL) {
+                status = U_MEMORY_ALLOCATION_ERROR;
                 return NULL;
             }
             if (U_FAILURE(status)) {
@@ -464,7 +453,7 @@ ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUSer
         }
 
         UnicodeString currentDescriptor;
-        UVectorDeleter cacheDescriptorList;
+        LocalPointer<UVector> cacheDescriptorList;
         UBool putInCache = FALSE;
 
         int32_t startIndex = 0;
@@ -502,18 +491,17 @@ ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUSer
             int32_t index = startIndex;
             while (index < limit) {
                 ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(index++);
-                UObject* service = f->create(key, this, status);
+                LocalPointer<UObject> service(f->create(key, this, status));
                 if (U_FAILURE(status)) {
-                    delete service;
                     return NULL;
                 }
-                if (service != NULL) {
-                    result = new CacheEntry(currentDescriptor, service);
+                if (service.isValid()) {
+                    result = new CacheEntry(currentDescriptor, service.getAlias());
                     if (result == NULL) {
-                        delete service;
                         status = U_MEMORY_ALLOCATION_ERROR;
                         return NULL;
                     }
+                    service.orphan(); // result now owns service.
 
                     goto outerEnd;
                 }
@@ -524,19 +512,22 @@ ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUSer
             // don't want to keep querying on an id that's going to
             // fallback to the one that succeeded, we want to hit the
             // cache the first time next goaround.
-            if (cacheDescriptorList._obj == NULL) {
-                cacheDescriptorList._obj = new UVector(uprv_deleteUObject, NULL, 5, status);
+            if (cacheDescriptorList.isNull()) {
+                cacheDescriptorList.adoptInsteadAndCheckErrorCode(new UVector(uprv_deleteUObject, NULL, 5, status), status);
                 if (U_FAILURE(status)) {
                     return NULL;
                 }
             }
-            UnicodeString* idToCache = new UnicodeString(currentDescriptor);
-            if (idToCache == NULL || idToCache->isBogus()) {
+
+            LocalPointer<UnicodeString> idToCache(new UnicodeString(currentDescriptor), status);
+            if (U_FAILURE(status)) {
+                return NULL;
+            }
+            if (idToCache->isBogus()) {
                 status = U_MEMORY_ALLOCATION_ERROR;
                 return NULL;
             }
-
-            cacheDescriptorList._obj->addElement(idToCache, status);
+            cacheDescriptorList->adoptElement(idToCache.orphan(), status);
             if (U_FAILURE(status)) {
                 return NULL;
             }
@@ -550,9 +541,9 @@ outerEnd:
                     return NULL;
                 }
 
-                if (cacheDescriptorList._obj != NULL) {
-                    for (int32_t i = cacheDescriptorList._obj->size(); --i >= 0;) {
-                        UnicodeString* desc = (UnicodeString*)cacheDescriptorList._obj->elementAt(i);
+                if (cacheDescriptorList.isValid()) {
+                    for (int32_t i = cacheDescriptorList->size(); --i >= 0;) {
+                        UnicodeString* desc = (UnicodeString*)cacheDescriptorList->elementAt(i);
 
                         serviceCache->put(*desc, result, status);
                         if (U_FAILURE(status)) {
@@ -560,7 +551,7 @@ outerEnd:
                         }
 
                         result->ref();
-                        cacheDescriptorList._obj->removeElementAt(i);
+                        cacheDescriptorList->removeElementAt(i);
                     }
                 }
             }
@@ -613,6 +604,7 @@ ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorC
     if (U_FAILURE(status)) {
         return result;
     }
+    UObjectDeleter *savedDeleter = result.setDeleter(uprv_deleteUObject);
 
     {
         Mutex mutex(&lock);
@@ -620,7 +612,7 @@ ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorC
         if (map != NULL) {
             ICUServiceKey* fallbackKey = createKey(matchID, status);
 
-            for (int32_t pos = UHASH_FIRST;;) {
+            for (int32_t pos = UHASH_FIRST; U_SUCCESS(status); ) {
                 const UHashElement* e = map->nextElement(pos);
                 if (e == NULL) {
                     break;
@@ -633,17 +625,11 @@ ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorC
                     }
                 }
 
-                UnicodeString* idClone = new UnicodeString(*id);
-                if (idClone == NULL || idClone->isBogus()) {
-                    delete idClone;
+                LocalPointer<UnicodeString> idClone(new UnicodeString(*id), status);
+                if (U_SUCCESS(status) && idClone->isBogus()) {
                     status = U_MEMORY_ALLOCATION_ERROR;
-                    break;
-                }
-                result.addElement(idClone, status);
-                if (U_FAILURE(status)) {
-                    delete idClone;
-                    break;
                 }
+                result.adoptElement(idClone.orphan(), status);
             }
             delete fallbackKey;
         }
@@ -651,6 +637,7 @@ ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorC
     if (U_FAILURE(status)) {
         result.removeAllElements();
     }
+    result.setDeleter(savedDeleter);
     return result;
 }
 
@@ -798,7 +785,7 @@ ICUService::getDisplayNames(UVector& result,
         }
         const UnicodeString* dn = (const UnicodeString*)entry->key.pointer;
         StringPair* sp = StringPair::create(*id, *dn, status);
-        result.addElement(sp, status);
+        result.adoptElement(sp, status);
         if (U_FAILURE(status)) {
             result.removeAllElements();
             break;
@@ -846,32 +833,34 @@ ICUService::createSimpleFactory(UObject* objToAdopt, const UnicodeString& id, UB
 }
 
 URegistryKey
-ICUService::registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status) 
+ICUService::registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status)
 {
-    if (U_SUCCESS(status) && factoryToAdopt != NULL) {
+    LocalPointer<ICUServiceFactory>lpFactoryToAdopt(factoryToAdopt);
+    if (U_FAILURE(status) || factoryToAdopt == nullptr) {
+        return nullptr;
+    }
+    {
         Mutex mutex(&lock);
 
-        if (factories == NULL) {
-            factories = new UVector(deleteUObject, NULL, status);
+        if (factories == nullptr) {
+            LocalPointer<UVector> lpFactories(new UVector(uprv_deleteUObject, nullptr, status), status);
             if (U_FAILURE(status)) {
-                delete factories;
-                return NULL;
+                return nullptr;
             }
+            factories = lpFactories.orphan();
         }
-        factories->insertElementAt(factoryToAdopt, 0, status);
+        factories->insertElementAt(lpFactoryToAdopt.orphan(), 0, status);
         if (U_SUCCESS(status)) {
             clearCaches();
-        } else {
-            delete factoryToAdopt;
-            factoryToAdopt = NULL;
         }
-    }
+    }   // Close of mutex lock block.
 
-    if (factoryToAdopt != NULL) {
+    if (U_SUCCESS(status)) {
         notifyChanged();
+        return (URegistryKey)factoryToAdopt;
+    } else {
+        return nullptr;
     }
-
-    return (URegistryKey)factoryToAdopt;
 }
 
 UBool 

+ 9 - 9
thirdparty/icu4c/common/serv.h

@@ -191,7 +191,7 @@ public:
   /**
    * UObject RTTI boilerplate.
    */
-  virtual UClassID getDynamicClassID() const;
+  virtual UClassID getDynamicClassID() const override;
 
 #ifdef SERVICE_DEBUG
  public:
@@ -315,7 +315,7 @@ class U_COMMON_API SimpleFactory : public ICUServiceFactory {
    * @param status the error code status.
    * @return the service object, or NULL if the factory does not support the key.
    */
-  virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
+  virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const override;
 
   /**
    * <p>This implementation adds a mapping from ID -> this to result if visible is true, 
@@ -324,7 +324,7 @@ class U_COMMON_API SimpleFactory : public ICUServiceFactory {
    * @param result the mapping table to update.
    * @param status the error code status.
    */
-  virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
+  virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const override;
 
   /**
    * <p>This implementation returns the factory ID if it equals id and visible is true,
@@ -336,7 +336,7 @@ class U_COMMON_API SimpleFactory : public ICUServiceFactory {
    * @param result output parameter to hold the display name.
    * @return result.
    */
-  virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
+  virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const override;
 
 public:
  /**
@@ -347,7 +347,7 @@ public:
  /**
   * UObject RTTI boilerplate.
   */
-  virtual UClassID getDynamicClassID() const;
+  virtual UClassID getDynamicClassID() const override;
 
 #ifdef SERVICE_DEBUG
  public:
@@ -363,7 +363,7 @@ public:
 
 /**
  * <p>ServiceListener is the listener that ICUService provides by default.
- * ICUService will notifiy this listener when factories are added to
+ * ICUService will notify this listener when factories are added to
  * or removed from the service.  Subclasses can provide
  * different listener interfaces that extend EventListener, and modify
  * acceptsListener and notifyListener as appropriate.</p>
@@ -390,7 +390,7 @@ public:
     /**
      * UObject RTTI boilerplate.
      */
-    virtual UClassID getDynamicClassID() const;
+    virtual UClassID getDynamicClassID() const override;
     
 };
 
@@ -930,7 +930,7 @@ class U_COMMON_API ICUService : public ICUNotifier {
      * @param l the listener to test.
      * @return true if the service accepts the listener.
      */
-    virtual UBool acceptsListener(const EventListener& l) const;
+    virtual UBool acceptsListener(const EventListener& l) const override;
 
     /**
      * <p>Notify the listener of a service change.</p>
@@ -941,7 +941,7 @@ class U_COMMON_API ICUService : public ICUNotifier {
      *
      * @param l the listener to notify.
      */
-    virtual void notifyListener(EventListener& l) const;
+    virtual void notifyListener(EventListener& l) const override;
 
     /************************************************************************
      * Utilities for subclasses.

+ 19 - 19
thirdparty/icu4c/common/servloc.h

@@ -106,7 +106,7 @@ class U_COMMON_API LocaleKey : public ICUServiceKey {
     /**
      * Append the prefix associated with the kind, or nothing if the kind is KIND_ANY.
      */
-    virtual UnicodeString& prefix(UnicodeString& result) const;
+    virtual UnicodeString& prefix(UnicodeString& result) const override;
 
     /**
      * Return the kind code associated with this key.
@@ -116,17 +116,17 @@ class U_COMMON_API LocaleKey : public ICUServiceKey {
     /**
      * Return the canonicalID.
      */
-    virtual UnicodeString& canonicalID(UnicodeString& result) const;
+    virtual UnicodeString& canonicalID(UnicodeString& result) const override;
 
     /**
      * Return the currentID.
      */
-    virtual UnicodeString& currentID(UnicodeString& result) const;
+    virtual UnicodeString& currentID(UnicodeString& result) const override;
 
     /**
      * Return the (canonical) current descriptor, or null if no current id.
      */
-    virtual UnicodeString& currentDescriptor(UnicodeString& result) const;
+    virtual UnicodeString& currentDescriptor(UnicodeString& result) const override;
 
     /**
      * Convenience method to return the locale corresponding to the (canonical) original ID.
@@ -147,13 +147,13 @@ class U_COMMON_API LocaleKey : public ICUServiceKey {
      * unless the primary id was the empty string, in which case
      * there is no fallback.  
      */
-    virtual UBool fallback();
+    virtual UBool fallback() override;
 
     /**
      * Return true if a key created from id matches, or would eventually
      * fallback to match, the canonical ID of this key.  
      */
-    virtual UBool isFallbackOf(const UnicodeString& id) const;
+    virtual UBool isFallbackOf(const UnicodeString& id) const override;
     
  public:
     /**
@@ -161,7 +161,7 @@ class U_COMMON_API LocaleKey : public ICUServiceKey {
      */
     static UClassID U_EXPORT2 getStaticClassID();
 
-    virtual UClassID getDynamicClassID() const;
+    virtual UClassID getDynamicClassID() const override;
 
     /**
      * Destructor.
@@ -238,7 +238,7 @@ protected:
      * kind off to handleCreate (which subclasses must implement).
      */
 public:
-    virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
+    virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const override;
 
 protected:
     virtual UBool handlesKey(const ICUServiceKey& key, UErrorCode& status) const;
@@ -248,12 +248,12 @@ public:
      * Override of superclass method.  This adjusts the result based
      * on the coverage rule for this factory.
      */
-    virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
+    virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const override;
 
     /**
      * Return a localized name for the locale represented by id.
      */
-    virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const;
+    virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const override;
 
 protected:
     /**
@@ -281,7 +281,7 @@ public:
      */
     static UClassID U_EXPORT2 getStaticClassID();
 
-    virtual UClassID getDynamicClassID() const;
+    virtual UClassID getDynamicClassID() const override;
 
 #ifdef SERVICE_DEBUG
  public:
@@ -324,13 +324,13 @@ class U_COMMON_API SimpleLocaleKeyFactory : public LocaleKeyFactory {
     /**
      * Override of superclass method.  Returns the service object if kind/locale match.  Service is not used.
      */
-    virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const;
+    virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const override;
 
     /**
      * Override of superclass method.  This adjusts the result based
      * on the coverage rule for this factory.
      */
-    virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const;
+    virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const override;
 
  protected:
     /**
@@ -345,7 +345,7 @@ public:
      */
     static UClassID U_EXPORT2 getStaticClassID();
 
-    virtual UClassID getDynamicClassID() const;
+    virtual UClassID getDynamicClassID() const override;
 
 #ifdef SERVICE_DEBUG
  public:
@@ -394,20 +394,20 @@ protected:
     /**
      * Return the supported IDs.  This is the set of all locale names in ICULocaleData.
      */
-    virtual const Hashtable* getSupportedIDs(UErrorCode& status) const;
+    virtual const Hashtable* getSupportedIDs(UErrorCode& status) const override;
 
     /**
      * Create the service.  The default implementation returns the resource bundle
      * for the locale, ignoring kind, and service.
      */
-    virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const;
+    virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const override;
 
 public:
     /**
      * UObject boilerplate.
      */
     static UClassID U_EXPORT2 getStaticClassID();
-    virtual UClassID getDynamicClassID() const;
+    virtual UClassID getDynamicClassID() const override;
 
 
 #ifdef SERVICE_DEBUG
@@ -512,7 +512,7 @@ class U_COMMON_API ICULocaleService : public ICUService
    * We really need a flag that is understood by all compilers that will suppress the warning about
    * hidden overrides.
    */
-  virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status);
+  virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status) override;
 
   /**
    * Convenience method for callers using locales.  This returns the standard
@@ -531,7 +531,7 @@ class U_COMMON_API ICULocaleService : public ICUService
   /**
    * Override superclass createKey method.
    */
-  virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const;
+  virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const override;
 
   /**
    * Additional createKey that takes a kind.

+ 6 - 6
thirdparty/icu4c/common/servls.cpp

@@ -179,7 +179,7 @@ private:
 
             length = other._ids.size();
             for(i = 0; i < length; ++i) {
-                _ids.addElement(((UnicodeString *)other._ids.elementAt(i))->clone(), status);
+                _ids.addElementX(((UnicodeString *)other._ids.elementAt(i))->clone(), status);
             }
 
             if(U_SUCCESS(status)) {
@@ -201,7 +201,7 @@ public:
 
     virtual ~ServiceEnumeration();
 
-    virtual StringEnumeration *clone() const {
+    virtual StringEnumeration *clone() const override {
         UErrorCode status = U_ZERO_ERROR;
         ServiceEnumeration *cl = new ServiceEnumeration(*this, status);
         if(U_FAILURE(status)) {
@@ -221,18 +221,18 @@ public:
         return FALSE;
     }
 
-    virtual int32_t count(UErrorCode& status) const {
+    virtual int32_t count(UErrorCode& status) const override {
         return upToDate(status) ? _ids.size() : 0;
     }
 
-    virtual const UnicodeString* snext(UErrorCode& status) {
+    virtual const UnicodeString* snext(UErrorCode& status) override {
         if (upToDate(status) && (_pos < _ids.size())) {
             return (const UnicodeString*)_ids[_pos++];
         }
         return NULL;
     }
 
-    virtual void reset(UErrorCode& status) {
+    virtual void reset(UErrorCode& status) override {
         if (status == U_ENUM_OUT_OF_SYNC_ERROR) {
             status = U_ZERO_ERROR;
         }
@@ -245,7 +245,7 @@ public:
 
 public:
     static UClassID U_EXPORT2 getStaticClassID(void);
-    virtual UClassID getDynamicClassID(void) const;
+    virtual UClassID getDynamicClassID(void) const override;
 };
 
 ServiceEnumeration::~ServiceEnumeration() {}

+ 1 - 1
thirdparty/icu4c/common/servnotf.cpp

@@ -59,7 +59,7 @@ ICUNotifier::addListener(const EventListener* l, UErrorCode& status)
                 }
             }
 
-            listeners->addElement((void*)l, status); // cast away const
+            listeners->addElementX((void*)l, status); // cast away const
         }
 #ifdef NOTIFIER_DEBUG
         else {

+ 1 - 1
thirdparty/icu4c/common/servnotf.h

@@ -40,7 +40,7 @@ public:
 public:
     static UClassID U_EXPORT2 getStaticClassID();
 
-    virtual UClassID getDynamicClassID() const;
+    virtual UClassID getDynamicClassID() const override;
 
 public:
 #ifdef SERVICE_DEBUG

+ 24 - 24
thirdparty/icu4c/common/stringtriebuilder.cpp

@@ -383,7 +383,7 @@ StringTrieBuilder::equalNodes(const void *left, const void *right) {
     return *(const Node *)left==*(const Node *)right;
 }
 
-UBool
+bool
 StringTrieBuilder::Node::operator==(const Node &other) const {
     return this==&other || (typeid(*this)==typeid(other) && hash==other.hash);
 }
@@ -396,13 +396,13 @@ StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) {
     return edgeNumber;
 }
 
-UBool
+bool
 StringTrieBuilder::FinalValueNode::operator==(const Node &other) const {
     if(this==&other) {
-        return TRUE;
+        return true;
     }
     if(!Node::operator==(other)) {
-        return FALSE;
+        return false;
     }
     const FinalValueNode &o=(const FinalValueNode &)other;
     return value==o.value;
@@ -413,25 +413,25 @@ StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) {
     offset=builder.writeValueAndFinal(value, TRUE);
 }
 
-UBool
+bool
 StringTrieBuilder::ValueNode::operator==(const Node &other) const {
     if(this==&other) {
-        return TRUE;
+        return true;
     }
     if(!Node::operator==(other)) {
-        return FALSE;
+        return false;
     }
     const ValueNode &o=(const ValueNode &)other;
     return hasValue==o.hasValue && (!hasValue || value==o.value);
 }
 
-UBool
+bool
 StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const {
     if(this==&other) {
-        return TRUE;
+        return true;
     }
     if(!ValueNode::operator==(other)) {
-        return FALSE;
+        return false;
     }
     const IntermediateValueNode &o=(const IntermediateValueNode &)other;
     return next==o.next;
@@ -451,13 +451,13 @@ StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) {
     offset=builder.writeValueAndFinal(value, FALSE);
 }
 
-UBool
+bool
 StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const {
     if(this==&other) {
-        return TRUE;
+        return true;
     }
     if(!ValueNode::operator==(other)) {
-        return FALSE;
+        return false;
     }
     const LinearMatchNode &o=(const LinearMatchNode &)other;
     return length==o.length && next==o.next;
@@ -471,21 +471,21 @@ StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) {
     return edgeNumber;
 }
 
-UBool
+bool
 StringTrieBuilder::ListBranchNode::operator==(const Node &other) const {
     if(this==&other) {
-        return TRUE;
+        return true;
     }
     if(!Node::operator==(other)) {
-        return FALSE;
+        return false;
     }
     const ListBranchNode &o=(const ListBranchNode &)other;
     for(int32_t i=0; i<length; ++i) {
         if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) {
-            return FALSE;
+            return false;
         }
     }
-    return TRUE;
+    return true;
 }
 
 int32_t
@@ -550,13 +550,13 @@ StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) {
     }
 }
 
-UBool
+bool
 StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const {
     if(this==&other) {
-        return TRUE;
+        return true;
     }
     if(!Node::operator==(other)) {
-        return FALSE;
+        return false;
     }
     const SplitBranchNode &o=(const SplitBranchNode &)other;
     return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual;
@@ -584,13 +584,13 @@ StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) {
     offset=builder.write(unit);
 }
 
-UBool
+bool
 StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const {
     if(this==&other) {
-        return TRUE;
+        return true;
     }
     if(!ValueNode::operator==(other)) {
-        return FALSE;
+        return false;
     }
     const BranchHeadNode &o=(const BranchHeadNode &)other;
     return length==o.length && next==o.next;

+ 24 - 4
thirdparty/icu4c/common/uassert.h

@@ -10,7 +10,7 @@
 *
 * File uassert.h
 *
-*  Contains the U_ASSERT and UPRV_UNREACHABLE macros
+*  Contains the U_ASSERT and UPRV_UNREACHABLE_* macros
 *
 ******************************************************************************
 */
@@ -38,14 +38,34 @@
 #endif
 
 /**
- * \def UPRV_UNREACHABLE
+ * \def UPRV_UNREACHABLE_ASSERT
+ * This macro is used in places that we had believed were unreachable, but
+ * experience has shown otherwise (possibly due to memory corruption, etc).
+ * In this case we call assert() in debug versions as with U_ASSERT, instead
+ * of unconditionally calling abort(). However we also allow redefinition as
+ * with UPRV_UNREACHABLE_EXIT.
+ * @internal
+*/
+#if defined(UPRV_UNREACHABLE_ASSERT)
+    // Use the predefined value.
+#elif U_DEBUG
+#   include <assert.h>
+#   define UPRV_UNREACHABLE_ASSERT assert(false)
+#elif U_CPLUSPLUS_VERSION
+#   define UPRV_UNREACHABLE_ASSERT (void)0
+#else
+#   define UPRV_UNREACHABLE_ASSERT
+#endif
+
+/**
+ * \def UPRV_UNREACHABLE_EXIT
  * This macro is used to unconditionally abort if unreachable code is ever executed.
  * @internal
 */
-#if defined(UPRV_UNREACHABLE)
+#if defined(UPRV_UNREACHABLE_EXIT)
     // Use the predefined value.
 #else
-#   define UPRV_UNREACHABLE abort()
+#   define UPRV_UNREACHABLE_EXIT abort()
 #endif
 
 #endif

+ 3 - 3
thirdparty/icu4c/common/ubidi.cpp

@@ -2047,7 +2047,7 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop,
             break;
 
         default:                        /* we should never get here */
-            UPRV_UNREACHABLE;
+            UPRV_UNREACHABLE_EXIT;
         }
     }
     if((addLevel) || (start < start0)) {
@@ -2250,7 +2250,7 @@ resolveImplicitLevels(UBiDi *pBiDi,
                 start2=i;
                 break;
             default:            /* we should never get here */
-                UPRV_UNREACHABLE;
+                UPRV_UNREACHABLE_EXIT;
             }
         }
     }
@@ -2724,7 +2724,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
             break;
         default:
             /* we should never get here */
-            UPRV_UNREACHABLE;
+            UPRV_UNREACHABLE_EXIT;
         }
         /*
          * If there are no external levels specified and there

File diff suppressed because it is too large
+ 695 - 676
thirdparty/icu4c/common/ubidi_props_data.h


+ 2 - 2
thirdparty/icu4c/common/ubidiln.cpp

@@ -31,7 +31,7 @@
  * text in a single paragraph or in a line of a single paragraph
  * which has already been processed according to
  * the Unicode 6.3 BiDi algorithm as defined in
- * http://www.unicode.org/unicode/reports/tr9/ , version 28,
+ * https://www.unicode.org/reports/tr9/ , version 28,
  * also described in The Unicode Standard, Version 6.3.0 .
  *
  * This means that there is a UBiDi object with a levels
@@ -530,7 +530,7 @@ static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex) {
         visualStart+=length;
     }
     /* we should never get here */
-    UPRV_UNREACHABLE;
+    UPRV_UNREACHABLE_EXIT;
 }
 
 /*

+ 1 - 1
thirdparty/icu4c/common/ucase.cpp

@@ -351,7 +351,7 @@ strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) {
     if(max==0 || *t==0) {
         return 0; /* equal to length of both strings */
     } else {
-        return -max; /* return lengh difference */
+        return -max; /* return length difference */
     }
 }
 

File diff suppressed because it is too large
+ 508 - 497
thirdparty/icu4c/common/ucase_props_data.h


File diff suppressed because it is too large
+ 1014 - 1002
thirdparty/icu4c/common/uchar_props_data.h


+ 3 - 3
thirdparty/icu4c/common/ucharstriebuilder.cpp

@@ -290,13 +290,13 @@ UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, in
     hash=hash*37u+ustr_hashUCharsN(units, len);
 }
 
-UBool
+bool
 UCharsTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const {
     if(this==&other) {
-        return TRUE;
+        return true;
     }
     if(!LinearMatchNode::operator==(other)) {
-        return FALSE;
+        return false;
     }
     const UCTLinearMatchNode &o=(const UCTLinearMatchNode &)other;
     return 0==u_memcmp(s, o.s, length);

+ 3 - 3
thirdparty/icu4c/common/uchriter.cpp

@@ -66,13 +66,13 @@ UCharCharacterIterator::operator=(const UCharCharacterIterator& that) {
 UCharCharacterIterator::~UCharCharacterIterator() {
 }
 
-UBool
+bool
 UCharCharacterIterator::operator==(const ForwardCharacterIterator& that) const {
     if (this == &that) {
-        return TRUE;
+        return true;
     }
     if (typeid(*this) != typeid(that)) {
-        return FALSE;
+        return false;
     }
 
     UCharCharacterIterator&    realThat = (UCharCharacterIterator&)that;

+ 1 - 0
thirdparty/icu4c/common/ucln_cmn.h

@@ -51,6 +51,7 @@ typedef enum ECleanupCommonType {
     UCLN_COMMON_USET,
     UCLN_COMMON_UNAMES,
     UCLN_COMMON_UPROPS,
+    UCLN_COMMON_EMOJIPROPS,
     UCLN_COMMON_UCNV,
     UCLN_COMMON_UCNV_IO,
     UCLN_COMMON_UDATA,

+ 1 - 1
thirdparty/icu4c/common/ucmndata.cpp

@@ -18,7 +18,7 @@
  *                 contents for locating the individual items by name.
  *
  *                 Two formats for the table of contents are supported, which is
- *                 why there is an abstract inteface involved.
+ *                 why there is an abstract interface involved.
  *
  */
 

+ 2 - 2
thirdparty/icu4c/common/ucmndata.h

@@ -18,10 +18,10 @@
  *                 contents for locating the individual items by name.
  *
  *                 Two formats for the table of contents are supported, which is
- *                 why there is an abstract inteface involved.
+ *                 why there is an abstract interface involved.
  *
  *                 These functions are part of the ICU internal implementation, and
- *                 are not inteded to be used directly by applications.
+ *                 are not intended to be used directly by applications.
  */
 
 #ifndef __UCMNDATA_H__

+ 7 - 7
thirdparty/icu4c/common/ucnv2022.cpp

@@ -527,7 +527,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
                     ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode);
             }
 
-            /* set the function pointers to appropriate funtions */
+            /* set the function pointers to appropriate functions */
             cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData);
             uprv_strcpy(myConverterData->locale,"ja");
 
@@ -578,7 +578,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
                 setInitialStateToUnicodeKR(cnv, myConverterData);
                 setInitialStateFromUnicodeKR(cnv, myConverterData);
 
-                /* set the function pointers to appropriate funtions */
+                /* set the function pointers to appropriate functions */
                 cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData;
                 uprv_strcpy(myConverterData->locale,"ko");
             }
@@ -605,7 +605,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
                 ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode);
 
 
-            /* set the function pointers to appropriate funtions */
+            /* set the function pointers to appropriate functions */
             cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData;
             uprv_strcpy(myConverterData->locale,"cn");
 
@@ -2147,7 +2147,7 @@ escape:
                     changeState_2022(args->converter,&(mySource),
                         mySourceLimit, ISO_2022_JP,err);
 
-                    /* If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error */
+                    /* If in ISO-2022-JP only and we successfully completed an escape sequence, but previous segment was empty, create an error */
                     if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) {
                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
@@ -2849,21 +2849,21 @@ getTrailByte:
 *       SS2 is a Chinese character as defined in CNS
 *       11643-plane-2, until another SS2designation
 *       appears
-*       (Meaning <ESC>N must preceed every 2 byte
+*       (Meaning <ESC>N must precede every 2 byte
 *        sequence.)
 *
 *      ESC $ + I       Indicates the immediate two bytes following SS3
 *       is a Chinese character as defined in CNS
 *       11643-plane-3, until another SS3designation
 *       appears
-*       (Meaning <ESC>O must preceed every 2 byte
+*       (Meaning <ESC>O must precede every 2 byte
 *        sequence.)
 *
 *      ESC $ + J       Indicates the immediate two bytes following SS3
 *       is a Chinese character as defined in CNS
 *       11643-plane-4, until another SS3designation
 *       appears
-*       (In English: <ESC>O must preceed every 2 byte
+*       (In English: <ESC>O must precede every 2 byte
 *        sequence.)
 *
 *      ESC $ + K       Indicates the immediate two bytes following SS3

+ 1 - 1
thirdparty/icu4c/common/ucnv_cnv.h

@@ -164,7 +164,7 @@ typedef const char * (*UConverterGetName) (const UConverter *cnv);
  * If this function is not set, then ucnv_cbFromUWriteSub() writes
  * the substitution character from UConverter.
  * For stateful converters, it is typically necessary to handle this
- * specificially for the converter in order to properly maintain the state.
+ * specifically for the converter in order to properly maintain the state.
  */
 typedef void (*UConverterWriteSub) (UConverterFromUnicodeArgs *pArgs, int32_t offsetIndex, UErrorCode *pErrorCode);
 

+ 2 - 2
thirdparty/icu4c/common/ucnv_err.cpp

@@ -61,7 +61,7 @@
  * When an ignorable code point is found and is unmappable, the default callbacks
  * will ignore them.
  * For a list of the default ignorable code points, use this link:
- * https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i=
+ * https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i=
  *
  * This list should be kept in sync with the one in CharsetCallback.java
  */
@@ -72,7 +72,7 @@
     (c == 0x115F) || \
     (c == 0x1160) || \
     (0x17B4 <= c && c <= 0x17B5) || \
-    (0x180B <= c && c <= 0x180E) || \
+    (0x180B <= c && c <= 0x180F) || \
     (0x200B <= c && c <= 0x200F) || \
     (0x202A <= c && c <= 0x202E) || \
     (0x2060 <= c && c <= 0x206F) || \

+ 1 - 1
thirdparty/icu4c/common/ucnv_imp.h

@@ -9,7 +9,7 @@
 *
 *  ucnv_imp.h:
 *  Contains all internal and external data structure definitions
-* Created & Maitained by Bertrand A. Damiba
+* Created & Maintained by Bertrand A. Damiba
 *
 *
 *

+ 4 - 4
thirdparty/icu4c/common/ucnv_lmb.cpp

@@ -149,7 +149,7 @@ Next, you will notice that the list of group bytes has some gaps.
 These are used in various ways.
 
 We reserve a few special single byte values for common control 
-characters. These are in the same place as their ANSI eqivalents for speed.
+characters. These are in the same place as their ANSI equivalents for speed.
 */
                      
 #define ULMBCS_HT    0x09   /* Fixed control char - Horizontal Tab */
@@ -192,7 +192,7 @@ LMBCS, was to use up the spaces of the form
  LOTUS added a new group 0x14 to hold Unicode values not otherwise 
  represented in LMBCS: */
 #define ULMBCS_GRP_UNICODE    0x14   
-/* The two bytes appearing after a 0x14 are intrepreted as UFT-16 BE
+/* The two bytes appearing after a 0x14 are interpreted as UTF-16 BE
 (Big-Endian) characters. The exception comes when the UTF16 
 representation would have a zero as the second byte. In that case,
 'F6' is used in its place, and the bytes are swapped. (This prevents 
@@ -878,7 +878,7 @@ _LMBCSFromUnicode(UConverterFromUnicodeArgs*     args,
          A) The optimization group
          B) The locale group
          C) The last group that succeeded with this string.
-         D) every other group that's relevent (single or double)
+         D) every other group that's relevant (single or double)
          E) If it's single-byte ambiguous, try the exceptions group
 
       4. And as a grand fallback: Unicode
@@ -1049,7 +1049,7 @@ _LMBCSFromUnicode(UConverterFromUnicodeArgs*     args,
          }
       }
   
-      /* we have a translation. increment source and write as much as posible to target */
+      /* we have a translation. increment source and write as much as possible to target */
       args->source++;
       pLMBCS = LMBCS;
       while (args->target < args->targetLimit && bytes_written--)

+ 3 - 3
thirdparty/icu4c/common/ucnv_u32.cpp

@@ -488,7 +488,7 @@ static const UConverterImpl _UTF32BEImpl = {
     NULL
 };
 
-/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */
+/* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
 static const UConverterStaticData _UTF32BEStaticData = {
     sizeof(UConverterStaticData),
     "UTF-32BE",
@@ -983,7 +983,7 @@ static const UConverterImpl _UTF32LEImpl = {
     NULL
 };
 
-/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */
+/* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
 static const UConverterStaticData _UTF32LEStaticData = {
     sizeof(UConverterStaticData),
     "UTF-32LE",
@@ -1230,7 +1230,7 @@ static const UConverterImpl _UTF32Impl = {
     NULL
 };
 
-/* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianess of UTF-32 */
+/* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianness of UTF-32 */
 static const UConverterStaticData _UTF32StaticData = {
     sizeof(UConverterStaticData),
     "UTF-32",

+ 8 - 8
thirdparty/icu4c/common/ucnvisci.cpp

@@ -128,7 +128,7 @@ typedef struct {
     MaskEnum currentMaskToUnicode;      /* mask for current state in toUnicode */
     MaskEnum defMaskToUnicode;          /* mask for default state in toUnicode */
     UBool isFirstBuffer;                /* boolean for fromUnicode to see if we need to announce the first script */
-    UBool resetToDefaultToUnicode;      /* boolean for reseting to default delta and mask when a newline is encountered*/
+    UBool resetToDefaultToUnicode;      /* boolean for resetting to default delta and mask when a newline is encountered*/
     char name[sizeof(ISCII_CNV_PREFIX) + 1];
     UChar32 prevToUnicodeStatus;        /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */
 } UConverterDataISCII;
@@ -1105,7 +1105,7 @@ getTrail:
 }
 
 static const uint16_t lookupTable[][2]={
-    { ZERO,       ZERO     },     /*DEFALT*/
+    { ZERO,       ZERO     },     /*DEFAULT*/
     { ZERO,       ZERO     },     /*ROMAN*/
     { DEVANAGARI, DEV_MASK },
     { BENGALI,    BNG_MASK },
@@ -1164,15 +1164,15 @@ static const uint16_t lookupTable[][2]={
  *  Post context
  *  i)  ATR : Attribute code is used to declare the font and script switching.
  *      Currently we only switch scripts and font codes consumed without generating an error
- *  ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure,
+ *  ii) EXT : Extension code is used to declare switching to Sanskrit and for obscure,
  *      obsolete characters
  *  Pre context
- *  i)  Halant: if preceeded by a halant then it is a explicit halant
+ *  i)  Halant: if preceded by a halant then it is an explicit halant
  *  ii) Nukta :
- *       a) if preceeded by a halant then it is a soft halant
- *       b) if preceeded by specific consonants and the ligatures have pre-composed
+ *       a) if preceded by a halant then it is a soft halant
+ *       b) if preceded by specific consonants and the ligatures have pre-composed
  *          characters in Unicode then convert to pre-composed characters
- *  iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda
+ *  iii) Danda: If Danda is preceded by a Danda then convert to Double Danda
  *
  */
 
@@ -1208,7 +1208,7 @@ UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCo
         if (target < targetLimit) {
             sourceChar = (unsigned char)*(source)++;
 
-            /* look at the post-context preform special processing */
+            /* look at the post-context and perform special processing */
             if (*contextCharToUnicode==ATR) {
 
                 /* If we have ATR in *contextCharToUnicode then we need to change our

+ 2 - 2
thirdparty/icu4c/common/ucnvmbcs.cpp

@@ -1091,7 +1091,7 @@ ucnv_MBCSGetUnicodeSet(const UConverter *cnv,
  * Definition of LINEAR macros and gb18030Ranges see near the beginning of the file.
  *
  * In the future, conversion extensions may handle m:n mappings and delta tables,
- * see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/conversion_extensions.html
+ * see https://htmlpreview.github.io/?https://github.com/unicode-org/icu-docs/blob/main/design/conversion/conversion_extensions.html
  *
  * If an input character cannot be mapped, then these functions set an error
  * code. The framework will then call the callback function.
@@ -4444,7 +4444,7 @@ getTrail:
                  * For EUC encodings that use only either 0x8e or 0x8f as the first
                  * byte of their longest byte sequences, the first two bytes in
                  * this third stage indicate with their 7th bits whether these bytes
-                 * are to be written directly or actually need to be preceeded by
+                 * are to be written directly or actually need to be preceded by
                  * one of the two Single-Shift codes. With this, the third stage
                  * stores one byte fewer per character than the actual maximum length of
                  * EUC byte sequences.

+ 2 - 2
thirdparty/icu4c/common/ucnvscsu.cpp

@@ -16,7 +16,7 @@
 *   created by: Markus W. Scherer
 *
 *   This is an implementation of the Standard Compression Scheme for Unicode
-*   as defined in http://www.unicode.org/unicode/reports/tr6/ .
+*   as defined in https://www.unicode.org/reports/tr6/ .
 *   Reserved commands and window settings are treated as illegal sequences and
 *   will result in callback calls.
 */
@@ -119,7 +119,7 @@ enum {
 };
 
 typedef struct SCSUData {
-    /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */
+    /* dynamic window offsets, initialize to default values from initialDynamicOffsets */
     uint32_t toUDynamicOffsets[8];
     uint32_t fromUDynamicOffsets[8];
 

+ 1 - 1
thirdparty/icu4c/common/ucptrie_impl.h

@@ -148,7 +148,7 @@ U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name);
 
 /*
  * Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
- * For overview information see http://site.icu-project.org/design/struct/utrie
+ * For overview information see https://icu.unicode.org/design/struct/utrie
  *
  * The binary trie data should be 32-bit-aligned.
  * The overall layout is:

+ 4 - 3
thirdparty/icu4c/common/ucurr.cpp

@@ -287,7 +287,7 @@ myUCharsToChars(char* resultOfLen4, const UChar* currency) {
  * four integers.  The first is the fraction digits.  The second is the
  * rounding increment, or 0 if none.  The rounding increment is in
  * units of 10^(-fraction_digits).  The third and fourth are the same
- * except that they are those used in cash transations ( cashDigits
+ * except that they are those used in cash transactions ( cashDigits
  * and cashRounding ).
  */
 static const int32_t*
@@ -1312,7 +1312,7 @@ searchCurrencyName(const CurrencyNameStruct* currencyNames,
     // The 2nd round of the binary search matches the second "B" in the text against
     // the 2nd char in currency names, and narrow the matching range to
     // "BB BBEX BBEXYZ" (and the maximum matching "BB").
-    // The 3rd round returnes the range as "BBEX BBEXYZ" (without changing
+    // The 3rd round returns the range as "BBEX BBEXYZ" (without changing
     // maximum matching).
     // The 4th round returns the same range (the maximum matching is "BBEX").
     // The 5th round returns no matching range.
@@ -1791,7 +1791,6 @@ static const struct CurrencyList {
     {"ECV", UCURR_UNCOMMON|UCURR_DEPRECATED},
     {"EEK", UCURR_COMMON|UCURR_DEPRECATED},
     {"EGP", UCURR_COMMON|UCURR_NON_DEPRECATED},
-    {"EQE", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove?
     {"ERN", UCURR_COMMON|UCURR_NON_DEPRECATED},
     {"ESA", UCURR_UNCOMMON|UCURR_DEPRECATED},
     {"ESB", UCURR_UNCOMMON|UCURR_DEPRECATED},
@@ -1963,9 +1962,11 @@ static const struct CurrencyList {
     {"UYI", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
     {"UYP", UCURR_COMMON|UCURR_DEPRECATED},
     {"UYU", UCURR_COMMON|UCURR_NON_DEPRECATED},
+    {"UYW", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
     {"UZS", UCURR_COMMON|UCURR_NON_DEPRECATED},
     {"VEB", UCURR_COMMON|UCURR_DEPRECATED},
     {"VEF", UCURR_COMMON|UCURR_NON_DEPRECATED},
+    {"VES", UCURR_COMMON|UCURR_NON_DEPRECATED},
     {"VND", UCURR_COMMON|UCURR_NON_DEPRECATED},
     {"VNN", UCURR_COMMON|UCURR_DEPRECATED},
     {"VUV", UCURR_COMMON|UCURR_NON_DEPRECATED},

+ 5 - 2
thirdparty/icu4c/common/uelement.h

@@ -54,9 +54,12 @@ typedef UBool U_CALLCONV UElementsAreEqual(const UElement e1, const UElement e2)
  * An element sorting (three-way) comparison function.
  * @param e1 An element (object or integer)
  * @param e2 An element (object or integer)
- * @return 0 if the two elements are equal, -1 if e1 is < e2, or +1 if e1 is > e2.
+ * @return 32-bit signed integer comparison result:
+ *               ==0 if the two elements are equal,
+ *                <0 if e1 is < e2, or
+ *                >0 if e1 is > e2.
  */
-typedef int8_t U_CALLCONV UElementComparator(UElement e1, UElement e2);
+typedef int32_t U_CALLCONV UElementComparator(UElement e1, UElement e2);
 
 /**
  * An element assignment function.  It may copy an integer, copy

+ 2 - 2
thirdparty/icu4c/common/uhash.cpp

@@ -320,7 +320,7 @@ _uhash_create(UHashFunction *keyHash,
  * Stop if it is identical or empty, otherwise continue by adding a
  * "jump" value (moduloing by the length again to keep it within
  * range) and retesting.  For efficiency, there need to be enough empty
- * values so that the searchs stop within a reasonable amount of time.
+ * values so that the searches stop within a reasonable amount of time.
  * This can be changed by changing the high/low water marks.
  *
  * In theory, this function can return NULL, if it is full (no empty
@@ -379,7 +379,7 @@ _uhash_find(const UHashtable *hash, UHashTok key,
          * WILL NEVER HAPPEN as long as uhash_put() makes sure that
          * count is always < length.
          */
-        UPRV_UNREACHABLE;
+        UPRV_UNREACHABLE_EXIT;
     }
     return &(elements[theIndex]);
 }

+ 5 - 5
thirdparty/icu4c/common/uhash.h

@@ -128,7 +128,7 @@ typedef UElementsAreEqual UValueComparator;
 /* see cmemory.h for UObjectDeleter and uprv_deleteUObject() */
 
 /**
- * This specifies whether or not, and how, the hastable resizes itself.
+ * This specifies whether or not, and how, the hashtable resizes itself.
  * See uhash_setResizePolicy().
  */
 enum UHashResizePolicy {
@@ -209,7 +209,7 @@ uhash_open(UHashFunction *keyHash,
  * NULL.
  * @param keyComp A pointer to the function that compares keys.  Must
  * not be NULL.
- * @param size The initial capacity of this hash table.
+ * @param size The initial capacity of this hashtable.
  * @param status A pointer to an UErrorCode to receive any errors.
  * @return A pointer to a UHashtable, or 0 if an error occurred.
  * @see uhash_open
@@ -244,7 +244,7 @@ uhash_init(UHashtable *hash,
  * NULL.
  * @param keyComp A pointer to the function that compares keys.  Must
  * not be NULL.
- * @param size The initial capacity of this hash table.
+ * @param size The initial capacity of this hashtable.
  * @param status A pointer to an UErrorCode to receive any errors.
  * @return A pointer to a UHashtable, or 0 if an error occurred.
  * @see uhash_openSize
@@ -322,7 +322,7 @@ U_CAPI UObjectDeleter *U_EXPORT2
 uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn);
 
 /**
- * Specify whether or not, and how, the hastable resizes itself.
+ * Specify whether or not, and how, the hashtable resizes itself.
  * By default, tables grow but do not shrink (policy U_GROW).
  * See enum UHashResizePolicy.
  * @param hash The UHashtable to set
@@ -780,7 +780,7 @@ uhash_deleteHashtable(void *obj);
 /* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */
 
 /**
- * Checks if the given hash tables are equal or not.
+ * Checks if the given hashtables are equal or not.
  * @param hash1
  * @param hash2
  * @return true if the hashtables are equal and false if not.

+ 52 - 19
thirdparty/icu4c/common/uloc.cpp

@@ -478,15 +478,24 @@ static const CanonicalizationMap CANONICALIZE_MAP[] = {
 /* Test if the locale id has BCP47 u extension and does not have '@'.
  * NOTE(review): the expansion passes `localeID`, not the macro parameter `id`, to
  * getShortestSubtagLength(), so the macro relies on the caller having a variable
  * named localeID in scope. Every use visible in this diff passes localeID as the
  * argument, so the two coincide there. */
 #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
 /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
-#define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \
-    if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \
-            U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
-        finalID=id; \
-        if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
-    } else { \
-        finalID=buffer; \
-    } \
-} UPRV_BLOCK_MACRO_END
+static const char* _ConvertBCP47(  // returns `buffer` when the conversion succeeded, otherwise the unchanged `id`
+        const char* id, char* buffer, int32_t length,
+        UErrorCode* err, int32_t* pLocaleIdSize) {  // pLocaleIdSize is an optional out-parameter (may be nullptr)
+    const char* finalID;
+    int32_t localeIDSize = uloc_forLanguageTag(id, buffer, length, NULL, err);
+    if (localeIDSize <= 0 || U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) {
+        finalID=id;  // nothing usable was written to buffer: fall back to the caller's original id
+        if (*err == U_STRING_NOT_TERMINATED_WARNING) {
+            *err = U_BUFFER_OVERFLOW_ERROR;  // an unterminated result is reported to the caller as an overflow
+        }
+    } else {
+        finalID=buffer;
+    }
+    if (pLocaleIdSize != nullptr) {
+        *pLocaleIdSize = localeIDSize;  // raw uloc_forLanguageTag() return value, stored even when the conversion failed
+    }
+    return finalID;
+}
 /* Gets the size of the shortest subtag in the given localeID. */
 static int32_t getShortestSubtagLength(const char *localeID) {
     int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
@@ -767,7 +776,8 @@ ulocimp_getKeywordValue(const char* localeID,
       }
 
       if (_hasBCP47Extension(localeID)) {
-          _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
+          tmpLocaleID = _ConvertBCP47(localeID, tempBuffer,
+                                      sizeof(tempBuffer), status, nullptr);
       } else {
           tmpLocaleID=localeID;
       }
@@ -1404,10 +1414,11 @@ uloc_openKeywords(const char* localeID,
     }
 
     if (_hasBCP47Extension(localeID)) {
-        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
+        tmpLocaleID = _ConvertBCP47(localeID, tempBuffer,
+                                    sizeof(tempBuffer), status, nullptr);
     } else {
         if (localeID==NULL) {
-           localeID=uloc_getDefault();
+            localeID=uloc_getDefault();
         }
         tmpLocaleID=localeID;
     }
@@ -1473,19 +1484,41 @@ _canonicalize(const char* localeID,
               ByteSink& sink,
               uint32_t options,
               UErrorCode* err) {
+    if (U_FAILURE(*err)) {
+        return;
+    }
+
     int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
-    char tempBuffer[ULOC_FULLNAME_CAPACITY];
+    PreflightingLocaleIDBuffer tempBuffer;  // if localeID has a BCP47 extension, tmpLocaleID points to this
+    CharString localeIDWithHyphens;  // if localeID has a BCP47 extension and contains '_', tmpLocaleID points to this
     const char* origLocaleID;
     const char* tmpLocaleID;
     const char* keywordAssign = NULL;
     const char* separatorIndicator = NULL;
 
-    if (U_FAILURE(*err)) {
-        return;
-    }
-
     if (_hasBCP47Extension(localeID)) {
-        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
+        const char* localeIDPtr = localeID;
+
+        // convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string
+        if (uprv_strchr(localeID, '_') != nullptr && localeID[1] != '-' && localeID[1] != '_') {
+            localeIDWithHyphens.append(localeID, -1, *err);
+            if (U_SUCCESS(*err)) {
+                for (char* p = localeIDWithHyphens.data(); *p != '\0'; ++p) {
+                    if (*p == '_') {
+                        *p = '-';
+                    }
+                }
+                localeIDPtr = localeIDWithHyphens.data();
+            }
+        }
+
+        do {
+            // After this call tmpLocaleID may point to localeIDPtr which may
+            // point to either localeID or localeIDWithHyphens.data().
+            tmpLocaleID = _ConvertBCP47(localeIDPtr, tempBuffer.getBuffer(),
+                                        tempBuffer.getCapacity(), err,
+                                        &(tempBuffer.requestedCapacity));
+        } while (tempBuffer.needToTryAgain(err));
     } else {
         if (localeID==NULL) {
            localeID=uloc_getDefault();
@@ -1771,7 +1804,7 @@ uloc_getVariant(const char* localeID,
     }
 
     if (_hasBCP47Extension(localeID)) {
-        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
+        tmpLocaleID =_ConvertBCP47(localeID, tempBuffer, sizeof(tempBuffer), err, nullptr);
     } else {
         if (localeID==NULL) {
            localeID=uloc_getDefault();

+ 7 - 5
thirdparty/icu4c/common/uloc_keytype.cpp

@@ -168,11 +168,13 @@ initFromResourceBundle(UErrorCode& sts) {
         }
 
         // look up type map for the key, and walk through the mapping data
-        tmpSts = U_ZERO_ERROR;
-        LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &tmpSts));
-        if (U_FAILURE(tmpSts)) {
-            // type map for each key must exist
-            UPRV_UNREACHABLE;
+        LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &sts));
+        if (U_FAILURE(sts)) {
+            // We fail here if typeMap does not have an entry corresponding to every entry in keyMap (should
+            // not happen for valid keyTypeData), or if ures_getByKey fails for some other reason
+            // (e.g. data file cannot be loaded, using stubdata, over-aggressive data filtering has removed
+            // something like timezoneTypes.res, etc.). Error code is already set. See ICU-21669.
+            UPRV_UNREACHABLE_ASSERT;
         } else {
             LocalUResourceBundlePointer typeMapEntry;
 

+ 8 - 4
thirdparty/icu4c/common/uloc_tag.cpp

@@ -139,7 +139,7 @@ static const char* const LEGACY[] = {
  Updated on 2018-09-12 from
  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
 
- The table lists redundant tags with preferred value in the IANA languate tag registry.
+ The table lists redundant tags with preferred value in the IANA language tag registry.
  It's generated with the following command:
 
  curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
@@ -1612,7 +1612,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
     }
 
     if (pKwds) {
-        const char *pBcpKey = NULL;     /* u extenstion key subtag */
+        const char *pBcpKey = NULL;     /* u extension key subtag */
         const char *pBcpType = NULL;    /* beginning of u extension type subtag(s) */
         int32_t bcpKeyLen = 0;
         int32_t bcpTypeLen = 0;
@@ -2089,6 +2089,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
             legacyLen = checkLegacyLen;  /* back up for output parsedLen */
             int32_t replacementLen = static_cast<int32_t>(uprv_strlen(LEGACY[i+1]));
             newTagLength = replacementLen + tagLen - checkLegacyLen;
+            int32_t oldTagLength = tagLen;
             if (tagLen < newTagLength) {
                 uprv_free(tagBuf);
                 tagBuf = (char*)uprv_malloc(newTagLength + 1);
@@ -2102,7 +2103,10 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
             parsedLenDelta = checkLegacyLen - replacementLen;
             uprv_strcpy(t->buf, LEGACY[i + 1]);
             if (checkLegacyLen != tagLen) {
-                uprv_strcpy(t->buf + replacementLen, tag + checkLegacyLen);
+                uprv_memcpy(t->buf + replacementLen, tag + checkLegacyLen,
+                            oldTagLength - checkLegacyLen);
+                // NUL-terminate after memcpy().
+                t->buf[replacementLen + oldTagLength - checkLegacyLen] = 0;
             }
             break;
         }
@@ -2306,7 +2310,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
         if (next & EXTV) {
             if (_isExtensionSubtag(pSubtag, subtagLen)) {
                 if (pExtValueSubtag == NULL) {
-                    /* if the start postion of this extension's value is not yet,
+                    /* if the start position of this extension's value is not yet set,
                         this one is the first value subtag */
                     pExtValueSubtag = pSubtag;
                 }

+ 68 - 0
thirdparty/icu4c/common/ulocimp.h

@@ -307,4 +307,72 @@ U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* le
 // Return true if the value is already canonicalized.
 U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
 
+/**
+ * A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY.
+ * This encompasses all of the logic to allocate a temporary locale ID buffer on the stack,
+ * and then, if it's not big enough, reallocate it on the heap and try again.
+ *
+ * Contract, as implemented below:
+ *  - The caller stores the length reported by its call in requestedCapacity.
+ *  - needToTryAgain() returns true only when *err is U_BUFFER_OVERFLOW_ERROR or
+ *    U_STRING_NOT_TERMINATED_WARNING and a heap buffer of requestedCapacity + 2 bytes
+ *    could be allocated; in that case *err is reset to U_ZERO_ERROR.
+ *  - If that allocation fails, *err is set to U_MEMORY_ALLOCATION_ERROR and false is returned.
+ *  - At most one retry is ever requested: once a heap buffer exists, needToTryAgain()
+ *    returns false regardless of *err.
+ *  - Nothing is copied from the stack buffer into the heap buffer, so the caller has to
+ *    repeat its call, as the loop in the example does.
+ *
+ * You use it like this:
+ * UErrorCode err = U_ZERO_ERROR;
+ *
+ * PreflightingLocaleIDBuffer tempBuffer;
+ * do {
+ *     tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err);
+ * } while (tempBuffer.needToTryAgain(&err));
+ * if (U_SUCCESS(err)) {
+ *     uloc_doSomethingWithTheResult(tempBuffer.getBuffer());
+ * }
+ */
+class PreflightingLocaleIDBuffer {
+private:
+    char stackBuffer[ULOC_FULLNAME_CAPACITY];  // storage handed out while heapBuffer is still nullptr
+    char* heapBuffer = nullptr;  // allocated by needToTryAgain(), released by the destructor
+    int32_t capacity = ULOC_FULLNAME_CAPACITY;  // size in bytes of the buffer getBuffer() currently returns
+    
+public:
+    int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY;  // written by the caller; read by needToTryAgain() to size the heap buffer
+
+    // No heap allocation. Use only on the stack.
+    static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
+    static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
+#if U_HAVE_PLACEMENT_NEW
+    static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
+#endif
+
+    PreflightingLocaleIDBuffer() {}
+    
+    ~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); }  // heapBuffer may still be nullptr here
+    
+    char* getBuffer() {  // the heap buffer once one has been allocated, otherwise the stack buffer
+        if (heapBuffer == nullptr) {
+            return stackBuffer;
+        } else {
+            return heapBuffer;
+        }
+    }
+    
+    int32_t getCapacity() {  // capacity matching the buffer returned by getBuffer()
+        return capacity;
+    }
+    
+    bool needToTryAgain(UErrorCode* err) {  // true means: call again using getBuffer()/getCapacity()
+        if (heapBuffer != nullptr) {  // a heap buffer already exists, so one retry has been granted: never ask for another
+            return false;
+        }
+    
+        if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) {
+            int32_t newCapacity = requestedCapacity + 2;    // one for the terminating null, one just for paranoia
+            heapBuffer = static_cast<char*>(uprv_malloc(newCapacity));
+            if (heapBuffer == nullptr) {
+                *err = U_MEMORY_ALLOCATION_ERROR;
+            } else {
+                *err = U_ZERO_ERROR;  // clear the overflow status so the repeated call starts from success
+                capacity = newCapacity;
+            }
+            return U_SUCCESS(*err);  // true only when the heap buffer was allocated
+        }
+        return false;  // any other status: there is nothing to retry
+    }
+};
+
 #endif

+ 1 - 1
thirdparty/icu4c/common/umapfile.h

@@ -18,7 +18,7 @@
  *             whatever means are available.
  *
  *            These functions are part of the ICU internal implementation, and
- *            are not inteded to be used directly by applications.
+ *            are not intended to be used directly by applications.
  *
  *----------------------------------------------------------------------------------*/
 

+ 5 - 5
thirdparty/icu4c/common/unicode/appendable.h

@@ -174,7 +174,7 @@ public:
      * @return true if the operation succeeded
      * @stable ICU 4.8
      */
-    virtual UBool appendCodeUnit(char16_t c);
+    virtual UBool appendCodeUnit(char16_t c) override;
 
     /**
      * Appends a code point to the string.
@@ -182,7 +182,7 @@ public:
      * @return true if the operation succeeded
      * @stable ICU 4.8
      */
-    virtual UBool appendCodePoint(UChar32 c);
+    virtual UBool appendCodePoint(UChar32 c) override;
 
     /**
      * Appends a string to the UnicodeString.
@@ -191,7 +191,7 @@ public:
      * @return true if the operation succeeded
      * @stable ICU 4.8
      */
-    virtual UBool appendString(const char16_t *s, int32_t length);
+    virtual UBool appendString(const char16_t *s, int32_t length) override;
 
     /**
      * Tells the UnicodeString that the caller is going to append roughly
@@ -200,7 +200,7 @@ public:
      * @return true if the operation succeeded
      * @stable ICU 4.8
      */
-    virtual UBool reserveAppendCapacity(int32_t appendCapacity);
+    virtual UBool reserveAppendCapacity(int32_t appendCapacity) override;
 
     /**
      * Returns a writable buffer for appending and writes the buffer's capacity to
@@ -226,7 +226,7 @@ public:
     virtual char16_t *getAppendBuffer(int32_t minCapacity,
                                    int32_t desiredCapacityHint,
                                    char16_t *scratch, int32_t scratchCapacity,
-                                   int32_t *resultCapacity);
+                                   int32_t *resultCapacity) override;
 
 private:
     UnicodeString &str;

+ 4 - 4
thirdparty/icu4c/common/unicode/brkiter.h

@@ -99,7 +99,7 @@ U_NAMESPACE_BEGIN
  * <p>
  * Code snippets illustrating the use of the Break Iterator APIs
  * are available in the ICU User Guide,
- * http://icu-project.org/userguide/boundaryAnalysis.html
+ * https://unicode-org.github.io/icu/userguide/boundaryanalysis/
  * and in the sample program icu/source/samples/break/break.cpp
  *
  */
@@ -124,7 +124,7 @@ public:
      * object, and styles are not considered.
      * @stable ICU 2.0
      */
-    virtual UBool operator==(const BreakIterator&) const = 0;
+    virtual bool operator==(const BreakIterator&) const = 0;
 
     /**
      * Returns the complement of the result of operator==
@@ -132,7 +132,7 @@ public:
      * @return the complement of the result of operator==
      * @stable ICU 2.0
      */
-    UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
+    bool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
 
     /**
      * Return a polymorphic copy of this object.  This is an abstract
@@ -146,7 +146,7 @@ public:
      * will return distinct unequal values.
      * @stable ICU 2.0
      */
-    virtual UClassID getDynamicClassID(void) const = 0;
+    virtual UClassID getDynamicClassID(void) const override = 0;
 
     /**
      * Return a CharacterIterator over the text being analyzed.

+ 3 - 3
thirdparty/icu4c/common/unicode/bytestream.h

@@ -206,7 +206,7 @@ public:
    * @param n the number of bytes; must be non-negative
    * @stable ICU 4.2
    */
-  virtual void Append(const char* bytes, int32_t n);
+  virtual void Append(const char* bytes, int32_t n) override;
   /**
    * Returns a writable buffer for appending and writes the buffer's capacity to
    * *result_capacity. For details see the base class documentation.
@@ -224,7 +224,7 @@ public:
   virtual char* GetAppendBuffer(int32_t min_capacity,
                                 int32_t desired_capacity_hint,
                                 char* scratch, int32_t scratch_capacity,
-                                int32_t* result_capacity);
+                                int32_t* result_capacity) override;
   /**
    * Returns the number of bytes actually written to the sink.
    * @return number of bytes written to the buffer
@@ -291,7 +291,7 @@ class StringByteSink : public ByteSink {
    * @param n the number of bytes; must be non-negative
    * @stable ICU 4.2
    */
-  virtual void Append(const char* data, int32_t n) { dest_->append(data, n); }
+  virtual void Append(const char* data, int32_t n) override { dest_->append(data, n); }
  private:
   StringClass* dest_;
 

+ 19 - 19
thirdparty/icu4c/common/unicode/bytestriebuilder.h

@@ -134,21 +134,21 @@ private:
 
     void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
 
-    virtual int32_t getElementStringLength(int32_t i) const;
-    virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const;
-    virtual int32_t getElementValue(int32_t i) const;
+    virtual int32_t getElementStringLength(int32_t i) const override;
+    virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const override;
+    virtual int32_t getElementValue(int32_t i) const override;
 
-    virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const;
+    virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const override;
 
-    virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const;
-    virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const;
-    virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const;
+    virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const override;
+    virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const override;
+    virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const override;
 
-    virtual UBool matchNodesCanHaveValues() const { return false; }
+    virtual UBool matchNodesCanHaveValues() const override { return false; }
 
-    virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; }
-    virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; }
-    virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; }
+    virtual int32_t getMaxBranchLinearSubNodeLength() const override { return BytesTrie::kMaxBranchLinearSubNodeLength; }
+    virtual int32_t getMinLinearMatch() const override { return BytesTrie::kMinLinearMatch; }
+    virtual int32_t getMaxLinearMatchLength() const override { return BytesTrie::kMaxLinearMatchLength; }
 
     /**
      * @internal (private)
@@ -156,22 +156,22 @@ private:
     class BTLinearMatchNode : public LinearMatchNode {
     public:
         BTLinearMatchNode(const char *units, int32_t len, Node *nextNode);
-        virtual UBool operator==(const Node &other) const;
-        virtual void write(StringTrieBuilder &builder);
+        virtual bool operator==(const Node &other) const override;
+        virtual void write(StringTrieBuilder &builder) override;
     private:
         const char *s;
     };
     
     virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
-                                        Node *nextNode) const;
+                                        Node *nextNode) const override;
 
     UBool ensureCapacity(int32_t length);
-    virtual int32_t write(int32_t byte);
+    virtual int32_t write(int32_t byte) override;
     int32_t write(const char *b, int32_t length);
-    virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length);
-    virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
-    virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
-    virtual int32_t writeDeltaTo(int32_t jumpTarget);
+    virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) override;
+    virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) override;
+    virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) override;
+    virtual int32_t writeDeltaTo(int32_t jumpTarget) override;
     static int32_t internalEncodeDelta(int32_t i, char intBytes[]);
 
     CharString *strings;  // Pointer not object so we need not #include internal charstr.h.

+ 2 - 2
thirdparty/icu4c/common/unicode/caniter.h

@@ -145,7 +145,7 @@ public:
      *
      * @stable ICU 2.2
      */
-    virtual UClassID getDynamicClassID() const;
+    virtual UClassID getDynamicClassID() const override;
 
 private:
     // ===================== PRIVATES ==============================
@@ -194,7 +194,7 @@ private:
 
     /**
      * See if the decomposition of cp2 is at segment starting at segmentPos
-     * (with canonical rearrangment!)
+     * (with canonical rearrangement!)
      * If so, take the remainder, and return the equivalents
      */
     //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);

+ 4 - 4
thirdparty/icu4c/common/unicode/chariter.h

@@ -114,7 +114,7 @@ public:
      * character in the same character-storage object
      * @stable ICU 2.0
      */
-    virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;
+    virtual bool operator==(const ForwardCharacterIterator& that) const = 0;
     
     /**
      * Returns true when the iterators refer to different
@@ -126,7 +126,7 @@ public:
      * same text-storage object
      * @stable ICU 2.0
      */
-    inline UBool operator!=(const ForwardCharacterIterator& that) const;
+    inline bool operator!=(const ForwardCharacterIterator& that) const;
     
     /**
      * Generates a hash code for this iterator.  
@@ -142,7 +142,7 @@ public:
      * @return a UClassID for this ForwardCharacterIterator 
      * @stable ICU 2.0
      */
-    virtual UClassID getDynamicClassID(void) const = 0;
+    virtual UClassID getDynamicClassID(void) const override = 0;
     
     /**
      * Gets the current code unit for returning and advances to the next code unit
@@ -692,7 +692,7 @@ protected:
     int32_t  end;
 };
 
-inline UBool
+inline bool
 ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
     return !operator==(that);
 }

+ 1 - 1
thirdparty/icu4c/common/unicode/docmain.h

@@ -47,7 +47,7 @@
  * <h3>API References for Previous Releases</h3>
  * <p>The API References for each release of ICU are also available as
  * a zip file from the ICU 
- * <a href="http://site.icu-project.org/download">download page</a>.</p>
+ * <a href="https://icu.unicode.org/download">download page</a>.</p>
  *
  * <hr>
  *

+ 4 - 4
thirdparty/icu4c/common/unicode/dtintrv.h

@@ -89,7 +89,7 @@ public:
      *                  other classes have different class IDs.
      * @stable ICU 4.0
      */
-    virtual UClassID getDynamicClassID(void) const;
+    virtual UClassID getDynamicClassID(void) const override;
 
     
     /**
@@ -109,14 +109,14 @@ public:
      * @return true if the two DateIntervals are the same
      * @stable ICU 4.0
      */
-    virtual UBool operator==(const DateInterval& other) const;
+    virtual bool operator==(const DateInterval& other) const;
 
     /**
      * Non-equality operator
      * @return true if the two DateIntervals are not the same
      * @stable ICU 4.0
      */
-    inline UBool operator!=(const DateInterval& other) const;
+    inline bool operator!=(const DateInterval& other) const;
 
 
     /**
@@ -151,7 +151,7 @@ DateInterval::getToDate() const {
 }
 
 
-inline UBool 
+inline bool
 DateInterval::operator!=(const DateInterval& other) const { 
     return ( !operator==(other) );
 }

+ 2 - 4
thirdparty/icu4c/common/unicode/localebuilder.h

@@ -90,8 +90,7 @@ public:
     LocaleBuilder& setLocale(const Locale& locale);
 
     /**
-     * Resets the LocaleBuilder to match the provided
-     * [Unicode Locale Identifier](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id) .
+     * Resets the LocaleBuilder to match the provided IETF BCP 47 language tag.
      * Discards the existing state.
      * The empty string causes the builder to be reset, like {@link #clear}.
      * Legacy language tags (marked as “Type: grandfathered” in BCP 47)
@@ -101,8 +100,7 @@ public:
      *
      * <p>This method clears the internal UErrorCode.
      *
-     * @param tag the language tag, defined as
-     *   [unicode_locale_id](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id).
+     * @param tag the language tag, defined as IETF BCP 47 language tag.
      * @return This builder.
      * @stable ICU 64
      */

Some files were not shown because too many files changed in this diff