Browse Source

Update ICU to 78.1

Pāvels Nadtočajevs 1 month ago
parent
commit
1ca8f1d7f6
86 changed files with 7693 additions and 4084 deletions
  1. 1 0
      modules/gdscript/tests/scripts/parser/features/unicode_identifiers.gd
  2. 1 0
      modules/text_server_adv/SCsub
  3. 1 0
      modules/text_server_adv/gdextension_build/SConstruct
  4. 1 1
      thirdparty/README.md
  5. 26 0
      thirdparty/icu4c/LICENSE
  6. 18 29
      thirdparty/icu4c/common/brkiter.cpp
  7. 41 45
      thirdparty/icu4c/common/charstr.h
  8. 0 4
      thirdparty/icu4c/common/cmemory.h
  9. 4 4
      thirdparty/icu4c/common/cstr.h
  10. 29 0
      thirdparty/icu4c/common/fixedstring.cpp
  11. 104 0
      thirdparty/icu4c/common/fixedstring.h
  12. 5 5
      thirdparty/icu4c/common/localebuilder.cpp
  13. 1153 1131
      thirdparty/icu4c/common/localefallback_data.h
  14. 12 61
      thirdparty/icu4c/common/locbased.cpp
  15. 3 58
      thirdparty/icu4c/common/locbased.h
  16. 10 10
      thirdparty/icu4c/common/locdispnames.cpp
  17. 411 243
      thirdparty/icu4c/common/locid.cpp
  18. 1 1
      thirdparty/icu4c/common/loclikely.cpp
  19. 27 6
      thirdparty/icu4c/common/loclikelysubtags.cpp
  20. 9 1
      thirdparty/icu4c/common/lstmbe.cpp
  21. 45 42
      thirdparty/icu4c/common/norm2_nfc_data.h
  22. 118 86
      thirdparty/icu4c/common/normalizer2impl.h
  23. 499 490
      thirdparty/icu4c/common/propname_data.h
  24. 1 1
      thirdparty/icu4c/common/rbbidata.h
  25. 1 1
      thirdparty/icu4c/common/rbbiscan.cpp
  26. 4 3
      thirdparty/icu4c/common/rbbisetb.cpp
  27. 1 1
      thirdparty/icu4c/common/rbbisetb.h
  28. 1 1
      thirdparty/icu4c/common/rbbitblb.cpp
  29. 16 16
      thirdparty/icu4c/common/servloc.h
  30. 10 10
      thirdparty/icu4c/common/sharedobject.h
  31. 6 0
      thirdparty/icu4c/common/static_unicode_sets.cpp
  32. 1 0
      thirdparty/icu4c/common/static_unicode_sets.h
  33. 430 421
      thirdparty/icu4c/common/ubidi_props_data.h
  34. 168 157
      thirdparty/icu4c/common/ucase_props_data.h
  35. 3 3
      thirdparty/icu4c/common/ucasemap.cpp
  36. 27 0
      thirdparty/icu4c/common/uchar.cpp
  37. 629 636
      thirdparty/icu4c/common/uchar_props_data.h
  38. 18 10
      thirdparty/icu4c/common/ucnv.cpp
  39. 1 1
      thirdparty/icu4c/common/ucnv2022.cpp
  40. 32 9
      thirdparty/icu4c/common/ucnv_io.cpp
  41. 1 0
      thirdparty/icu4c/common/udata.cpp
  42. 63 52
      thirdparty/icu4c/common/uidna.cpp
  43. 16 2
      thirdparty/icu4c/common/uloc.cpp
  44. 20 24
      thirdparty/icu4c/common/uloc_keytype.cpp
  45. 39 39
      thirdparty/icu4c/common/ulocimp.h
  46. 7 0
      thirdparty/icu4c/common/umapfile.cpp
  47. 25 39
      thirdparty/icu4c/common/umutex.h
  48. 56 57
      thirdparty/icu4c/common/unicode/brkiter.h
  49. 33 2
      thirdparty/icu4c/common/unicode/bytestream.h
  50. 0 3
      thirdparty/icu4c/common/unicode/bytestriebuilder.h
  51. 0 2
      thirdparty/icu4c/common/unicode/caniter.h
  52. 4 2
      thirdparty/icu4c/common/unicode/docmain.h
  53. 2 2
      thirdparty/icu4c/common/unicode/localebuilder.h
  54. 0 2
      thirdparty/icu4c/common/unicode/localpointer.h
  55. 246 138
      thirdparty/icu4c/common/unicode/locid.h
  56. 8 17
      thirdparty/icu4c/common/unicode/platform.h
  57. 0 13
      thirdparty/icu4c/common/unicode/rbbi.h
  58. 1 1
      thirdparty/icu4c/common/unicode/stringpiece.h
  59. 34 17
      thirdparty/icu4c/common/unicode/uchar.h
  60. 22 0
      thirdparty/icu4c/common/unicode/umachine.h
  61. 25 27
      thirdparty/icu4c/common/unicode/uniset.h
  62. 89 30
      thirdparty/icu4c/common/unicode/unistr.h
  63. 1 2
      thirdparty/icu4c/common/unicode/uobject.h
  64. 5 3
      thirdparty/icu4c/common/unicode/urename.h
  65. 12 1
      thirdparty/icu4c/common/unicode/uscript.h
  66. 56 61
      thirdparty/icu4c/common/unicode/uset.h
  67. 32 1
      thirdparty/icu4c/common/unicode/utf.h
  68. 29 3
      thirdparty/icu4c/common/unicode/utf8.h
  69. 2 0
      thirdparty/icu4c/common/unicode/utf_old.h
  70. 2677 0
      thirdparty/icu4c/common/unicode/utfiterator.h
  71. 161 0
      thirdparty/icu4c/common/unicode/utfstring.h
  72. 79 0
      thirdparty/icu4c/common/unicode/utypes.h
  73. 5 5
      thirdparty/icu4c/common/unicode/uvernum.h
  74. 4 4
      thirdparty/icu4c/common/unicode/uversion.h
  75. 3 3
      thirdparty/icu4c/common/uniset.cpp
  76. 15 10
      thirdparty/icu4c/common/unistr_cnv.cpp
  77. 5 0
      thirdparty/icu4c/common/uposixdefs.h
  78. 5 1
      thirdparty/icu4c/common/uprops.h
  79. 0 2
      thirdparty/icu4c/common/uresimp.h
  80. 7 3
      thirdparty/icu4c/common/uscript_props.cpp
  81. 9 7
      thirdparty/icu4c/common/usprep.cpp
  82. 16 16
      thirdparty/icu4c/common/ustr_wcs.cpp
  83. 7 5
      thirdparty/icu4c/common/uts46.cpp
  84. 3 1
      thirdparty/icu4c/i18n/scriptset.cpp
  85. 1 0
      thirdparty/icu4c/i18n/ucln_in.h
  86. BIN
      thirdparty/icu4c/icudt_godot.dat

+ 1 - 0
modules/gdscript/tests/scripts/parser/features/unicode_identifiers.gd

@@ -1,4 +1,5 @@
 const π = PI
+@warning_ignore("confusable_identifier")
 var ㄥ = π
 
 func test():

+ 1 - 0
modules/text_server_adv/SCsub

@@ -272,6 +272,7 @@ if env["builtin_icu4c"]:
         "common/errorcode.cpp",
         "common/filteredbrk.cpp",
         "common/filterednormalizer2.cpp",
+        "common/fixedstring.cpp",
         "common/icudataver.cpp",
         "common/icuplug.cpp",
         "common/loadednormalizer2impl.cpp",

+ 1 - 0
modules/text_server_adv/gdextension_build/SConstruct

@@ -515,6 +515,7 @@ thirdparty_icu_sources = [
     "common/errorcode.cpp",
     "common/filteredbrk.cpp",
     "common/filterednormalizer2.cpp",
+    "common/fixedstring.cpp",
     "common/icudataver.cpp",
     "common/icuplug.cpp",
     "common/loadednormalizer2impl.cpp",

+ 1 - 1
thirdparty/README.md

@@ -480,7 +480,7 @@ The files of hidapi are stored in `thirdparty/sdl/hidapi/` folder.
 ## icu4c
 
 - Upstream: https://github.com/unicode-org/icu
-- Version: 77.1 (457157a92aa053e632cc7fcfd0e12f8a943b2d11, 2025)
+- Version: 78.1 (049e0d6a420629ac7db77256987d083a563287b5, 2025)
 - License: Unicode
 
 Files extracted from upstream source:

+ 26 - 0
thirdparty/icu4c/LICENSE

@@ -540,3 +540,29 @@ publicity pertaining to distribution of the software without specific,
 written prior permission.  M.I.T. makes no representations about the
 suitability of this software for any purpose.  It is provided "as is"
 without express or implied warranty.
+
+----------------------------------------------------------------------
+
+File: sorttable.js (only for ICU4J)
+
+The MIT Licence, for code from kryogenix.org
+
+Code downloaded from the Browser Experiments section of kryogenix.org is
+licenced under the so-called MIT licence. The licence is below.
+
+Copyright (c) 1997-date Stuart Langridge
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following conditions:
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 18 - 29
thirdparty/icu4c/common/brkiter.cpp

@@ -121,11 +121,9 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
 
     // If there is a result, set the valid locale and actual locale, and the kind
     if (U_SUCCESS(status) && result != nullptr) {
-        U_LOCALE_BASED(locBased, *(BreakIterator*)result);
-
-        locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status),
-                              actual.data(), status);
-        LocaleBased::setLocaleID(loc.getName(), result->requestLocale, status);
+        result->actualLocale = Locale(actual.data());
+        result->validLocale = Locale(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status));
+        result->requestLocale = loc;
     }
 
     ures_close(b);
@@ -204,33 +202,28 @@ BreakIterator::getAvailableLocales(int32_t& count)
 //-------------------------------------------
 
 BreakIterator::BreakIterator()
+    : actualLocale(Locale::getRoot()), validLocale(Locale::getRoot()), requestLocale(Locale::getRoot())
 {
 }
 
-BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) {
-    UErrorCode status = U_ZERO_ERROR;
-    U_LOCALE_BASED(locBased, *this);
-    locBased.setLocaleIDs(other.validLocale, other.actualLocale, status);
-    LocaleBased::setLocaleID(other.requestLocale, requestLocale, status);
-    U_ASSERT(U_SUCCESS(status));
+BreakIterator::BreakIterator(const BreakIterator &other)
+    : UObject(other),
+      actualLocale(other.actualLocale),
+      validLocale(other.validLocale),
+      requestLocale(other.requestLocale) {
 }
 
 BreakIterator &BreakIterator::operator =(const BreakIterator &other) {
     if (this != &other) {
-        UErrorCode status = U_ZERO_ERROR;
-        U_LOCALE_BASED(locBased, *this);
-        locBased.setLocaleIDs(other.validLocale, other.actualLocale, status);
-        LocaleBased::setLocaleID(other.requestLocale, requestLocale, status);
-        U_ASSERT(U_SUCCESS(status));
+        actualLocale = other.actualLocale;
+        validLocale = other.validLocale;
+        requestLocale = other.requestLocale;
     }
     return *this;
 }
 
 BreakIterator::~BreakIterator()
 {
-    delete validLocale;
-    delete actualLocale;
-    delete requestLocale;
 }
 
 // ------------------------------------------
@@ -398,8 +391,8 @@ BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& statu
         // THIS LONG is a sign of bad code -- so the action item is to
         // revisit this in ICU 3.0 and clean it up/fix it/remove it.
         if (U_SUCCESS(status) && (result != nullptr) && *actualLoc.getName() != 0) {
-            U_LOCALE_BASED(locBased, *result);
-            locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName(), status);
+            result->actualLocale = actualLoc;
+            result->validLocale = actualLoc;
         }
         return result;
     }
@@ -506,8 +499,7 @@ BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
         return Locale::getRoot();
     }
     if (type == ULOC_REQUESTED_LOCALE) {
-        return requestLocale == nullptr ?
-            Locale::getRoot() : Locale(requestLocale->data());
+        return requestLocale;
     }
     return LocaleBased::getLocale(validLocale, actualLocale, type, status);
 }
@@ -518,7 +510,7 @@ BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
         return nullptr;
     }
     if (type == ULOC_REQUESTED_LOCALE) {
-        return requestLocale == nullptr ?  "" : requestLocale->data();
+        return requestLocale.getName();
     }
     return LocaleBased::getLocaleID(validLocale, actualLocale, type, status);
 }
@@ -546,11 +538,8 @@ int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UE
     return 1;
 }
 
-BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) {
-  UErrorCode status = U_ZERO_ERROR;
-  U_LOCALE_BASED(locBased, (*this));
-  locBased.setLocaleIDs(valid.getName(), actual.getName(), status);
-  U_ASSERT(U_SUCCESS(status));
+BreakIterator::BreakIterator(const Locale& valid, const Locale& actual)
+    : actualLocale(actual), validLocale(valid), requestLocale(Locale::getRoot()) {
 }
 
 U_NAMESPACE_END

+ 41 - 45
thirdparty/icu4c/common/charstr.h

@@ -21,12 +21,6 @@
 
 U_NAMESPACE_BEGIN
 
-// Windows needs us to DLL-export the MaybeStackArray template specialization,
-// but MacOS X cannot handle it. Same as in digitlst.h.
-#if !U_PLATFORM_IS_DARWIN_BASED
-template class U_COMMON_API MaybeStackArray<char, 40>;
-#endif
-
 /**
  * ICU-internal char * string class.
  * This class does not assume or enforce any particular character encoding.
@@ -38,34 +32,34 @@ template class U_COMMON_API MaybeStackArray<char, 40>;
  * For example:
  *   cs.data()[5]='a';  // no need for setCharAt(5, 'a')
  */
-class U_COMMON_API CharString : public UMemory {
+class U_COMMON_API_CLASS CharString : public UMemory {
 public:
-    CharString() : len(0) { buffer[0]=0; }
-    CharString(StringPiece s, UErrorCode &errorCode) : len(0) {
+    U_COMMON_API CharString() : len(0) { buffer[0]=0; }
+    U_COMMON_API CharString(StringPiece s, UErrorCode &errorCode) : len(0) {
         buffer[0]=0;
         append(s, errorCode);
     }
-    CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
+    U_COMMON_API CharString(const CharString &s, UErrorCode &errorCode) : len(0) {
         buffer[0]=0;
         append(s, errorCode);
     }
-    CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
+    U_COMMON_API CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) {
         buffer[0]=0;
         append(s, sLength, errorCode);
     }
-    ~CharString() {}
+    U_COMMON_API ~CharString() {}
 
     /**
      * Move constructor; might leave src in an undefined state.
      * This string will have the same contents and state that the source string had.
      */
-    CharString(CharString &&src) noexcept;
+    U_COMMON_API CharString(CharString &&src) noexcept;
     /**
      * Move assignment operator; might leave src in an undefined state.
      * This string will have the same contents and state that the source string had.
      * The behavior is undefined if *this and src are the same object.
      */
-    CharString &operator=(CharString &&src) noexcept;
+    U_COMMON_API CharString &operator=(CharString &&src) noexcept;
 
     /**
      * Replaces this string's contents with the other string's contents.
@@ -73,21 +67,21 @@ public:
      * the assignment operator, to make copies explicit and to
      * use a UErrorCode where memory allocations might be needed.
      */
-    CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
-    CharString &copyFrom(StringPiece s, UErrorCode &errorCode);
+    U_COMMON_API CharString &copyFrom(const CharString &other, UErrorCode &errorCode);
+    U_COMMON_API CharString &copyFrom(StringPiece s, UErrorCode &errorCode);
 
-    UBool isEmpty() const { return len==0; }
-    int32_t length() const { return len; }
-    char operator[](int32_t index) const { return buffer[index]; }
-    StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }
+    U_COMMON_API UBool isEmpty() const { return len==0; }
+    U_COMMON_API int32_t length() const { return len; }
+    U_COMMON_API char operator[](int32_t index) const { return buffer[index]; }
+    U_COMMON_API StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); }
 
-    const char *data() const { return buffer.getAlias(); }
-    char *data() { return buffer.getAlias(); }
+    U_COMMON_API const char *data() const { return buffer.getAlias(); }
+    U_COMMON_API char *data() { return buffer.getAlias(); }
     /**
      * Allocates length()+1 chars and copies the NUL-terminated data().
      * The caller must uprv_free() the result.
      */
-    char *cloneData(UErrorCode &errorCode) const;
+    U_COMMON_API char *cloneData(UErrorCode &errorCode) const;
     /**
      * Copies the contents of the string into dest.
      * Checks if there is enough space in dest, extracts the entire string if possible,
@@ -103,40 +97,40 @@ public:
      * @param errorCode ICU error code.
      * @return length()
      */
-    int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const;
+    U_COMMON_API int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const;
 
-    bool operator==(const CharString& other) const {
+    U_COMMON_API bool operator==(const CharString& other) const {
         return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
     }
-    bool operator!=(const CharString& other) const {
+    U_COMMON_API bool operator!=(const CharString& other) const {
         return !operator==(other);
     }
 
-    bool operator==(StringPiece other) const {
+    U_COMMON_API bool operator==(StringPiece other) const {
         return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
     }
-    bool operator!=(StringPiece other) const {
+    U_COMMON_API bool operator!=(StringPiece other) const {
         return !operator==(other);
     }
 
     /** @return last index of c, or -1 if c is not in this string */
-    int32_t lastIndexOf(char c) const;
+    U_COMMON_API int32_t lastIndexOf(char c) const;
 
-    bool contains(StringPiece s) const;
+    U_COMMON_API bool contains(StringPiece s) const;
 
-    CharString &clear() { len=0; buffer[0]=0; return *this; }
-    CharString &truncate(int32_t newLength);
+    U_COMMON_API CharString &clear() { len=0; buffer[0]=0; return *this; }
+    U_COMMON_API CharString &truncate(int32_t newLength);
 
-    CharString &append(char c, UErrorCode &errorCode);
-    CharString &append(StringPiece s, UErrorCode &errorCode) {
+    U_COMMON_API CharString &append(char c, UErrorCode &errorCode);
+    U_COMMON_API CharString &append(StringPiece s, UErrorCode &errorCode) {
         return append(s.data(), s.length(), errorCode);
     }
-    CharString &append(const CharString &s, UErrorCode &errorCode) {
+    U_COMMON_API CharString &append(const CharString &s, UErrorCode &errorCode) {
         return append(s.data(), s.length(), errorCode);
     }
-    CharString &append(const char *s, int32_t sLength, UErrorCode &status);
+    U_COMMON_API CharString &append(const char *s, int32_t sLength, UErrorCode &status);
 
-    CharString &appendNumber(int64_t number, UErrorCode &status);
+    U_COMMON_API CharString &appendNumber(int64_t number, UErrorCode &status);
 
     /**
      * Returns a writable buffer for appending and writes the buffer's capacity to
@@ -158,26 +152,28 @@ public:
      * @param errorCode in/out error code
      * @return a buffer with resultCapacity>=min_capacity
      */
-    char *getAppendBuffer(int32_t minCapacity,
-                          int32_t desiredCapacityHint,
-                          int32_t &resultCapacity,
-                          UErrorCode &errorCode);
+    U_COMMON_API char *getAppendBuffer(int32_t minCapacity,
+                                       int32_t desiredCapacityHint,
+                                       int32_t &resultCapacity,
+                                       UErrorCode &errorCode);
 
-    CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
-    CharString &appendInvariantChars(const char16_t* uchars, int32_t ucharsLen, UErrorCode& errorCode);
+    U_COMMON_API CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode);
+    U_COMMON_API CharString &appendInvariantChars(const char16_t* uchars,
+                                                  int32_t ucharsLen,
+                                                  UErrorCode& errorCode);
 
     /**
      * Appends a filename/path part, e.g., a directory name.
      * First appends a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR if necessary.
      * Does nothing if s is empty.
      */
-    CharString &appendPathPart(StringPiece s, UErrorCode &errorCode);
+    U_COMMON_API CharString &appendPathPart(StringPiece s, UErrorCode &errorCode);
 
     /**
      * Appends a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR if this string is not empty
      * and does not already end with a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR.
      */
-    CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode);
+    U_COMMON_API CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode);
 
 private:
     MaybeStackArray<char, 40> buffer;

+ 0 - 4
thirdparty/icu4c/common/cmemory.h

@@ -334,9 +334,7 @@ public:
     // No heap allocation. Use only on the stack.
     static void* U_EXPORT2 operator new(size_t) noexcept = delete;
     static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
-#if U_HAVE_PLACEMENT_NEW
     static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete;
-#endif
 
     /**
      * Default constructor initializes with internal T[stackCapacity] buffer.
@@ -570,9 +568,7 @@ public:
     // No heap allocation. Use only on the stack.
     static void* U_EXPORT2 operator new(size_t) noexcept = delete;
     static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
-#if U_HAVE_PLACEMENT_NEW
     static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete;
-#endif
 
     /**
      * Default constructor initializes with internal H+T[stackCapacity] buffer.

+ 4 - 4
thirdparty/icu4c/common/cstr.h

@@ -43,11 +43,11 @@
 
 U_NAMESPACE_BEGIN
 
-class U_COMMON_API CStr : public UMemory {
+class U_COMMON_API_CLASS CStr : public UMemory {
   public:
-    CStr(const UnicodeString &in);
-    ~CStr();
-    const char * operator ()() const;
+    U_COMMON_API CStr(const UnicodeString &in);
+    U_COMMON_API ~CStr();
+    U_COMMON_API const char * operator ()() const;
 
   private:
     CharString s;

+ 29 - 0
thirdparty/icu4c/common/fixedstring.cpp

@@ -0,0 +1,29 @@
+// © 2025 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+#include "fixedstring.h"
+
+#include "unicode/unistr.h"
+#include "unicode/utypes.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Replaces the contents of dst with the characters of src, extracted with
+ * the US_INV (invariant-character) conversion.
+ *
+ * Does nothing if status already indicates a failure. An empty src clears
+ * dst. If dst cannot reserve src.length() + 1 chars, status is set to
+ * U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param src    the string to copy from
+ * @param dst    the string whose contents are replaced
+ * @param status in/out error code
+ */
+U_EXPORT void copyInvariantChars(const UnicodeString& src, FixedString& dst, UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    if (src.isEmpty()) {
+        dst.clear();
+    } else {
+        int32_t charCount = src.length();
+        // One extra char beyond the text itself, for the terminating NUL.
+        int32_t capacity = charCount + 1;
+        if (dst.reserve(capacity)) {
+            src.extract(0, charCount, dst.getAlias(), capacity, US_INV);
+        } else {
+            status = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+U_NAMESPACE_END

+ 104 - 0
thirdparty/icu4c/common/fixedstring.h

@@ -0,0 +1,104 @@
+// © 2025 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+#ifndef FIXEDSTRING_H
+#define FIXEDSTRING_H
+
+#include <string_view>
+#include <utility>
+
+#include "unicode/uobject.h"
+#include "unicode/utypes.h"
+#include "cmemory.h"
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * ICU-internal fixed-length char* string class.
+ * This is a complement to CharString to store fixed-length strings efficiently
+ * (not allocating any unnecessary storage for future additions to the string).
+ *
+ * A terminating NUL is always stored, but the length of the string isn't.
+ * An empty string is stored as nullptr, allocating no storage at all.
+ *
+ * This class wants to be convenient but is also deliberately minimalist.
+ * Please do not add methods if they only add minor convenience.
+ */
+class FixedString : public UMemory {
+public:
+    /** Creates an empty string; no storage is allocated. */
+    FixedString() = default;
+    ~FixedString() { operator delete[](ptr); }
+
+    /** Copies the NUL-terminated contents of other. */
+    FixedString(const FixedString& other) : FixedString(other.data()) {}
+
+    /**
+     * Copies init and stores a terminating NUL after it.
+     * The string is left empty if init is empty or if the allocation fails.
+     */
+    FixedString(std::string_view init) {
+        size_t size = init.size();
+        if (size > 0 && reserve(size + 1)) {
+            uprv_memcpy(ptr, init.data(), size);
+            ptr[size] = '\0';
+        }
+    }
+
+    /** Replaces the contents with a copy of other; self-assignment is a no-op. */
+    FixedString& operator=(const FixedString& other) {
+        if (this != &other) {
+            *this = other.data();
+        }
+        return *this;
+    }
+
+    /**
+     * Replaces the contents with a copy of init.
+     * init may alias this string's own storage.
+     * The string is left empty if init is empty or if the allocation fails.
+     */
+    FixedString& operator=(std::string_view init) {
+        if (init.empty()) {
+            clear();
+            return *this;
+        }
+        // Build the replacement before releasing the current buffer: init may
+        // point into that buffer (e.g. a view obtained from data()), and
+        // freeing it first would make the copy below read freed memory.
+        size_t size = init.size();
+        char* replacement = static_cast<char*>(operator new[](size + 1));
+        if (replacement != nullptr) {
+            uprv_memcpy(replacement, init.data(), size);
+            replacement[size] = '\0';
+        }
+        // A failed allocation leaves this string empty.
+        operator delete[](ptr);
+        ptr = replacement;
+        return *this;
+    }
+
+    /** Takes over the storage of other, leaving other empty. */
+    FixedString(FixedString&& other) noexcept : ptr(std::exchange(other.ptr, nullptr)) {}
+
+    /**
+     * Releases the current storage and takes over the storage of other,
+     * leaving other empty. Self-assignment is a no-op.
+     */
+    FixedString& operator=(FixedString&& other) noexcept {
+        if (this != &other) {
+            operator delete[](ptr);
+            ptr = std::exchange(other.ptr, nullptr);
+        }
+        return *this;
+    }
+
+    /** Releases the storage and makes this string empty. */
+    void clear() {
+        operator delete[](ptr);
+        ptr = nullptr;
+    }
+
+    /** @return the NUL-terminated contents; "" (never nullptr) when empty */
+    const char* data() const {
+        return isEmpty() ? "" : ptr;
+    }
+
+    /** @return the writable internal buffer; nullptr when the string is empty */
+    char* getAlias() {
+        return ptr;
+    }
+
+    /** @return true if no storage is held */
+    bool isEmpty() const {
+        return ptr == nullptr;
+    }
+
+    /**
+     * Allocate storage for a new string, without initializing it.
+     * Any previous contents are released first, so the argument must not
+     * depend on the old buffer.
+     * @param size number of chars to allocate; callers in this class pass
+     *             the string length plus one for the terminating NUL
+     * @return true if the allocation produced a non-null buffer
+     */
+    bool reserve(size_t size) {
+        operator delete[](ptr);
+        ptr = static_cast<char*>(operator new[](size));
+        return ptr != nullptr;
+    }
+
+private:
+    // Owned buffer holding the NUL-terminated string, or nullptr when empty.
+    char* ptr = nullptr;
+};
+
+U_COMMON_API void copyInvariantChars(const UnicodeString& src, FixedString& dst, UErrorCode& status);
+
+U_NAMESPACE_END
+
+#endif

+ 5 - 5
thirdparty/icu4c/common/localebuilder.cpp

@@ -8,6 +8,7 @@
 #include "bytesinkutil.h"  // StringByteSink<CharString>
 #include "charstr.h"
 #include "cstring.h"
+#include "fixedstring.h"
 #include "ulocimp.h"
 #include "unicode/localebuilder.h"
 #include "unicode/locid.h"
@@ -131,14 +132,13 @@ LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
         variant_ = nullptr;
         return *this;
     }
-    CharString* new_variant = new CharString(variant, status_);
-    if (U_FAILURE(status_)) { return *this; }
-    if (new_variant == nullptr) {
+    FixedString* new_variant = new FixedString(variant);
+    if (new_variant == nullptr || new_variant->isEmpty()) {
         status_ = U_MEMORY_ALLOCATION_ERROR;
         return *this;
     }
-    transform(new_variant->data(), new_variant->length());
-    if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
+    transform(new_variant->getAlias(), variant.length());
+    if (!ultag_isVariantSubtags(new_variant->data(), variant.length())) {
         delete new_variant;
         status_ = U_ILLEGAL_ARGUMENT_ERROR;
         return *this;

File diff suppressed because it is too large
+ 1153 - 1131
thirdparty/icu4c/common/localefallback_data.h


+ 12 - 61
thirdparty/icu4c/common/locbased.cpp

@@ -11,85 +11,36 @@
 **********************************************************************
 */
 #include "locbased.h"
-#include "cstring.h"
-#include "charstr.h"
+#include "uresimp.h"
 
 U_NAMESPACE_BEGIN
 
-Locale LocaleBased::getLocale(const CharString* valid, const CharString* actual,
-                              ULocDataLocaleType type, UErrorCode& status) {
-    const char* id = getLocaleID(valid, actual, type, status);
-    return Locale(id != nullptr ? id : "");
-}
-
-const char* LocaleBased::getLocaleID(const CharString* valid, const CharString* actual,
+const Locale& LocaleBased::getLocale(const Locale& valid, const Locale& actual,
                                      ULocDataLocaleType type, UErrorCode& status) {
     if (U_FAILURE(status)) {
-        return nullptr;
+        return Locale::getRoot();
     }
 
     switch(type) {
     case ULOC_VALID_LOCALE:
-        return valid == nullptr ? "" : valid->data();
+        return valid;
     case ULOC_ACTUAL_LOCALE:
-        return actual == nullptr ? "" : actual->data();
+        return actual;
     default:
         status = U_ILLEGAL_ARGUMENT_ERROR;
-        return nullptr;
+        return Locale::getRoot();
     }
 }
 
-void LocaleBased::setLocaleIDs(const CharString* validID, const CharString* actualID, UErrorCode& status) {
-    setValidLocaleID(validID, status);
-    setActualLocaleID(actualID,status);
-}
-void LocaleBased::setLocaleIDs(const char* validID, const char* actualID, UErrorCode& status) {
-    setValidLocaleID(validID, status);
-    setActualLocaleID(actualID,status);
-}
-
-void LocaleBased::setLocaleID(const char* id, CharString*& dest, UErrorCode& status) {
-    if (U_FAILURE(status)) { return; }
-    if (id == nullptr || *id == 0) {
-        delete dest;
-        dest = nullptr;
-    } else {
-        if (dest == nullptr) {
-            dest = new CharString(id, status);
-            if (dest == nullptr) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return;
-            }
-        } else {
-            dest->copyFrom(id, status);
-        }
-    }
-}
+const char* LocaleBased::getLocaleID(const Locale& valid, const Locale& actual,
+                                     ULocDataLocaleType type, UErrorCode& status) {
+    const Locale& locale = getLocale(valid, actual, type, status);
 
-void LocaleBased::setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status) {
-    if (U_FAILURE(status)) { return; }
-    if (id == nullptr || id->isEmpty()) {
-        delete dest;
-        dest = nullptr;
-    } else {
-        if (dest == nullptr) {
-            dest = new CharString(*id, status);
-            if (dest == nullptr) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return;
-            }
-        } else {
-            dest->copyFrom(*id, status);
-        }
+    if (U_FAILURE(status)) {
+        return nullptr;
     }
-}
 
-bool LocaleBased::equalIDs(const CharString* left, const CharString* right) {
-    // true if both are nullptr
-    if (left == nullptr && right == nullptr) return true;
-    // false if only one is nullptr
-    if (left == nullptr || right == nullptr) return false;
-    return *left == *right;
+    return locale == Locale::getRoot() ? kRootLocaleName : locale.getName();
 }
 
 U_NAMESPACE_END

+ 3 - 58
thirdparty/icu4c/common/locbased.h

@@ -16,17 +16,8 @@
 #include "unicode/locid.h"
 #include "unicode/uobject.h"
 
-/**
- * Macro to declare a locale LocaleBased wrapper object for the given
- * object, which must have two members named `validLocale' and
- * `actualLocale' of which are pointers to the internal icu::CharString.
- */
-#define U_LOCALE_BASED(varname, objname) \
-  LocaleBased varname((objname).validLocale, (objname).actualLocale)
-
 U_NAMESPACE_BEGIN
 
-class CharString;
 /**
  * A utility class that unifies the implementation of getLocale() by
  * various ICU services.  This class is likely to be removed in the
@@ -38,12 +29,6 @@ class U_COMMON_API LocaleBased : public UMemory {
 
  public:
 
-    /**
-     * Construct a LocaleBased wrapper around the two pointers.  These
-     * will be aliased for the lifetime of this object.
-     */
-    inline LocaleBased(CharString*& validAlias, CharString*& actualAlias);
-
     /**
      * Return locale meta-data for the service object wrapped by this
      * object.  Either the valid or the actual locale may be
@@ -54,8 +39,8 @@ class U_COMMON_API LocaleBased : public UMemory {
      * @param status input-output error code
      * @return the indicated locale
      */
-    static Locale getLocale(
-        const CharString* valid, const CharString* actual,
+    static const Locale& getLocale(
+        const Locale& valid, const Locale& actual,
         ULocDataLocaleType type, UErrorCode& status);
 
     /**
@@ -69,51 +54,11 @@ class U_COMMON_API LocaleBased : public UMemory {
      * @return the indicated locale ID
      */
     static const char* getLocaleID(
-        const CharString* valid, const CharString* actual,
+        const Locale& valid, const Locale& actual,
         ULocDataLocaleType type, UErrorCode& status);
 
-    /**
-     * Set the locale meta-data for the service object wrapped by this
-     * object.  If either parameter is zero, it is ignored.
-     * @param valid the ID of the valid locale
-     * @param actual the ID of the actual locale
-     */
-    void setLocaleIDs(const char* valid, const char* actual, UErrorCode& status);
-    void setLocaleIDs(const CharString* valid, const CharString* actual, UErrorCode& status);
-
-    static void setLocaleID(const char* id, CharString*& dest, UErrorCode& status);
-    static void setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status);
-
-    static bool equalIDs(const CharString* left, const CharString* right);
-
- private:
-
-    void setValidLocaleID(const CharString* id, UErrorCode& status);
-    void setActualLocaleID(const CharString* id, UErrorCode& status);
-    void setValidLocaleID(const char* id, UErrorCode& status);
-    void setActualLocaleID(const char* id, UErrorCode& status);
-
-    CharString*& valid;
-    CharString*& actual;
 };
 
-inline LocaleBased::LocaleBased(CharString*& validAlias, CharString*& actualAlias) :
-    valid(validAlias), actual(actualAlias) {
-}
-
-inline void LocaleBased::setValidLocaleID(const CharString* id, UErrorCode& status) {
-    setLocaleID(id, valid, status);
-}
-inline void LocaleBased::setActualLocaleID(const CharString* id, UErrorCode& status) {
-    setLocaleID(id, actual, status);
-}
-inline void LocaleBased::setValidLocaleID(const char* id, UErrorCode& status) {
-    setLocaleID(id, valid, status);
-}
-inline void LocaleBased::setActualLocaleID(const char* id, UErrorCode& status) {
-    setLocaleID(id, actual, status);
-}
-
 U_NAMESPACE_END
 
 #endif

+ 10 - 10
thirdparty/icu4c/common/locdispnames.cpp

@@ -66,7 +66,7 @@ Locale::getDisplayLanguage(const Locale &displayLocale,
         return result;
     }
 
-    length=uloc_getDisplayLanguage(fullName, displayLocale.fullName,
+    length=uloc_getDisplayLanguage(getName(), displayLocale.getName(),
                                    buffer, result.getCapacity(),
                                    &errorCode);
     result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -78,7 +78,7 @@ Locale::getDisplayLanguage(const Locale &displayLocale,
             return result;
         }
         errorCode=U_ZERO_ERROR;
-        length=uloc_getDisplayLanguage(fullName, displayLocale.fullName,
+        length=uloc_getDisplayLanguage(getName(), displayLocale.getName(),
                                        buffer, result.getCapacity(),
                                        &errorCode);
         result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -106,7 +106,7 @@ Locale::getDisplayScript(const Locale &displayLocale,
         return result;
     }
 
-    length=uloc_getDisplayScript(fullName, displayLocale.fullName,
+    length=uloc_getDisplayScript(getName(), displayLocale.getName(),
                                   buffer, result.getCapacity(),
                                   &errorCode);
     result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -118,7 +118,7 @@ Locale::getDisplayScript(const Locale &displayLocale,
             return result;
         }
         errorCode=U_ZERO_ERROR;
-        length=uloc_getDisplayScript(fullName, displayLocale.fullName,
+        length=uloc_getDisplayScript(getName(), displayLocale.getName(),
                                       buffer, result.getCapacity(),
                                       &errorCode);
         result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -146,7 +146,7 @@ Locale::getDisplayCountry(const Locale &displayLocale,
         return result;
     }
 
-    length=uloc_getDisplayCountry(fullName, displayLocale.fullName,
+    length=uloc_getDisplayCountry(getName(), displayLocale.getName(),
                                   buffer, result.getCapacity(),
                                   &errorCode);
     result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -158,7 +158,7 @@ Locale::getDisplayCountry(const Locale &displayLocale,
             return result;
         }
         errorCode=U_ZERO_ERROR;
-        length=uloc_getDisplayCountry(fullName, displayLocale.fullName,
+        length=uloc_getDisplayCountry(getName(), displayLocale.getName(),
                                       buffer, result.getCapacity(),
                                       &errorCode);
         result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -186,7 +186,7 @@ Locale::getDisplayVariant(const Locale &displayLocale,
         return result;
     }
 
-    length=uloc_getDisplayVariant(fullName, displayLocale.fullName,
+    length=uloc_getDisplayVariant(getName(), displayLocale.getName(),
                                   buffer, result.getCapacity(),
                                   &errorCode);
     result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -198,7 +198,7 @@ Locale::getDisplayVariant(const Locale &displayLocale,
             return result;
         }
         errorCode=U_ZERO_ERROR;
-        length=uloc_getDisplayVariant(fullName, displayLocale.fullName,
+        length=uloc_getDisplayVariant(getName(), displayLocale.getName(),
                                       buffer, result.getCapacity(),
                                       &errorCode);
         result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -226,7 +226,7 @@ Locale::getDisplayName(const Locale &displayLocale,
         return result;
     }
 
-    length=uloc_getDisplayName(fullName, displayLocale.fullName,
+    length=uloc_getDisplayName(getName(), displayLocale.getName(),
                                buffer, result.getCapacity(),
                                &errorCode);
     result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
@@ -238,7 +238,7 @@ Locale::getDisplayName(const Locale &displayLocale,
             return result;
         }
         errorCode=U_ZERO_ERROR;
-        length=uloc_getDisplayName(fullName, displayLocale.fullName,
+        length=uloc_getDisplayName(getName(), displayLocale.getName(),
                                    buffer, result.getCapacity(),
                                    &errorCode);
         result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);

+ 411 - 243
thirdparty/icu4c/common/locid.cpp

@@ -31,13 +31,16 @@
 ******************************************************************************
 */
 
+#include <cstddef>
 #include <optional>
 #include <string_view>
+#include <type_traits>
 #include <utility>
 
 #include "unicode/bytestream.h"
 #include "unicode/locid.h"
 #include "unicode/localebuilder.h"
+#include "unicode/localpointer.h"
 #include "unicode/strenum.h"
 #include "unicode/stringpiece.h"
 #include "unicode/uloc.h"
@@ -48,6 +51,7 @@
 #include "charstrmap.h"
 #include "cmemory.h"
 #include "cstring.h"
+#include "fixedstring.h"
 #include "mutex.h"
 #include "putilimp.h"
 #include "uassert.h"
@@ -232,9 +236,214 @@ locale_get_default()
     return Locale::getDefault().getName();
 }
 
+namespace {
+
+template <auto FIELD, typename T>
+void copyToArray(std::string_view sv, T* that) {
+    auto& field = that->*FIELD;
+    constexpr size_t capacity = std::extent_v<std::remove_reference_t<decltype(field)>>;
+    static_assert(capacity > 0);
+    if (!sv.empty()) {
+        U_ASSERT(sv.size() < capacity);
+        uprv_memcpy(field, sv.data(), sv.size());
+    }
+    field[sv.size()] = '\0';
+}
+
+} // namespace
 
 U_NAMESPACE_BEGIN
 
+void Locale::Nest::init(std::string_view language,
+                        std::string_view script,
+                        std::string_view region,
+                        uint8_t variantBegin) {
+    copyToArray<&Nest::language>(language, this);
+    copyToArray<&Nest::script>(script, this);
+    copyToArray<&Nest::region>(region, this);
+    this->variantBegin = variantBegin;
+}
+
+Locale::Nest::Nest(Heap&& heap, uint8_t variantBegin) {
+    // When moving from Heap to Nest the language field can be left untouched
+    // (as it has the same offset in both) and only the script and region fields
+    // need to be copied to their new locations, which is safe to do because the
+    // new locations come before the old locations in memory and don't overlap.
+    static_assert(offsetof(Nest, region) <= offsetof(Heap, script));
+    static_assert(offsetof(Nest, variantBegin) <= offsetof(Heap, region));
+    U_ASSERT(this == reinterpret_cast<Nest*>(&heap));
+    copyToArray<&Nest::script>(heap.script, this);
+    copyToArray<&Nest::region>(heap.region, this);
+    this->variantBegin = variantBegin;
+    *this->baseName = '\0';
+}
+
+struct Locale::Heap::Alloc : public UMemory {
+    FixedString fullName;
+    FixedString baseName;
+    int32_t variantBegin;
+
+    const char* getVariant() const { return variantBegin == 0 ? "" : getBaseName() + variantBegin; }
+    const char* getFullName() const { return fullName.data(); }
+    const char* getBaseName() const {
+        if (baseName.isEmpty()) {
+            if (const char* name = fullName.data(); *name != '@') {
+                return name;
+            }
+        }
+        return baseName.data();
+    }
+
+    Alloc(int32_t variantBegin) : fullName(), baseName(), variantBegin(variantBegin) {}
+
+    Alloc(const Alloc& other, UErrorCode& status)
+        : fullName(), baseName(), variantBegin(other.variantBegin) {
+        if (U_SUCCESS(status)) {
+            if (!other.fullName.isEmpty()) {
+                fullName = other.fullName;
+                if (fullName.isEmpty()) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                } else {
+                    if (!other.baseName.isEmpty()) {
+                        baseName = other.baseName;
+                        if (baseName.isEmpty()) {
+                            status = U_MEMORY_ALLOCATION_ERROR;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // Move should be done on the owner of the pointer to this object.
+    Alloc(Alloc&&) noexcept = delete;
+
+    ~Alloc() = default;
+};
+
+const char* Locale::Heap::getVariant() const { return ptr->getVariant(); }
+const char* Locale::Heap::getFullName() const { return ptr->getFullName(); }
+const char* Locale::Heap::getBaseName() const { return ptr->getBaseName(); }
+
+Locale::Heap::Heap(std::string_view language,
+                   std::string_view script,
+                   std::string_view region,
+                   int32_t variantBegin) {
+    ptr = new Alloc(variantBegin);
+    if (ptr == nullptr) {
+        type = eBOGUS;
+    } else {
+        type = eHEAP;
+        copyToArray<&Heap::language>(language, this);
+        copyToArray<&Heap::script>(script, this);
+        copyToArray<&Heap::region>(region, this);
+    }
+}
+
+Locale::Heap::~Heap() {
+    U_ASSERT(type == eHEAP);
+    delete ptr;
+}
+
+Locale::Heap& Locale::Heap::operator=(const Heap& other) {
+    U_ASSERT(type == eBOGUS);
+    UErrorCode status = U_ZERO_ERROR;
+    ptr = new Alloc(*other.ptr, status);
+    if (ptr == nullptr || U_FAILURE(status)) {
+        delete ptr;
+    } else {
+        type = eHEAP;
+        uprv_memcpy(language, other.language, sizeof language);
+        uprv_memcpy(script, other.script, sizeof script);
+        uprv_memcpy(region, other.region, sizeof region);
+    }
+    return *this;
+}
+
+Locale::Heap& Locale::Heap::operator=(Heap&& other) noexcept {
+    U_ASSERT(type == eBOGUS);
+    ptr = other.ptr;
+    type = eHEAP;
+    other.type = eBOGUS;
+    uprv_memcpy(language, other.language, sizeof language);
+    uprv_memcpy(script, other.script, sizeof script);
+    uprv_memcpy(region, other.region, sizeof region);
+    return *this;
+}
+
+template <typename BogusFn, typename NestFn, typename HeapFn, typename... Args>
+auto Locale::Payload::visit(BogusFn bogusFn, NestFn nestFn, HeapFn heapFn, Args... args) const {
+    switch (type) {
+        case eBOGUS:
+            return bogusFn(args...);
+        case eNEST:
+            return nestFn(nest, args...);
+        case eHEAP:
+            return heapFn(heap, args...);
+        default:
+            UPRV_UNREACHABLE_EXIT;
+    };
+}
+
+void Locale::Payload::copy(const Payload& other) {
+    other.visit([](Payload*) {},
+                [](const Nest& nest, Payload* dst) { dst->nest = nest; },
+                [](const Heap& heap, Payload* dst) { dst->heap = heap; },
+                this);
+}
+
+void Locale::Payload::move(Payload&& other) noexcept {
+    other.visit(
+        [](Payload*) {},
+        [](const Nest& nest, Payload* dst) { dst->nest = nest; },
+        [](const Heap& heap, Payload* dst) { dst->heap = std::move(const_cast<Heap&>(heap)); },
+        this);
+}
+
+Locale::Payload::~Payload() {
+    if (type == eHEAP) { heap.~Heap(); }
+}
+
+Locale::Payload::Payload(const Payload& other) : type{eBOGUS} { copy(other); }
+Locale::Payload::Payload(Payload&& other) noexcept : type{eBOGUS} { move(std::move(other)); }
+
+Locale::Payload& Locale::Payload::operator=(const Payload& other) {
+    if (this != &other) {
+        setToBogus();
+        copy(other);
+    }
+    return *this;
+}
+
+Locale::Payload& Locale::Payload::operator=(Payload&& other) noexcept {
+    if (this != &other) {
+        setToBogus();
+        move(std::move(other));
+    }
+    return *this;
+}
+
+void Locale::Payload::setToBogus() {
+    this->~Payload();
+    type = eBOGUS;
+}
+
+template <typename T, typename... Args> T& Locale::Payload::emplace(Args&&... args) {
+    if constexpr (std::is_same_v<T, Nest>) {
+        this->~Payload();
+        ::new (&nest) Nest(std::forward<Args>(args)...);
+        return nest;
+    }
+    if constexpr (std::is_same_v<T, Heap>) {
+        U_ASSERT(type != eHEAP);
+        ::new (&heap) Heap(std::forward<Args>(args)...);
+        return heap;
+    }
+}
+
+template <> Locale::Nest* Locale::Payload::get() { return type == eNEST ? &nest : nullptr; }
+template <> Locale::Heap* Locale::Payload::get() { return type == eHEAP ? &heap : nullptr; }
+
 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
 
 /*Character separating the posix id fields*/
@@ -243,22 +452,10 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
 #define SEP_CHAR '_'
 #define NULL_CHAR '\0'
 
-Locale::~Locale()
-{
-    if ((baseName != fullName) && (baseName != fullNameBuffer)) {
-        uprv_free(baseName);
-    }
-    baseName = nullptr;
-    /*if fullName is on the heap, we free it*/
-    if (fullName != fullNameBuffer)
-    {
-        uprv_free(fullName);
-        fullName = nullptr;
-    }
-}
+Locale::~Locale() = default;
 
 Locale::Locale()
-    : UObject(), fullName(fullNameBuffer), baseName(nullptr)
+    : UObject(), payload()
 {
     init(nullptr, false);
 }
@@ -269,9 +466,8 @@ Locale::Locale()
  *   the default locale.)
  */
 Locale::Locale(Locale::ELocaleType)
-    : UObject(), fullName(fullNameBuffer), baseName(nullptr)
+    : UObject(), payload()
 {
-    setToBogus();
 }
 
 
@@ -279,7 +475,7 @@ Locale::Locale( const   char * newLanguage,
                 const   char * newCountry,
                 const   char * newVariant,
                 const   char * newKeywords)
-    : UObject(), fullName(fullNameBuffer), baseName(nullptr)
+    : UObject(), payload()
 {
     if( (newLanguage==nullptr) && (newCountry == nullptr) && (newVariant == nullptr) )
     {
@@ -300,7 +496,6 @@ Locale::Locale( const   char * newLanguage,
         {
             lsize = static_cast<int32_t>(uprv_strlen(newLanguage));
             if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
-                setToBogus();
                 return;
             }
         }
@@ -312,7 +507,6 @@ Locale::Locale( const   char * newLanguage,
         {
             csize = static_cast<int32_t>(uprv_strlen(newCountry));
             if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
-                setToBogus();
                 return;
             }
         }
@@ -329,7 +523,6 @@ Locale::Locale( const   char * newLanguage,
             // remove trailing _'s
             vsize = static_cast<int32_t>(uprv_strlen(newVariant));
             if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
-                setToBogus();
                 return;
             }
             while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
@@ -342,7 +535,6 @@ Locale::Locale( const   char * newLanguage,
         {
             ksize = static_cast<int32_t>(uprv_strlen(newKeywords));
             if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
-              setToBogus();
               return;
             }
         }
@@ -383,7 +575,6 @@ Locale::Locale( const   char * newLanguage,
 
         if (U_FAILURE(status)) {
             // Something went wrong with appending, etc.
-            setToBogus();
             return;
         }
         // Parse it, because for example 'language' might really be a complete
@@ -392,82 +583,11 @@ Locale::Locale( const   char * newLanguage,
     }
 }
 
-Locale::Locale(const Locale &other)
-    : UObject(other), fullName(fullNameBuffer), baseName(nullptr)
-{
-    *this = other;
-}
-
-Locale::Locale(Locale&& other) noexcept
-    : UObject(other), fullName(fullNameBuffer), baseName(fullName) {
-  *this = std::move(other);
-}
-
-Locale& Locale::operator=(const Locale& other) {
-    if (this == &other) {
-        return *this;
-    }
-
-    setToBogus();
-
-    if (other.fullName == other.fullNameBuffer) {
-        uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
-    } else if (other.fullName == nullptr) {
-        fullName = nullptr;
-    } else {
-        fullName = uprv_strdup(other.fullName);
-        if (fullName == nullptr) return *this;
-    }
-
-    if (other.baseName == other.fullName) {
-        baseName = fullName;
-    } else if (other.baseName != nullptr) {
-        baseName = uprv_strdup(other.baseName);
-        if (baseName == nullptr) return *this;
-    }
-
-    uprv_strcpy(language, other.language);
-    uprv_strcpy(script, other.script);
-    uprv_strcpy(country, other.country);
-
-    variantBegin = other.variantBegin;
-    fIsBogus = other.fIsBogus;
-
-    return *this;
-}
-
-Locale& Locale::operator=(Locale&& other) noexcept {
-    if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName);
-    if (fullName != fullNameBuffer) uprv_free(fullName);
+Locale::Locale(const Locale&) = default;
+Locale::Locale(Locale&&) noexcept = default;
 
-    if (other.fullName == other.fullNameBuffer || other.baseName == other.fullNameBuffer) {
-        uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
-    }
-    if (other.fullName == other.fullNameBuffer) {
-        fullName = fullNameBuffer;
-    } else {
-        fullName = other.fullName;
-    }
-
-    if (other.baseName == other.fullNameBuffer) {
-        baseName = fullNameBuffer;
-    } else if (other.baseName == other.fullName) {
-        baseName = fullName;
-    } else {
-        baseName = other.baseName;
-    }
-
-    uprv_strcpy(language, other.language);
-    uprv_strcpy(script, other.script);
-    uprv_strcpy(country, other.country);
-
-    variantBegin = other.variantBegin;
-    fIsBogus = other.fIsBogus;
-
-    other.baseName = other.fullName = other.fullNameBuffer;
-
-    return *this;
-}
+Locale& Locale::operator=(const Locale&) = default;
+Locale& Locale::operator=(Locale&&) noexcept = default;
 
 Locale *
 Locale::clone() const {
@@ -477,7 +597,7 @@ Locale::clone() const {
 bool
 Locale::operator==( const   Locale& other) const
 {
-    return (uprv_strcmp(other.fullName, fullName) == 0);
+    return uprv_strcmp(other.getName(), getName()) == 0;
 }
 
 namespace {
@@ -1073,7 +1193,7 @@ public:
     }
 
     // Check the fields inside locale, if need to replace fields,
-    // place the the replaced locale ID in out and return true.
+    // place the replaced locale ID in out and return true.
     // Otherwise return false for no replacement or error.
     bool replace(
         const Locale& locale, CharString& out, UErrorCode& status);
@@ -1836,16 +1956,8 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
 /*This function initializes a Locale from a C locale ID*/
 Locale& Locale::init(StringPiece localeID, UBool canonicalize)
 {
-    fIsBogus = false;
     /* Free our current storage */
-    if ((baseName != fullName) && (baseName != fullNameBuffer)) {
-        uprv_free(baseName);
-    }
-    baseName = nullptr;
-    if(fullName != fullNameBuffer) {
-        uprv_free(fullName);
-        fullName = fullNameBuffer;
-    }
+    Nest& nest = payload.emplace<Nest>();
 
     // not a loop:
     // just an easy way to have a common error-exit
@@ -1859,9 +1971,6 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
         int32_t length;
         UErrorCode err;
 
-        /* preset all fields to empty */
-        language[0] = script[0] = country[0] = 0;
-
         const auto parse = [canonicalize](std::string_view localeID,
                                           char* name,
                                           int32_t nameCapacity,
@@ -1879,17 +1988,17 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
         };
 
         // "canonicalize" the locale ID to ICU/Java format
+        char* fullName = nest.baseName;
         err = U_ZERO_ERROR;
-        length = parse(localeID, fullName, sizeof fullNameBuffer, err);
+        length = parse(localeID, fullName, sizeof Nest::baseName, err);
 
-        if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast<int32_t>(sizeof(fullNameBuffer))) {
-            U_ASSERT(baseName == nullptr);
+        FixedString fullNameBuffer;
+        if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast<int32_t>(sizeof Nest::baseName)) {
             /*Go to heap for the fullName if necessary*/
-            char* newFullName = static_cast<char*>(uprv_malloc(sizeof(char) * (length + 1)));
-            if (newFullName == nullptr) {
+            if (!fullNameBuffer.reserve(length + 1)) {
                 break; // error: out of memory
             }
-            fullName = newFullName;
+            fullName = fullNameBuffer.getAlias();
             err = U_ZERO_ERROR;
             length = parse(localeID, fullName, length + 1, err);
         }
@@ -1898,7 +2007,10 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
             break;
         }
 
-        variantBegin = length;
+        std::string_view language;
+        std::string_view script;
+        std::string_view region;
+        int32_t variantBegin = length;
 
         /* after uloc_getName/canonicalize() we know that only '_' are separators */
         /* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */
@@ -1923,8 +2035,9 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
         } else {
             fieldLen[fieldIdx - 1] = length - static_cast<int32_t>(field[fieldIdx - 1] - fullName);
         }
+        bool hasKeywords = at != nullptr && uprv_strchr(at + 1, '=') != nullptr;
 
-        if (fieldLen[0] >= static_cast<int32_t>(sizeof(language)))
+        if (fieldLen[0] >= ULOC_LANG_CAPACITY)
         {
             break; // error: the language field is too long
         }
@@ -1932,22 +2045,19 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
         variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
         if (fieldLen[0] > 0) {
             /* We have a language */
-            uprv_memcpy(language, fullName, fieldLen[0]);
-            language[fieldLen[0]] = 0;
+            language = {fullName, static_cast<std::string_view::size_type>(fieldLen[0])};
         }
         if (fieldLen[1] == 4 && uprv_isASCIILetter(field[1][0]) &&
                 uprv_isASCIILetter(field[1][1]) && uprv_isASCIILetter(field[1][2]) &&
                 uprv_isASCIILetter(field[1][3])) {
             /* We have at least a script */
-            uprv_memcpy(script, field[1], fieldLen[1]);
-            script[fieldLen[1]] = 0;
+            script = {field[1], static_cast<std::string_view::size_type>(fieldLen[1])};
             variantField++;
         }
 
         if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
             /* We have a country */
-            uprv_memcpy(country, field[variantField], fieldLen[variantField]);
-            country[fieldLen[variantField]] = 0;
+            region = {field[variantField], static_cast<std::string_view::size_type>(fieldLen[variantField])};
             variantField++;
         } else if (fieldLen[variantField] == 0) {
             variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
@@ -1956,16 +2066,52 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
         if (fieldLen[variantField] > 0) {
             /* We have a variant */
             variantBegin = static_cast<int32_t>(field[variantField] - fullName);
+        } else if (hasKeywords) {
+            // The original computation of variantBegin leaves it equal to the length
+            // of fullName if there is no variant.  It should instead be
+            // the length of the baseName.
+            variantBegin = static_cast<int32_t>(at - fullName);
         }
 
-        err = U_ZERO_ERROR;
-        initBaseName(err);
-        if (U_FAILURE(err)) {
-            break;
+        if (!hasKeywords && Nest::fits(length, language, script, region)) {
+            U_ASSERT(fullName == nest.baseName);
+            U_ASSERT(fullNameBuffer.isEmpty());
+            nest.init(language, script, region, variantBegin);
+        } else {
+            if (fullName == nest.baseName) {
+                U_ASSERT(fullNameBuffer.isEmpty());
+                fullNameBuffer = {fullName, static_cast<std::string_view::size_type>(length)};
+                if (fullNameBuffer.isEmpty()) {
+                    break; // error: out of memory
+                }
+                if (!language.empty()) {
+                    language = {fullNameBuffer.data(), language.size()};
+                }
+                if (!script.empty()) {
+                    script = {fullNameBuffer.data() + (script.data() - fullName), script.size()};
+                }
+                if (!region.empty()) {
+                    region = {fullNameBuffer.data() + (region.data() - fullName), region.size()};
+                }
+            }
+            Heap& heap = payload.emplace<Heap>(language, script, region, variantBegin);
+            if (isBogus()) {
+                break; // error: out of memory
+            }
+            U_ASSERT(!fullNameBuffer.isEmpty());
+            heap.ptr->fullName = std::move(fullNameBuffer);
+            if (hasKeywords) {
+                if (std::string_view::size_type baseNameLength = at - fullName; baseNameLength > 0) {
+                    heap.ptr->baseName = {heap.ptr->fullName.data(), baseNameLength};
+                    if (heap.ptr->baseName.isEmpty()) {
+                        break; // error: out of memory
+                    }
+                }
+            }
         }
 
         if (canonicalize) {
-            if (!isKnownCanonicalizedLocale(fullName, err)) {
+            if (!isKnownCanonicalizedLocale(getName(), err)) {
                 CharString replaced;
                 // Not sure it is already canonicalized
                 if (canonicalizeLocale(*this, replaced, err)) {
@@ -1989,67 +2135,16 @@ Locale& Locale::init(StringPiece localeID, UBool canonicalize)
     return *this;
 }
 
-/*
- * Set up the base name.
- * If there are no key words, it's exactly the full name.
- * If key words exist, it's the full name truncated at the '@' character.
- * Need to set up both at init() and after setting a keyword.
- */
-void
-Locale::initBaseName(UErrorCode &status) {
-    if (U_FAILURE(status)) {
-        return;
-    }
-    U_ASSERT(baseName==nullptr || baseName==fullName);
-    const char *atPtr = uprv_strchr(fullName, '@');
-    const char *eqPtr = uprv_strchr(fullName, '=');
-    if (atPtr && eqPtr && atPtr < eqPtr) {
-        // Key words exist.
-        int32_t baseNameLength = static_cast<int32_t>(atPtr - fullName);
-        char* newBaseName = static_cast<char*>(uprv_malloc(baseNameLength + 1));
-        if (newBaseName == nullptr) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-            return;
-        }
-        baseName = newBaseName;
-        uprv_strncpy(baseName, fullName, baseNameLength);
-        baseName[baseNameLength] = 0;
-
-        // The original computation of variantBegin leaves it equal to the length
-        // of fullName if there is no variant.  It should instead be
-        // the length of the baseName.
-        if (variantBegin > baseNameLength) {
-            variantBegin = baseNameLength;
-        }
-    } else {
-        baseName = fullName;
-    }
-}
-
-
 int32_t
 Locale::hashCode() const
 {
-    return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
+    return ustr_hashCharsN(getName(), static_cast<int32_t>(uprv_strlen(getName())));
 }
 
 void
 Locale::setToBogus() {
     /* Free our current storage */
-    if((baseName != fullName) && (baseName != fullNameBuffer)) {
-        uprv_free(baseName);
-    }
-    baseName = nullptr;
-    if(fullName != fullNameBuffer) {
-        uprv_free(fullName);
-        fullName = fullNameBuffer;
-    }
-    *fullNameBuffer = 0;
-    *language = 0;
-    *script = 0;
-    *country = 0;
-    fIsBogus = true;
-    variantBegin = 0;
+    payload.setToBogus();
 }
 
 const Locale& U_EXPORT2
@@ -2088,9 +2183,12 @@ Locale::addLikelySubtags(UErrorCode& status) {
         return;
     }
 
-    CharString maximizedLocaleID = ulocimp_addLikelySubtags(fullName, status);
+    CharString maximizedLocaleID = ulocimp_addLikelySubtags(getName(), status);
 
     if (U_FAILURE(status)) {
+        if (status == U_MEMORY_ALLOCATION_ERROR) {
+            setToBogus();
+        }
         return;
     }
 
@@ -2110,9 +2208,12 @@ Locale::minimizeSubtags(bool favorScript, UErrorCode& status) {
         return;
     }
 
-    CharString minimizedLocaleID = ulocimp_minimizeSubtags(fullName, favorScript, status);
+    CharString minimizedLocaleID = ulocimp_minimizeSubtags(getName(), favorScript, status);
 
     if (U_FAILURE(status)) {
+        if (status == U_MEMORY_ALLOCATION_ERROR) {
+            setToBogus();
+        }
         return;
     }
 
@@ -2131,8 +2232,11 @@ Locale::canonicalize(UErrorCode& status) {
         status = U_ILLEGAL_ARGUMENT_ERROR;
         return;
     }
-    CharString uncanonicalized(fullName, status);
+    CharString uncanonicalized(getName(), status);
     if (U_FAILURE(status)) {
+        if (status == U_MEMORY_ALLOCATION_ERROR) {
+            setToBogus();
+        }
         return;
     }
     init(uncanonicalized.data(), /*canonicalize=*/true);
@@ -2191,12 +2295,12 @@ Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
         return;
     }
 
-    if (fIsBogus) {
+    if (isBogus()) {
         status = U_ILLEGAL_ARGUMENT_ERROR;
         return;
     }
 
-    ulocimp_toLanguageTag(fullName, sink, /*strict=*/false, status);
+    ulocimp_toLanguageTag(getName(), sink, /*strict=*/false, status);
 }
 
 Locale U_EXPORT2
@@ -2229,14 +2333,14 @@ Locale::createCanonical(const char* name) {
 const char *
 Locale::getISO3Language() const
 {
-    return uloc_getISO3Language(fullName);
+    return uloc_getISO3Language(getName());
 }
 
 
 const char *
 Locale::getISO3Country() const
 {
-    return uloc_getISO3Country(fullName);
+    return uloc_getISO3Country(getName());
 }
 
 /**
@@ -2249,7 +2353,7 @@ Locale::getISO3Country() const
 uint32_t
 Locale::getLCID() const
 {
-    return uloc_getLCID(fullName);
+    return uloc_getLCID(getName());  // result of uloc_getLCID() is returned as-is; name now comes from getName()
 }
 
 const char* const* U_EXPORT2 Locale::getISOCountries()
@@ -2428,8 +2532,9 @@ Locale::getLocaleCache()
 
 class KeywordEnumeration : public StringEnumeration {
 protected:
-    CharString keywords;
+    FixedString keywords;
 private:
+    int32_t length;
     const char *current;
     static const char fgClassID;
 
@@ -2438,13 +2543,17 @@ public:
     virtual UClassID getDynamicClassID() const override { return getStaticClassID(); }
 public:
     // Builds an enumeration over a private copy of the first keywordLen bytes
     // of keys, with the cursor (current) placed currentIndex bytes into that copy.
     //
     // Nothing is copied and current stays nullptr when status already holds a
     // failure or when keywordLen is 0.
     // status is set to U_ILLEGAL_ARGUMENT_ERROR when keys is nullptr or
     // keywordLen is negative, and to U_MEMORY_ALLOCATION_ERROR when the copy
     // comes back empty.
     //
     // The length member is recorded separately from the keywords buffer; clone()
     // hands it back in as keywordLen. Note that it is initialized from keywordLen
     // before any validation, so it keeps the raw argument even on the error paths.
     // NOTE(review): currentIndex is not range-checked against keywordLen here.
     KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
-        : keywords(), current(keywords.data()) {
+        : keywords(), length(keywordLen), current(nullptr) {
         if(U_SUCCESS(status) && keywordLen != 0) {
             if(keys == nullptr || keywordLen < 0) {
                 status = U_ILLEGAL_ARGUMENT_ERROR;
             } else {
-                keywords.append(keys, keywordLen, status);
-                current = keywords.data() + currentIndex;
+                keywords = {keys, static_cast<std::string_view::size_type>(length)};
+                if (keywords.isEmpty()) {  // length > 0 on this path, so an empty copy is reported as an allocation failure
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                } else {
+                    current = keywords.data() + currentIndex;
+                }
             }
         }
     }
@@ -2455,7 +2564,7 @@ public:
     {
         UErrorCode status = U_ZERO_ERROR;
         return new KeywordEnumeration(
-                keywords.data(), keywords.length(),
+                keywords.data(), length,
                 static_cast<int32_t>(current - keywords.data()), status);
     }
 
@@ -2556,8 +2665,8 @@ Locale::createKeywords(UErrorCode &status) const
         return result;
     }
 
-    const char* variantStart = uprv_strchr(fullName, '@');
-    const char* assignment = uprv_strchr(fullName, '=');
+    const char* variantStart = uprv_strchr(getName(), '@');
+    const char* assignment = uprv_strchr(getName(), '=');
     if(variantStart) {
         if(assignment > variantStart) {
             CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status);
@@ -2583,8 +2692,8 @@ Locale::createUnicodeKeywords(UErrorCode &status) const
         return result;
     }
 
-    const char* variantStart = uprv_strchr(fullName, '@');
-    const char* assignment = uprv_strchr(fullName, '=');
+    const char* variantStart = uprv_strchr(getName(), '@');
+    const char* assignment = uprv_strchr(getName(), '=');
     if(variantStart) {
         if(assignment > variantStart) {
             CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status);
@@ -2604,7 +2713,7 @@ Locale::createUnicodeKeywords(UErrorCode &status) const
 int32_t
 Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
 {
-    return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
+    return uloc_getKeywordValue(getName(), keywordName, buffer, bufLen, &status);
 }
 
 void
@@ -2613,12 +2722,12 @@ Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& sta
         return;
     }
 
-    if (fIsBogus) {
+    if (isBogus()) {
         status = U_ILLEGAL_ARGUMENT_ERROR;
         return;
     }
 
-    ulocimp_getKeywordValue(fullName, keywordName, sink, status);
+    ulocimp_getKeywordValue(getName(), keywordName, sink, status);
 }
 
 void
@@ -2664,51 +2773,77 @@ Locale::setKeywordValue(StringPiece keywordName,
         status = U_ZERO_ERROR;
     }
 
-    int32_t length = static_cast<int32_t>(uprv_strlen(fullName));
-    int32_t capacity = fullName == fullNameBuffer ? ULOC_FULLNAME_CAPACITY : length + 1;
-
-    const char* start = locale_getKeywordsStart(fullName);
-    int32_t offset = start == nullptr ? length : start - fullName;
-
-    for (;;) {
-        // Remove -1 from the capacity so that this function can guarantee NUL termination.
-        CheckedArrayByteSink sink(fullName + offset, capacity - offset - 1);
-
-        int32_t reslen = ulocimp_setKeywordValue(
-            {fullName + offset, static_cast<std::string_view::size_type>(length - offset)},
-            keywordName,
-            keywordValue,
-            sink,
-            status);
+    CharString localeID(getName(), -1, status);
+    ulocimp_setKeywordValue(keywordName, keywordValue, localeID, status);
+    if (U_FAILURE(status)) {
+        if (status == U_MEMORY_ALLOCATION_ERROR) {
+            setToBogus();
+        }
+        return;
+    }
 
-        if (status == U_BUFFER_OVERFLOW_ERROR) {
-            capacity = reslen + offset + 1;
-            char* newFullName = static_cast<char*>(uprv_malloc(capacity));
-            if (newFullName == nullptr) {
+    const char* at = locale_getKeywordsStart(localeID.toStringPiece());
+    bool hasKeywords = at != nullptr && uprv_strchr(at + 1, '=') != nullptr;
+
+    Nest* nest = payload.get<Nest>();
+    if (!hasKeywords) {
+        if (nest == nullptr) {
+            // There are no longer any keywords left, so it might now be
+            // possible to move the payload from Heap to Nest.
+            Heap* heap = payload.get<Heap>();
+            U_ASSERT(heap != nullptr);
+            if (Nest::fits(localeID.length(), heap->language, heap->script, heap->region)) {
+                int32_t variantBegin = heap->ptr->variantBegin;
+                U_ASSERT(variantBegin >= 0);
+                U_ASSERT(static_cast<size_t>(variantBegin) < sizeof Nest::baseName);
+                nest = &payload.emplace<Nest>(std::move(*heap), static_cast<uint8_t>(variantBegin));
+                localeID.extract(nest->baseName, sizeof Nest::baseName, status);
+            } else {
+                heap->ptr->baseName.clear();
+                heap->ptr->fullName = localeID.toStringPiece();
+                if (heap->ptr->fullName.isEmpty()) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                    setToBogus();
+                    return;
+                }
+            }
+        }
+    } else {
+        Heap* heap = nullptr;
+        if (nest != nullptr) {
+            // A keyword has been added, so the payload now needs to be moved
+            // from Nest to Heap so that it can get a baseName.
+            Nest copy(*nest);
+            heap = &payload.emplace<Heap>(copy.language,
+                                          copy.script,
+                                          copy.region,
+                                          copy.variantBegin);
+            if (isBogus()) {
                 status = U_MEMORY_ALLOCATION_ERROR;
                 return;
             }
-            uprv_memcpy(newFullName, fullName, length + 1);
-            if (fullName != fullNameBuffer) {
-                if (baseName == fullName) {
-                    baseName = newFullName; // baseName should not point to freed memory.
+        } else {
+            heap = payload.get<Heap>();
+        }
+        U_ASSERT(heap != nullptr);
+        heap->ptr->fullName = localeID.toStringPiece();
+        if (heap->ptr->fullName.isEmpty()) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            setToBogus();
+            return;
+        }
+
+        if (heap->ptr->baseName.isEmpty()) {
+            // Has added the first keyword, meaning that the fullName is no longer also the baseName.
+            if (std::string_view::size_type baseNameLength = at - localeID.data(); baseNameLength > 0) {
+                heap->ptr->baseName = {heap->ptr->fullName.data(), baseNameLength};
+                if (heap->ptr->baseName.isEmpty()) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                    setToBogus();
+                    return;
                 }
-                // if fullName is already on the heap, need to free it.
-                uprv_free(fullName);
             }
-            fullName = newFullName;
-            status = U_ZERO_ERROR;
-            continue;
         }
-
-        if (U_FAILURE(status)) { return; }
-        u_terminateChars(fullName, capacity, reslen + offset, &status);
-        break;
-    }
-
-    if (baseName == fullName) {
-        // May have added the first keyword, meaning that the fullName is no longer also the baseName.
-        initBaseName(status);
     }
 }
 
@@ -2741,9 +2876,42 @@ Locale::setUnicodeKeywordValue(StringPiece keywordName,
     setKeywordValue(*legacy_key, value, status);
 }
 
-const char *
+const char*
+Locale::getCountry() const {
+    return getField<&Nest::getRegion, &Heap::getRegion>();  // region getter of the Nest or Heap payload, chosen by getField()
+}
+
+const char*
+Locale::getLanguage() const {
+    return getField<&Nest::getLanguage, &Heap::getLanguage>();  // language getter of the Nest or Heap payload
+}
+
+const char*
+Locale::getScript() const {
+    return getField<&Nest::getScript, &Heap::getScript>();  // script getter of the Nest or Heap payload
+}
+
+const char*
+Locale::getVariant() const {
+    return getField<&Nest::getVariant, &Heap::getVariant>();  // variant getter of the Nest or Heap payload
+}
+
+const char*
+Locale::getName() const {
+    // Asymmetric on purpose: a Nest answers with its base name, a Heap with its
+    // full name. Presumably a Nest never carries keywords, so both names
+    // coincide there (setKeywordValue moves Nest to Heap once a keyword is
+    // added) -- TODO confirm against the Nest definition.
+    return getField<&Nest::getBaseName, &Heap::getFullName>();
+}
+
+const char*
 Locale::getBaseName() const {
-    return baseName;
+    return getField<&Nest::getBaseName, &Heap::getBaseName>();  // replaces the direct read of the former baseName member
+}
+
+template <const char* (Locale::Nest::*const NEST)() const,  // member getter to invoke when the payload is a Nest
+          const char* (Locale::Heap::*const HEAP)() const>  // member getter to invoke when the payload is a Heap
+const char* Locale::getField() const {  // common dispatcher used by getCountry/getLanguage/getScript/getVariant/getName/getBaseName
+    return payload.visit([] { return ""; },  // argument-less alternative yields an empty string (presumably the empty/bogus payload -- confirm)
+                         [](const Nest& nest) { return (nest.*NEST)(); },  // Nest payload: call the NEST getter on it
+                         [](const Heap& heap) { return (heap.*HEAP)(); });  // Heap payload: call the HEAP getter on it
 }
 
 Locale::Iterator::~Iterator() = default;

+ 1 - 1
thirdparty/icu4c/common/loclikely.cpp

@@ -495,7 +495,7 @@ bool RegionValidateMap::equals(const RegionValidateMap& that) const {
 // The code transforms two letters a-z into an integer value between -1 and 26x26.
 // -1 indicates the region is outside the range of two letters a-z;
 // the remaining values are between 0 and 676 (= 26x26) and are used as an index into
-// the the bigmap in map. The map is an array of 22 int32_t.
+// the bigmap in map. The map is an array of 22 int32_t.
 // Since 32x21 < 676 < 32x22, we store this 676-bit bitmap in 22 int32_t.
 int32_t RegionValidateMap::value(const char* region) const {
     if (uprv_isASCIILetter(region[0]) && uprv_isASCIILetter(region[1]) &&

+ 27 - 6
thirdparty/icu4c/common/loclikelysubtags.cpp

@@ -715,13 +715,29 @@ LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiec
             } else {
                 iter.resetToState64(state);
                 value = trieNext(iter, "", 0);
-                U_ASSERT(value > 0);
+                U_ASSERT(value != 0);
+                // For the case of und_Latn
+                if (value < 0) {
+                    retainLanguage = !language.empty();
+                    retainScript = !script.empty();
+                    retainRegion = !region.empty();
+                    // Fallback to und_$region =>
+                    iter.resetToState64(trieUndState);  // "und" ("*")
+                    value = trieNext(iter, "", 0);
+                    U_ASSERT(value == 0);
+                    int64_t trieUndEmptyState = iter.getState64();
+                    value = trieNext(iter, region, 0);
+                    // Fallback to und =>
+                    if (value < 0) {
+                        iter.resetToState64(trieUndEmptyState);
+                        value = trieNext(iter, "", 0);
+                        U_ASSERT(value > 0);
+                    }
+                }
             }
         }
     }
     U_ASSERT(value < lsrsLength);
-    const LSR &matched = lsrs[value];
-
     if (returnInputIfUnmatch &&
         (!(matchLanguage || matchScript || (matchRegion && language.empty())))) {
       return LSR("", "", "", LSR::EXPLICIT_LSR, errorCode);  // no matching.
@@ -731,18 +747,23 @@ LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiec
     }
 
     if (!(retainLanguage || retainScript || retainRegion)) {
+        U_ASSERT(value >= 0);
         // Quickly return a copy of the lookup-result LSR
         // without new allocation of the subtags.
+        const LSR &matched = lsrs[value];
         return LSR(matched.language, matched.script, matched.region, matched.flags);
     }
     if (!retainLanguage) {
-        language = matched.language;
+        U_ASSERT(value >= 0);
+        language = lsrs[value].language;
     }
     if (!retainScript) {
-        script = matched.script;
+        U_ASSERT(value >= 0);
+        script = lsrs[value].script;
     }
     if (!retainRegion) {
-        region = matched.region;
+        U_ASSERT(value >= 0);
+        region = lsrs[value].region;
     }
     int32_t retainMask = (retainLanguage ? 4 : 0) + (retainScript ? 2 : 0) + (retainRegion ? 1 : 0);
     // retainOldMask flags = LSR explicit-subtag flags

+ 9 - 1
thirdparty/icu4c/common/lstmbe.cpp

@@ -809,7 +809,15 @@ U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript(UScriptCode script, UEr
 
 // Creates an LSTMData from the resource bundle rb.
 // Returns nullptr when status already holds a failure on entry, or when
 // status holds a failure after construction (the new object is deleted in
 // that case); otherwise returns the newly allocated object.
 U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData(UResourceBundle* rb, UErrorCode& status)
 {
-    return new LSTMData(rb, status);
+    if (U_FAILURE(status)) {  // incoming failure: construct nothing
+        return nullptr;
+    }
+    const LSTMData* result = new LSTMData(rb, status);
+    if (U_FAILURE(status)) {  // construction reported a failure: discard the object rather than return it
+        delete result;
+        return nullptr;
+    }
+    return result;  // NOTE(review): a null result from new would be returned with status untouched -- confirm callers cope
 }
 
 U_CAPI const LanguageBreakEngine* U_EXPORT2

+ 45 - 42
thirdparty/icu4c/common/norm2_nfc_data.h

@@ -10,14 +10,14 @@
 #ifdef INCLUDED_FROM_NORMALIZER2_CPP
 
 static const UVersionInfo norm2_nfc_data_formatVersion={5,0,0,0};
-static const UVersionInfo norm2_nfc_data_dataVersion={0x10,0,0,0};
+static const UVersionInfo norm2_nfc_data_dataVersion={0x11,0,0,0};
 
 static const int32_t norm2_nfc_data_indexes[Normalizer2Impl::IX_COUNT]={
-0x58,0x4e84,0x8c60,0x8d60,0x8d60,0x8d60,0x8d60,0x8d60,0xc0,0x300,0xb0c,0x2a6a,0x3cf0,0xfbc4,0x12c2,0x3c26,
+0x58,0x4eec,0x8cc8,0x8dc8,0x8dc8,0x8dc8,0x8dc8,0x8dc8,0xc0,0x300,0xb0c,0x2a6a,0x3cf0,0xfbc4,0x12c2,0x3c26,
 0x3cbe,0x3cf0,0x300,0,0xfb10,0xfb9e
 };
 
-static const uint16_t norm2_nfc_data_trieIndex[1869]={
+static const uint16_t norm2_nfc_data_trieIndex[1888]={
 0,0x40,0x7b,0xbb,0xfb,0x13a,0x17a,0x1b2,0x1f2,0x226,0x254,0x226,0x294,0x2d4,0x313,0x353,
 0x393,0x3d2,0x40f,0x44e,0x226,0x226,0x488,0x4c8,0x4f8,0x530,0x226,0x570,0x59f,0x5de,0x226,0x5f3,
 0x631,0x65f,0x688,0x6be,0x6fe,0x73b,0x75b,0x79a,0x7d9,0x816,0x835,0x872,0x75b,0x8ab,0x8d9,0x918,
@@ -82,7 +82,7 @@ static const uint16_t norm2_nfc_data_trieIndex[1869]={
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x1881,0x18c1,0x1901,0x1941,0x1981,0x19c1,0x1a01,0x1a41,0x1a64,0x1aa4,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ac4,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x6cf,0x6df,0x6f7,0x716,0x72b,0x72b,0x72b,0x72f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x6e2,0x6f2,0x70a,0x729,0x73e,0x73e,0x73e,0x742,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xc0c,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x40c,
@@ -91,53 +91,54 @@ static const uint16_t norm2_nfc_data_trieIndex[1869]={
 0x1b1a,0x226,0x226,0x1b2a,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xdf8,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x1b3a,0x226,0x226,0x226,0x1b42,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x1608,0x226,0x226,0x226,0x226,0x66b,0x226,0x226,0x226,0x226,0x1b50,0x54f,0x226,0x226,0x1b60,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x81d,0x226,0x226,0x1b70,0x226,0x1b80,0x1b8d,0x1b99,0x226,0x226,
-0x226,0x226,0x414,0x226,0x1ba4,0x1bb4,0x226,0x226,0x226,0x812,0x226,0x226,0x226,0x226,0x1bc4,0x226,
-0x226,0x226,0x1bcf,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bd6,0x226,0x226,
-0x226,0x226,0x1be1,0x1bf0,0x928,0x1bfe,0x412,0x1c0c,0x1c1c,0x226,0x1c24,0x1c32,0x87f,0x226,0x226,0x226,
-0x226,0x1c42,0x7ca,0x226,0x226,0x226,0x226,0x226,0x1c52,0x1c61,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x908,0x1c69,0x1c79,0x226,0x226,0x226,0x9ec,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x1c83,0x226,0x226,0x226,0x226,0x226,0x226,0x818,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c80,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c93,0x812,0x226,0x226,0x226,0x226,
+0x1608,0x226,0x226,0x226,0x226,0x1b50,0x226,0x226,0x226,0x226,0x1b60,0x54f,0x226,0x226,0x1b70,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x81d,0x226,0x226,0x1b80,0x226,0x1b90,0x1b9d,0x1ba9,0x226,0x226,
+0x226,0x226,0x414,0x226,0x1bb4,0x1bc4,0x226,0x226,0x226,0x812,0x226,0x226,0x226,0x226,0x1bd4,0x226,
+0x226,0x226,0x1bdf,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1be6,0x226,0x226,
+0x226,0x226,0x1bf1,0x1c00,0x928,0x1c0e,0x412,0x1c1c,0x1c2c,0x226,0x1c34,0x1c42,0x87f,0x226,0x226,0x226,
+0x226,0x1c52,0x7ca,0x226,0x226,0x226,0x226,0x226,0x1c62,0x1c71,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x908,0x1c79,0x1c89,0x226,0x226,0x226,0x9ec,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x1c93,0x226,0x226,0x226,0x226,0x226,0x226,0x818,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c90,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ca3,0x812,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x87f,0x226,0x226,0x226,0x81f,0x81c,0x226,0x226,0x226,0x226,0x81a,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x9ec,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xc06,0x226,0x226,0x226,0x226,0x81c,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0xc09,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x1ca2,0x1cb1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x1cc1,0x226,0x226,0x226,0xf2d,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1cce,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x1cb2,0x1cc1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x1cd1,0x226,0x226,0x226,0xf2d,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1cde,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ce0,0x226,0x226,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x1cef,0x1cff,0x1d0d,0x1d1a,0x226,0x1d26,0x1d34,0x1d44,0x226,0x226,0x226,0x226,0xd1c,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1cee,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1cf0,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x1cff,0x1d0f,0x1d1d,0x1d2a,0x226,0x1d36,0x1d44,0x1d54,0x226,0x226,0x226,0x226,0xd1c,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d54,0x1d5c,0x1d6a,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d64,0x1d6c,0x1d7a,0x226,0x226,0x226,0x226,0x226,
 0x4f9,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xf2d,0x226,0x226,0x226,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7ca,0x226,0x226,0x226,0x4fc,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d75,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d85,0x226,
 0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x5c1,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d85,0x226,0x226,0x226,
-0x226,0x226,0x226,0x1d91,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1da1,
-0x1db1,0x1dc1,0x1dd1,0x1de1,0x1df1,0x1e01,0x1e11,0x1e21,0x1e31,0x1e41,0x1e51,0x1e61,0x1e71,0x1e81,0x1e91,0x1ea1,
-0x1eb1,0x1ec1,0x1ed1,0x1ee1,0x1ef1,0x1f01,0x1f11,0x1f21,0x1f31,0x1f41,0x1f51,0x1f61,0x1f71,0x1f81,0x1f91,0x1fa1,
-0x1fb1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
-0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x408,
-0x428,0x440,0xc4,0xc4,0x460,0x46f,0x486,0x4a2,0x4bf,0x4dd,0x4fa,0x517,0x536,0x553,0x56d,0xc4,
-0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x582,
-0xc4,0xc4,0xc4,0xc4,0x595,0x5a9,0x5c0,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1d95,0x7d3,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1da5,
+0x226,0x226,0x226,0x226,0x226,0x226,0x1db1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x1dc1,0x1dd1,0x1de1,0x1df1,0x1e01,0x1e11,0x1e21,0x1e31,0x1e41,0x1e51,0x1e61,0x1e71,0x1e81,0x1e91,
+0x1ea1,0x1eb1,0x1ec1,0x1ed1,0x1ee1,0x1ef1,0x1f01,0x1f11,0x1f21,0x1f31,0x1f41,0x1f51,0x1f61,0x1f71,0x1f81,0x1f91,
+0x1fa1,0x1fb1,0x1fc1,0x1fd1,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,
+0x226,0x226,0x408,0x428,0x440,0xc4,0xc4,0x460,0x46f,0x486,0x4a2,0x4bf,0x4dd,0x4fa,0x517,0x536,
+0x553,0x56d,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
+0xc4,0xc4,0x582,0xc4,0xc4,0xc4,0xc4,0x595,0x5a9,0x5c0,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
 0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
-0xc4,0xc4,0xc4,0xc4,0xc4,0x5e0,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5eb,0x608,
-0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x628,0x63e,0x650,0xc4,0x66f,0xc4,0xc4,0xc4,0xc4,0xc4,
+0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5e0,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
+0xc4,0x5eb,0x608,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x628,0x63e,0x650,0x66f,0x682,0xc4,0xc4,
 0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,
-0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x68f,0x6af
+0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x6a2,0x6c2
 };
 
-static const uint16_t norm2_nfc_data_trieData[8129]={
+static const uint16_t norm2_nfc_data_trieData[8162]={
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -371,9 +372,9 @@ static const uint16_t norm2_nfc_data_trieData[8129]={
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,
 0xffb8,0xffcc,0xffcc,0xffb8,1,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,
-0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffcc,0xffcc,0xffb8,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,
+0xffd4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,0x8c4,0x1a65,0x8c8,0x1a6b,0x8cc,0x1a71,0x8d0,0x1a77,0x8d4,0x1a7d,1,
 1,0x8d8,0x1a83,1,1,1,1,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -575,6 +576,7 @@ static const uint16_t norm2_nfc_data_trieData[8129]={
 1,1,1,1,1,1,1,0xffb8,1,0xffcc,1,1,1,1,1,1,
 1,1,0xffcc,0xfe02,0xffb8,1,1,1,1,0xfe12,1,1,1,1,0xffcc,0xffcc,
 0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,
+1,1,1,1,1,1,1,1,1,1,0xffb8,0xffb8,1,0xffb8,0xffb8,0xffb8,
 1,1,1,1,1,1,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8,
 1,1,0xffcc,0xffb8,0xffcc,0xffb8,1,1,1,1,1,1,1,1,1,1,
 0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,
@@ -610,7 +612,8 @@ static const uint16_t norm2_nfc_data_trieData[8129]={
 1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,
 0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,1,
 0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,
-1,0xffd0,0xffd0,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,1,1,
+1,0xffd0,0xffd0,0xffb8,0xffcc,1,1,1,0xffcc,1,1,0xffcc,1,1,1,1,
+1,1,1,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,1,1,
 1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xfe0e,1,1,1,1,
 1,0x33e5,0x33e9,0x33ed,0x33f1,0x33f7,0x2fd7,0x33fb,0x33ff,0x3403,0x3407,0x2fdb,0x340b,0x340f,0x3413,0x2fdf,
 0x3419,0x341d,0x3421,0x3425,0x342b,0x342f,0x3433,0x3437,0x343d,0x3441,0x3445,0x3449,0x30c9,0x344d,0x3453,0x3457,
@@ -646,13 +649,13 @@ static const uint16_t norm2_nfc_data_trieData[8129]={
 0x3b5f,0x3b63,0x3b67,0x3b6d,0x3b71,0x3b75,0x3b79,0x3b7d,0x3b83,0x3b89,0x3b8d,0x3b91,0x3b95,0x3b9b,0x3b9f,0x31d1,
 0x31d1,0x3ba5,0x3ba9,0x3baf,0x3bb3,0x3bb7,0x3bbb,0x3bbf,0x3bc3,0x3bc7,0x3bcb,0x31d5,0x3bd1,0x3bd5,0x3bd9,0x3bdd,
 0x3be1,0x3be5,0x3beb,0x3bef,0x3bf5,0x3bfb,0x3c01,0x3c05,0x3c09,0x3c0d,0x3c11,0x3c15,0x3c19,0x3c1d,0x3c21,1,
-1
+1,1
 };
 
 static const UCPTrie norm2_nfc_data_trie={
     norm2_nfc_data_trieIndex,
     { norm2_nfc_data_trieData },
-    1869, 8129,
+    1888, 8162,
     0x2fc00, 0x30,
     0, 0,
     0, 0,
@@ -1160,7 +1163,7 @@ static const uint16_t norm2_nfc_data_extraData[7918]={
 
 static const uint8_t norm2_nfc_data_smallFCD[256]={
 0xc0,0xef,3,0x7f,0xdf,0x70,0xcf,0x87,0xd7,0xe6,0x66,0x46,0x66,0x46,0x66,0x5b,
-0x12,0,0,4,0,0,0,0x43,0x20,2,0x69,0xae,0xc2,0xc0,0xff,0xff,
+0x12,0,0,4,0,0,0,0x43,0x20,2,0xe9,0xae,0xc2,0xc0,0xff,0xff,
 0xc0,0x72,0xbf,0,0,0,0,0,0,0,0x40,0,0x80,0x88,0,0,
 0xfe,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,

+ 118 - 86
thirdparty/icu4c/common/normalizer2impl.h

@@ -243,32 +243,36 @@ private:
  * this normalizer2impl.h and in the design doc at
  * https://unicode-org.github.io/icu/design/normalization/custom.html
  */
-class U_COMMON_API Normalizer2Impl : public UObject {
+class U_COMMON_API_CLASS Normalizer2Impl : public UObject {
 public:
-    Normalizer2Impl() : normTrie(nullptr), fCanonIterData(nullptr) {}
-    virtual ~Normalizer2Impl();
+    U_COMMON_API Normalizer2Impl() : normTrie(nullptr), fCanonIterData(nullptr) {}
+    U_COMMON_API virtual ~Normalizer2Impl();
 
-    void init(const int32_t *inIndexes, const UCPTrie *inTrie,
-              const uint16_t *inExtraData, const uint8_t *inSmallFCD);
+    U_COMMON_API void init(const int32_t* inIndexes,
+                           const UCPTrie* inTrie,
+                           const uint16_t* inExtraData,
+                           const uint8_t* inSmallFCD);
 
-    void addLcccChars(UnicodeSet &set) const;
-    void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
-    void addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
+    U_COMMON_API void addLcccChars(UnicodeSet& set) const;
+    U_COMMON_API void addPropertyStarts(const USetAdder* sa, UErrorCode& errorCode) const;
+    U_COMMON_API void addCanonIterPropertyStarts(const USetAdder* sa, UErrorCode& errorCode) const;
 
     // low-level properties ------------------------------------------------ ***
 
-    UBool ensureCanonIterData(UErrorCode &errorCode) const;
+    U_COMMON_API UBool ensureCanonIterData(UErrorCode& errorCode) const;
 
     // The trie stores values for lead surrogate code *units*.
     // Surrogate code *points* are inert.
-    uint16_t getNorm16(UChar32 c) const {
+    U_COMMON_API uint16_t getNorm16(UChar32 c) const {
         return U_IS_LEAD(c) ?
             static_cast<uint16_t>(INERT) :
             UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c);
     }
-    uint16_t getRawNorm16(UChar32 c) const { return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); }
+    U_COMMON_API uint16_t getRawNorm16(UChar32 c) const {
+        return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c);
+    }
 
-    UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {
+    U_COMMON_API UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {
         if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
             return UNORM_YES;
         } else if(minMaybeNo<=norm16) {
@@ -277,11 +281,17 @@ public:
             return UNORM_NO;
         }
     }
-    UBool isAlgorithmicNoNo(uint16_t norm16) const { return limitNoNo<=norm16 && norm16<minMaybeNo; }
-    UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeNo; }
-    UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo || minMaybeYes<=norm16; }
+    U_COMMON_API UBool isAlgorithmicNoNo(uint16_t norm16) const {
+        return limitNoNo <= norm16 && norm16 < minMaybeNo;
+    }
+    U_COMMON_API UBool isCompNo(uint16_t norm16) const {
+        return minNoNo <= norm16 && norm16 < minMaybeNo;
+    }
+    U_COMMON_API UBool isDecompYes(uint16_t norm16) const {
+        return norm16 < minYesNo || minMaybeYes <= norm16;
+    }
 
-    uint8_t getCC(uint16_t norm16) const {
+    U_COMMON_API uint8_t getCC(uint16_t norm16) const {
         if(norm16>=MIN_NORMAL_MAYBE_YES) {
             return getCCFromNormalYesOrMaybe(norm16);
         }
@@ -290,13 +300,13 @@ public:
         }
         return getCCFromNoNo(norm16);
     }
-    static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) {
+    U_COMMON_API static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) {
         return static_cast<uint8_t>(norm16 >> OFFSET_SHIFT);
     }
-    static uint8_t getCCFromYesOrMaybeYes(uint16_t norm16) {
+    U_COMMON_API static uint8_t getCCFromYesOrMaybeYes(uint16_t norm16) {
         return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0;
     }
-    uint8_t getCCFromYesOrMaybeYesCP(UChar32 c) const {
+    U_COMMON_API uint8_t getCCFromYesOrMaybeYesCP(UChar32 c) const {
         if (c < minCompNoMaybeCP) { return 0; }
         return getCCFromYesOrMaybeYes(getNorm16(c));
     }
@@ -306,7 +316,7 @@ public:
      * @param c A Unicode code point.
      * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
      */
-    uint16_t getFCD16(UChar32 c) const {
+    U_COMMON_API uint16_t getFCD16(UChar32 c) const {
         if(c<minDecompNoCP) {
             return 0;
         } else if(c<=0xffff) {
@@ -322,7 +332,7 @@ public:
      * @param limit The end of the string, or NULL.
      * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
      */
-    uint16_t nextFCD16(const char16_t *&s, const char16_t *limit) const {
+    U_COMMON_API uint16_t nextFCD16(const char16_t*& s, const char16_t* limit) const {
         UChar32 c=*s++;
         if(c<minDecompNoCP || !singleLeadMightHaveNonZeroFCD16(c)) {
             return 0;
@@ -340,7 +350,7 @@ public:
      * @param s A valid pointer into a string. Requires start<s.
      * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0.
      */
-    uint16_t previousFCD16(const char16_t *start, const char16_t *&s) const {
+    U_COMMON_API uint16_t previousFCD16(const char16_t* start, const char16_t*& s) const {
         UChar32 c=*--s;
         if(c<minDecompNoCP) {
             return 0;
@@ -360,16 +370,16 @@ public:
     }
 
     /** Returns true if the single-or-lead code unit c might have non-zero FCD data. */
-    UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const {
+    U_COMMON_API UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const {
         // 0<=lead<=0xffff
         uint8_t bits=smallFCD[lead>>8];
         if(bits==0) { return false; }
         return (bits >> ((lead >> 5) & 7)) & 1;
     }
     /** Returns the FCD value from the regular normalization data. */
-    uint16_t getFCD16FromNormData(UChar32 c) const;
+    U_COMMON_API uint16_t getFCD16FromNormData(UChar32 c) const;
 
-    uint16_t getFCD16FromMaybeOrNonZeroCC(uint16_t norm16) const;
+    U_COMMON_API uint16_t getFCD16FromMaybeOrNonZeroCC(uint16_t norm16) const;
 
     /**
      * Gets the decomposition for one code point.
@@ -378,7 +388,7 @@ public:
      * @param length out-only, takes the length of the decomposition, if any
      * @return pointer to the decomposition, or NULL if none
      */
-    const char16_t *getDecomposition(UChar32 c, char16_t buffer[4], int32_t &length) const;
+    U_COMMON_API const char16_t* getDecomposition(UChar32 c, char16_t buffer[4], int32_t& length) const;
 
     /**
      * Gets the raw decomposition for one code point.
@@ -387,12 +397,14 @@ public:
      * @param length out-only, takes the length of the decomposition, if any
      * @return pointer to the decomposition, or NULL if none
      */
-    const char16_t *getRawDecomposition(UChar32 c, char16_t buffer[30], int32_t &length) const;
+    U_COMMON_API const char16_t* getRawDecomposition(UChar32 c,
+                                                     char16_t buffer[30],
+                                                     int32_t& length) const;
 
-    UChar32 composePair(UChar32 a, UChar32 b) const;
+    U_COMMON_API UChar32 composePair(UChar32 a, UChar32 b) const;
 
-    UBool isCanonSegmentStarter(UChar32 c) const;
-    UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const;
+    U_COMMON_API UBool isCanonSegmentStarter(UChar32 c) const;
+    U_COMMON_API UBool getCanonStartSet(UChar32 c, UnicodeSet& set) const;
 
     enum {
         // Fixed norm16 values.
@@ -481,71 +493,90 @@ public:
     // higher-level functionality ------------------------------------------ ***
 
     // NFD without an NFD Normalizer2 instance.
-    UnicodeString &decompose(const UnicodeString &src, UnicodeString &dest,
-                             UErrorCode &errorCode) const;
+    U_COMMON_API UnicodeString& decompose(const UnicodeString& src,
+                                          UnicodeString& dest,
+                                          UErrorCode& errorCode) const;
     /**
      * Decomposes [src, limit[ and writes the result to dest.
      * limit can be NULL if src is NUL-terminated.
      * destLengthEstimate is the initial dest buffer capacity and can be -1.
      */
-    void decompose(const char16_t *src, const char16_t *limit,
-                   UnicodeString &dest, int32_t destLengthEstimate,
-                   UErrorCode &errorCode) const;
-
-    const char16_t *decompose(const char16_t *src, const char16_t *limit,
-                           ReorderingBuffer *buffer, UErrorCode &errorCode) const;
-    void decomposeAndAppend(const char16_t *src, const char16_t *limit,
-                            UBool doDecompose,
-                            UnicodeString &safeMiddle,
-                            ReorderingBuffer &buffer,
-                            UErrorCode &errorCode) const;
+    U_COMMON_API void decompose(const char16_t* src,
+                                const char16_t* limit,
+                                UnicodeString& dest,
+                                int32_t destLengthEstimate,
+                                UErrorCode& errorCode) const;
+
+    U_COMMON_API const char16_t* decompose(const char16_t* src,
+                                           const char16_t* limit,
+                                           ReorderingBuffer* buffer,
+                                           UErrorCode& errorCode) const;
+    U_COMMON_API void decomposeAndAppend(const char16_t* src,
+                                         const char16_t* limit,
+                                         UBool doDecompose,
+                                         UnicodeString& safeMiddle,
+                                         ReorderingBuffer& buffer,
+                                         UErrorCode& errorCode) const;
 
     /** sink==nullptr: isNormalized()/spanQuickCheckYes() */
-    const uint8_t *decomposeUTF8(uint32_t options,
-                                 const uint8_t *src, const uint8_t *limit,
-                                 ByteSink *sink, Edits *edits, UErrorCode &errorCode) const;
-
-    UBool compose(const char16_t *src, const char16_t *limit,
-                  UBool onlyContiguous,
-                  UBool doCompose,
-                  ReorderingBuffer &buffer,
-                  UErrorCode &errorCode) const;
-    const char16_t *composeQuickCheck(const char16_t *src, const char16_t *limit,
-                                   UBool onlyContiguous,
-                                   UNormalizationCheckResult *pQCResult) const;
-    void composeAndAppend(const char16_t *src, const char16_t *limit,
-                          UBool doCompose,
-                          UBool onlyContiguous,
-                          UnicodeString &safeMiddle,
-                          ReorderingBuffer &buffer,
-                          UErrorCode &errorCode) const;
+    U_COMMON_API const uint8_t* decomposeUTF8(uint32_t options,
+                                              const uint8_t* src,
+                                              const uint8_t* limit,
+                                              ByteSink* sink,
+                                              Edits* edits,
+                                              UErrorCode& errorCode) const;
+
+    U_COMMON_API UBool compose(const char16_t* src,
+                               const char16_t* limit,
+                               UBool onlyContiguous,
+                               UBool doCompose,
+                               ReorderingBuffer& buffer,
+                               UErrorCode& errorCode) const;
+    U_COMMON_API const char16_t* composeQuickCheck(const char16_t* src,
+                                                   const char16_t* limit,
+                                                   UBool onlyContiguous,
+                                                   UNormalizationCheckResult* pQCResult) const;
+    U_COMMON_API void composeAndAppend(const char16_t* src,
+                                       const char16_t* limit,
+                                       UBool doCompose,
+                                       UBool onlyContiguous,
+                                       UnicodeString& safeMiddle,
+                                       ReorderingBuffer& buffer,
+                                       UErrorCode& errorCode) const;
 
     /** sink==nullptr: isNormalized() */
-    UBool composeUTF8(uint32_t options, UBool onlyContiguous,
-                      const uint8_t *src, const uint8_t *limit,
-                      ByteSink *sink, icu::Edits *edits, UErrorCode &errorCode) const;
-
-    const char16_t *makeFCD(const char16_t *src, const char16_t *limit,
-                         ReorderingBuffer *buffer, UErrorCode &errorCode) const;
-    void makeFCDAndAppend(const char16_t *src, const char16_t *limit,
-                          UBool doMakeFCD,
-                          UnicodeString &safeMiddle,
-                          ReorderingBuffer &buffer,
-                          UErrorCode &errorCode) const;
-
-    UBool hasDecompBoundaryBefore(UChar32 c) const;
-    UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const;
-    UBool hasDecompBoundaryAfter(UChar32 c) const;
-    UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const;
-    UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); }
-
-    UBool hasCompBoundaryBefore(UChar32 c) const {
+    U_COMMON_API UBool composeUTF8(uint32_t options,
+                                   UBool onlyContiguous,
+                                   const uint8_t* src,
+                                   const uint8_t* limit,
+                                   ByteSink* sink,
+                                   icu::Edits* edits,
+                                   UErrorCode& errorCode) const;
+
+    U_COMMON_API const char16_t* makeFCD(const char16_t* src,
+                                         const char16_t* limit,
+                                         ReorderingBuffer* buffer,
+                                         UErrorCode& errorCode) const;
+    U_COMMON_API void makeFCDAndAppend(const char16_t* src,
+                                       const char16_t* limit,
+                                       UBool doMakeFCD,
+                                       UnicodeString& safeMiddle,
+                                       ReorderingBuffer& buffer,
+                                       UErrorCode& errorCode) const;
+
+    U_COMMON_API UBool hasDecompBoundaryBefore(UChar32 c) const;
+    U_COMMON_API UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const;
+    U_COMMON_API UBool hasDecompBoundaryAfter(UChar32 c) const;
+    U_COMMON_API UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const;
+    U_COMMON_API UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); }
+
+    U_COMMON_API UBool hasCompBoundaryBefore(UChar32 c) const {
         return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c));
     }
-    UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous) const {
+    U_COMMON_API UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous) const {
         return norm16HasCompBoundaryAfter(getNorm16(c), onlyContiguous);
     }
-    UBool isCompInert(UChar32 c, UBool onlyContiguous) const {
+    U_COMMON_API UBool isCompInert(UChar32 c, UBool onlyContiguous) const {
         uint16_t norm16=getNorm16(c);
         return isCompYesAndZeroCC(norm16) &&
             (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 &&
@@ -553,10 +584,11 @@ public:
             // The last check fetches the mapping's first unit and checks tccc<=1.
     }
 
-    UBool hasFCDBoundaryBefore(UChar32 c) const { return hasDecompBoundaryBefore(c); }
-    UBool hasFCDBoundaryAfter(UChar32 c) const { return hasDecompBoundaryAfter(c); }
-    UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }
-private:
+    U_COMMON_API UBool hasFCDBoundaryBefore(UChar32 c) const { return hasDecompBoundaryBefore(c); }
+    U_COMMON_API UBool hasFCDBoundaryAfter(UChar32 c) const { return hasDecompBoundaryAfter(c); }
+    U_COMMON_API UBool isFCDInert(UChar32 c) const { return getFCD16(c) <= 1; }
+
+  private:
     friend class InitCanonIterData;
     friend class LcccContext;
 

File diff suppressed because it is too large
+ 499 - 490
thirdparty/icu4c/common/propname_data.h


+ 1 - 1
thirdparty/icu4c/common/rbbidata.h

@@ -135,7 +135,7 @@ struct RBBIStateTable {
     uint32_t         fNumStates;            // Number of states.
     uint32_t         fRowLen;               // Length of a state table row, in bytes.
     uint32_t         fDictCategoriesStart;  // Char category number of the first dictionary
-                                            //   char class, or the the largest category number + 1
+                                            //   char class, or the largest category number + 1
                                             //   if there are no dictionary categories.
     uint32_t         fLookAheadResultsSize; // Size of run-time array required for holding
                                             //   look-ahead results. Indexed by row.fLookAhead.

+ 1 - 1
thirdparty/icu4c/common/rbbiscan.cpp

@@ -1020,7 +1020,7 @@ void RBBIRuleScanner::parse() {
     // Main loop for the rule parsing state machine.
     //   Runs once per state transition.
     //   Each time through optionally performs, depending on the state table,
-    //      - an advance to the the next input char
+    //      - an advance to the next input char
     //      - an action to be performed.
     //      - pushing or popping a state to/from the local state return stack.
     //

+ 4 - 3
thirdparty/icu4c/common/rbbisetb.cpp

@@ -328,9 +328,10 @@ int32_t RBBISetBuilder::getTrieSize()  {
             UCPTRIE_TYPE_FAST,
             use8Bits ? UCPTRIE_VALUE_BITS_8 : UCPTRIE_VALUE_BITS_16,
             fStatus);
-        fTrieSize = ucptrie_toBinary(fTrie, nullptr, 0, fStatus);
-        if (*fStatus == U_BUFFER_OVERFLOW_ERROR) {
-            *fStatus = U_ZERO_ERROR;
+        UErrorCode bufferStatus = *fStatus;
+        fTrieSize = ucptrie_toBinary(fTrie, nullptr, 0, &bufferStatus);
+        if (bufferStatus != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(bufferStatus)) {
+            *fStatus = bufferStatus;
         }
     }
     return fTrieSize;

+ 1 - 1
thirdparty/icu4c/common/rbbisetb.h

@@ -46,7 +46,7 @@ public:
     int32_t            fNum {0};                 // runtime-mapped input value for this range.
     bool               fIncludesDict {false};    // True if the range includes $dictionary.
     bool               fFirstInGroup {false};    // True if first range in a group with the same fNum.
-    UVector           *fIncludesSets {nullptr};  // vector of the the original
+    UVector           *fIncludesSets {nullptr};  // vector of the original
                                                  //   Unicode sets that include this range.
                                                  //    (Contains ptrs to uset nodes)
     RangeDescriptor   *fNext {nullptr};          // Next RangeDescriptor in the linked list.

+ 1 - 1
thirdparty/icu4c/common/rbbitblb.cpp

@@ -1442,7 +1442,7 @@ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
     // 1. Identify pairs of character classes that are "safe." Safe means that boundaries
     // following the pair do not depend on context or state before the pair. To test
     // whether a pair is safe, run it through the main forward state table, starting
-    // from each state. If the the final state is the same, no matter what the starting state,
+    // from each state. If the final state is the same, no matter what the starting state,
     // the pair is safe.
     //
     // 2. Build a state table that recognizes the safe pairs. It's similar to their

+ 16 - 16
thirdparty/icu4c/common/servloc.h

@@ -422,7 +422,7 @@ public:
  ******************************************************************
  */
 
-class U_COMMON_API ICULocaleService : public ICUService 
+class U_COMMON_API_CLASS ICULocaleService : public ICUService
 {
  private:
   Locale fallbackLocale;
@@ -432,17 +432,17 @@ class U_COMMON_API ICULocaleService : public ICUService
   /**
    * Construct an ICULocaleService.
    */
-  ICULocaleService();
+  U_COMMON_API ICULocaleService();
 
   /**
    * Construct an ICULocaleService with a name (useful for debugging).
    */
-  ICULocaleService(const UnicodeString& name);
+  U_COMMON_API ICULocaleService(const UnicodeString& name);
 
   /**
    * Destructor.
    */
-  virtual ~ICULocaleService();
+  U_COMMON_API virtual ~ICULocaleService();
 
 #if 0
   // redeclare because of overload resolution rules?
@@ -462,19 +462,19 @@ class U_COMMON_API ICULocaleService : public ICUService
    * get(Locale, int, Locale[]) with KIND_ANY for kind and null for
    * actualReturn.
    */
-  UObject* get(const Locale& locale, UErrorCode& status) const;
+  U_COMMON_API UObject* get(const Locale& locale, UErrorCode& status) const;
 
   /**
    * Convenience override for callers using locales.  This calls
    * get(Locale, int, Locale[]) with a null actualReturn.
    */
-  UObject* get(const Locale& locale, int32_t kind, UErrorCode& status) const;
+  U_COMMON_API UObject* get(const Locale& locale, int32_t kind, UErrorCode& status) const;
 
   /**
    * Convenience override for callers using locales. This calls
    * get(Locale, String, Locale[]) with a null kind.
    */
-  UObject* get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const;
+  U_COMMON_API UObject* get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const;
                    
   /**
    * Convenience override for callers using locales.  This uses
@@ -482,27 +482,27 @@ class U_COMMON_API ICULocaleService : public ICUService
    * if actualReturn is not null, returns the actualResult from
    * getKey (stripping any prefix) into a Locale.  
    */
-  UObject* get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const;
+  U_COMMON_API UObject* get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const;
 
   /**
    * Convenience override for callers using locales.  This calls
    * registerObject(Object, Locale, int32_t kind, int coverage)
    * passing KIND_ANY for the kind, and VISIBLE for the coverage.
    */
-  virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status);
+  U_COMMON_API virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status);
 
   /**
    * Convenience function for callers using locales.  This calls
    * registerObject(Object, Locale, int kind, int coverage)
    * passing VISIBLE for the coverage.
    */
-  virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status);
+  U_COMMON_API virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status);
 
   /**
    * Convenience function for callers using locales.  This  instantiates
    * a SimpleLocaleKeyFactory, and registers the factory.
    */
-  virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status);
+  U_COMMON_API virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status);
 
 
   /**
@@ -512,13 +512,13 @@ class U_COMMON_API ICULocaleService : public ICUService
    * We really need a flag that is understood by all compilers that will suppress the warning about
    * hidden overrides.
    */
-  virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status) override;
+  U_COMMON_API virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status) override;
 
   /**
    * Convenience method for callers using locales.  This returns the standard
    * service ID enumeration.
    */
-  virtual StringEnumeration* getAvailableLocales() const;
+  U_COMMON_API virtual StringEnumeration* getAvailableLocales() const;
 
  protected:
 
@@ -526,17 +526,17 @@ class U_COMMON_API ICULocaleService : public ICUService
    * Return the name of the current fallback locale.  If it has changed since this was
    * last accessed, the service cache is cleared.
    */
-  const UnicodeString& validateFallbackLocale() const;
+  U_COMMON_API const UnicodeString& validateFallbackLocale() const;
 
   /**
    * Override superclass createKey method.
    */
-  virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const override;
+  U_COMMON_API virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const override;
 
   /**
    * Additional createKey that takes a kind.
    */
-  virtual ICUServiceKey* createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const;
+  U_COMMON_API virtual ICUServiceKey* createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const;
 
   friend class ServiceEnumeration;
 };

+ 10 - 10
thirdparty/icu4c/common/sharedobject.h

@@ -51,28 +51,28 @@ private:
  * Either stack-allocate, use LocalPointer, or use addRef()/removeRef().
  * Sharing requires reference-counting.
  */
-class U_COMMON_API SharedObject : public UObject {
+class U_COMMON_API_CLASS SharedObject : public UObject {
 public:
     /** Initializes totalRefCount, softRefCount to 0. */
-    SharedObject() :
+    U_COMMON_API SharedObject() :
             softRefCount(0),
             hardRefCount(0),
             cachePtr(nullptr) {}
 
     /** Initializes totalRefCount, softRefCount to 0. */
-    SharedObject(const SharedObject &other) :
+    U_COMMON_API SharedObject(const SharedObject &other) :
             UObject(other),
             softRefCount(0),
             hardRefCount(0),
             cachePtr(nullptr) {}
 
-    virtual ~SharedObject();
+    U_COMMON_API virtual ~SharedObject();
 
     /**
      * Increments the number of hard references to this object. Thread-safe.
      * Not for use from within the Unified Cache implementation.
      */
-    void addRef() const;
+    U_COMMON_API void addRef() const;
 
     /**
      * Decrements the number of hard references to this object, and
@@ -81,32 +81,32 @@ public:
      * 
      * Not for use from within the UnifiedCache implementation.
      */
-    void removeRef() const;
+    U_COMMON_API void removeRef() const;
 
     /**
      * Returns the number of hard references for this object.
      * Uses a memory barrier.
      */
-    int32_t getRefCount() const;
+    U_COMMON_API int32_t getRefCount() const;
 
     /**
      * If noHardReferences() == true then this object has no hard references.
      * Must be called only from within the internals of UnifiedCache.
      */
-    inline UBool noHardReferences() const { return getRefCount() == 0; }
+    U_COMMON_API inline UBool noHardReferences() const { return getRefCount() == 0; }
 
     /**
      * If hasHardReferences() == true then this object has hard references.
      * Must be called only from within the internals of UnifiedCache.
      */
-    inline UBool hasHardReferences() const { return getRefCount() != 0; }
+    U_COMMON_API inline UBool hasHardReferences() const { return getRefCount() != 0; }
 
     /**
      * Deletes this object if it has no references.
      * Available for non-cached SharedObjects only. Ownership of cached objects
      * is with the UnifiedCache, which is solely responsible for eviction and deletion.
      */
-    void deleteIfZeroRefCount() const;
+    U_COMMON_API void deleteIfZeroRefCount() const;
 
         
     /**

+ 6 - 0
thirdparty/icu4c/common/static_unicode_sets.cpp

@@ -187,7 +187,13 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
     U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr);
     U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr);
 
+    // The following don't currently have parseLenients in data.
+    U_ASSERT(gUnicodeSets[INFINITY_SIGN] == nullptr);
     gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status);
+    U_ASSERT(gUnicodeSets[APPROXIMATELY_SIGN] == nullptr);
+    // This set of characters was manually curated from the
+    // values of the approximatelySign element of CLDR common/main/*.xml files.
+    gUnicodeSets[APPROXIMATELY_SIGN] = new UnicodeSet(u"[∼~≈≃約]", status);
     if (U_FAILURE(status)) { return; }
 
     U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);

+ 1 - 0
thirdparty/icu4c/common/static_unicode_sets.h

@@ -56,6 +56,7 @@ enum Key {
     PERCENT_SIGN,
     PERMILLE_SIGN,
     INFINITY_SIGN,
+    APPROXIMATELY_SIGN,
 
     // Currency Symbols
     DOLLAR_SIGN,

File diff suppressed because it is too large
+ 430 - 421
thirdparty/icu4c/common/ubidi_props_data.h


+ 168 - 157
thirdparty/icu4c/common/ucase_props_data.h

@@ -9,11 +9,11 @@
 
 #ifdef INCLUDED_FROM_UCASE_CPP
 
-static const UVersionInfo ucase_props_dataVersion={0x10,0,0,0};
+static const UVersionInfo ucase_props_dataVersion={0x11,0,0,0};
 
-static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x78bc,0x6888,0x688,0x172,0,0,0,0,0,0,0,0,0,0,3};
+static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x7a14,0x69e0,0x688,0x172,0,0,0,0,0,0,0,0,0,0,3};
 
-static const uint16_t ucase_props_trieIndex[13372]={
+static const uint16_t ucase_props_trieIndex[13544]={
 0x363,0x36b,0x373,0x37b,0x389,0x391,0x399,0x3a1,0x3a9,0x3b1,0x3b8,0x3c0,0x3c8,0x3d0,0x3d8,0x3e0,
 0x3e6,0x3ee,0x3f6,0x3fe,0x406,0x40e,0x416,0x41e,0x426,0x42e,0x436,0x43e,0x446,0x44e,0x456,0x45e,
 0x466,0x46e,0x476,0x47e,0x486,0x48e,0x496,0x49e,0x49a,0x4a2,0x4a7,0x4af,0x4b6,0x4be,0x4c6,0x4ce,
@@ -27,18 +27,18 @@ static const uint16_t ucase_props_trieIndex[13372]={
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x693,0x699,0x581,0x581,0x382,0x69f,0x6a7,0x382,
 0x6af,0x382,0x6b7,0x382,0x6be,0x6c4,0x382,0x382,0x382,0x6cc,0x382,0x382,0x382,0x382,0x382,0x382,
-0x6d3,0x382,0x6da,0x6e2,0x382,0x6ea,0x6f2,0x382,0x5b1,0x6f6,0x6fe,0x704,0x5f3,0x70c,0x382,0x713,
-0x382,0x718,0x382,0x71e,0x726,0x72a,0x732,0x73a,0x742,0x747,0x74a,0x752,0x762,0x75a,0x772,0x76a,
-0x3a9,0x77a,0x3a9,0x782,0x785,0x3a9,0x78d,0x3a9,0x795,0x79d,0x7a5,0x7ad,0x7b5,0x7bd,0x7c5,0x7cd,
-0x7d5,0x7dc,0x382,0x7e4,0x7ec,0x382,0x7f4,0x7fc,0x804,0x80c,0x814,0x81c,0x824,0x382,0x382,0x382,
+0x6d3,0x382,0x6da,0x6e2,0x382,0x6ea,0x6fa,0x6f2,0x5b1,0x702,0x70a,0x710,0x5f3,0x718,0x382,0x71f,
+0x382,0x724,0x382,0x72a,0x732,0x736,0x73e,0x746,0x74e,0x753,0x756,0x75e,0x76e,0x766,0x77e,0x776,
+0x3a9,0x786,0x3a9,0x78e,0x791,0x3a9,0x799,0x3a9,0x7a1,0x7a9,0x7b1,0x7b9,0x7c1,0x7c9,0x7d1,0x7d9,
+0x7e1,0x7e8,0x382,0x7f0,0x7f8,0x382,0x800,0x808,0x810,0x818,0x820,0x828,0x830,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x827,0x82d,0x833,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
+0x382,0x382,0x382,0x382,0x382,0x833,0x839,0x83f,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x83b,0x83f,0x843,0x84b,0x3a9,0x3a9,0x3a9,0x853,0x85b,0x862,0x382,0x867,0x382,0x382,0x382,0x86f,
+0x847,0x84b,0x84f,0x857,0x3a9,0x3a9,0x3a9,0x85f,0x867,0x86e,0x382,0x873,0x382,0x382,0x382,0x87b,
 0x382,0x6b4,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x573,0x877,0x382,0x382,0x87e,0x382,0x382,0x886,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
+0x573,0x883,0x382,0x382,0x88a,0x382,0x382,0x892,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
@@ -94,12 +94,12 @@ static const uint16_t ucase_props_trieIndex[13372]={
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x88e,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
+0x89a,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x71e,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x894,0x382,0x89c,0x8a1,0x8a9,0x382,0x382,0x8b1,0x8b9,0x8c1,0x3a9,0x8c6,0x8ce,0x8d4,0x8db,0x8e3,
-0x8eb,0x8f2,0x382,0x382,0x382,0x382,0x8f9,0x901,0x382,0x909,0x910,0x382,0x55e,0x915,0x91d,0x6be,
-0x382,0x923,0x92b,0x92f,0x382,0x937,0x93f,0x947,0x382,0x94d,0x951,0x959,0x969,0x961,0x382,0x971,
+0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x72a,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
+0x8a0,0x382,0x8a8,0x8ad,0x8b5,0x382,0x382,0x8bd,0x8c5,0x8cd,0x3a9,0x8d2,0x8da,0x8e0,0x8e7,0x8ef,
+0x8f7,0x8fe,0x382,0x382,0x382,0x382,0x905,0x90d,0x382,0x915,0x91c,0x382,0x55e,0x921,0x929,0x6be,
+0x382,0x92f,0x937,0x93b,0x382,0x943,0x94b,0x953,0x382,0x959,0x95d,0x965,0x975,0x96d,0x382,0x97d,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
@@ -139,9 +139,9 @@ static const uint16_t ucase_props_trieIndex[13372]={
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x979,0x382,0x382,0x382,0x382,0x981,0x55e,0x382,
+0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x985,0x382,0x382,0x382,0x382,0x98d,0x55e,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x986,0x98e,0x992,0x382,0x382,0x382,0x382,0x365,0x36b,0x99a,0x9a2,0x9a9,0x519,0x382,0x382,0x9b1,
+0x992,0x99a,0x99e,0x382,0x382,0x382,0x382,0x365,0x36b,0x9a6,0x9ae,0x9b5,0x519,0x382,0x382,0x9bd,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0xe0c,0xe0c,0xe24,0xe64,0xea4,0xee0,0xf20,0xf60,0xf98,0xfd8,0x1018,0x1058,0x1098,0x10d8,0x1118,0x1158,
@@ -173,61 +173,61 @@ static const uint16_t ucase_props_trieIndex[13372]={
 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
 0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,
 0xd4b,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x9b8,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x9c0,0x382,0x382,0x382,0x9c3,0x382,0x382,0x382,
-0x382,0x9cb,0x9d1,0x9d5,0x382,0x382,0x9d9,0x9dd,0x9e3,0x382,0x382,0x382,0x9ea,0x9ee,0x9f6,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xa06,0x9fe,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xa0e,
-0xa12,0x382,0x382,0x382,0x382,0x382,0xa1a,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0xa22,0xa26,0xa2e,0xa32,0x382,0xa39,0xa3e,0xa45,0xa4c,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0xa52,0x382,0xa56,0x382,0x382,0xa5e,0x382,0xa66,0x382,0x382,0x382,0x574,
-0xa68,0xa6f,0xa73,0x5f3,0xa7b,0xa83,0x382,0xa8b,0xa92,0x382,0xa98,0x5f3,0xa9d,0xaa5,0x382,0x382,
-0xaaa,0x574,0x382,0x382,0x382,0x365,0xab2,0x5f3,0x5f5,0xaba,0xac1,0x382,0xa8d,0xac9,0x58d,0x382,
-0xa68,0xad1,0x382,0x382,0xad9,0xae1,0x382,0x382,0x382,0x382,0x382,0x382,0xae5,0xaed,0x382,0x382,
-0xaf5,0x4dd,0x382,0x382,0xafd,0x382,0x382,0xb03,0xb0b,0x382,0x382,0x382,0x382,0x382,0x382,0xb10,
-0x382,0x382,0x382,0xb18,0xb20,0x382,0x382,0xb28,0xb30,0x382,0x382,0x382,0xb33,0x9c0,0xb3b,0xb3f,
-0xb47,0x382,0xb4e,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xb55,
-0x382,0x382,0x981,0xb5d,0x382,0x382,0x382,0xb63,0xb6b,0x382,0xb6f,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0xb75,0x5f3,0xb7b,0xb83,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
+0x9c4,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x9cc,0x382,0x382,0x382,0x9cf,0x382,0x382,0x382,
+0x382,0x9d7,0x9dd,0x9e1,0x382,0x382,0x9e5,0x9e9,0x9ef,0x382,0x382,0x382,0x9f6,0x9fa,0xa02,0x382,
+0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xa12,0xa0a,0x382,
+0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xa1a,
+0xa1e,0x382,0x382,0x382,0x382,0x382,0xa26,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
+0x382,0x382,0x382,0xa2e,0xa32,0xa3a,0xa3e,0x382,0xa45,0xa4a,0xa51,0xa58,0x382,0x382,0x382,0x382,
+0x382,0x382,0x382,0x382,0xa5e,0x573,0xa62,0x382,0x382,0xa6a,0x382,0xa72,0x382,0x382,0x382,0x574,
+0xa74,0xa7b,0xa7f,0x5f3,0xa87,0xa8f,0x382,0xa97,0xa9e,0x382,0xaa4,0x5f3,0xaa9,0xab1,0x382,0x382,
+0xab6,0x574,0x382,0x382,0x382,0x365,0xabe,0x5f3,0x5f5,0xac6,0xacd,0x382,0xa99,0xad5,0x58d,0x382,
+0xa74,0xadd,0x382,0x382,0xae5,0xaed,0x382,0x382,0x382,0x382,0x382,0x382,0xaf1,0xaf9,0x382,0x382,
+0xb01,0x4dd,0x382,0x382,0xb09,0x382,0x382,0xb0f,0xb17,0x382,0x382,0x382,0x382,0x382,0x382,0xb1c,
+0x382,0x382,0x382,0xb24,0xb2c,0x382,0x382,0xb34,0xb3c,0x382,0x382,0x382,0xb3f,0x9cc,0xb47,0xb4b,
+0xb53,0x382,0xb5a,0x382,0x382,0x382,0x382,0x382,0x382,0xb62,0x382,0x382,0x382,0x382,0x382,0xb66,
+0x382,0x382,0x98d,0xb6e,0x382,0x382,0x382,0xb74,0xb7c,0x382,0xb80,0x382,0x94c,0x382,0x382,0x382,
+0x382,0x382,0x382,0x382,0x382,0xb86,0x5f3,0xb8c,0xb94,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xb8a,0xb92,0x382,0x382,0x382,0x382,
+0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xb9b,0xba3,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x519,0xb9a,0x382,0x382,0x382,0x382,0x382,
+0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x519,0xbab,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0xb9e,0x382,0xba4,0x5b1,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x55e,0xb77,0x382,0x382,0x382,0x382,0x382,0x382,0xb18,0xb20,0x382,0x382,
-0x382,0x382,0x382,0x382,0x6b4,0x382,0xbaa,0x382,0x382,0xbb2,0x382,0x382,0x382,0x382,0x382,0x382,
+0x382,0xbaf,0x382,0xbb5,0x5b1,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
+0x382,0x382,0x382,0x382,0x55e,0xb88,0x382,0x382,0x382,0x382,0x382,0x382,0xb24,0xb2c,0x382,0xbbd,
+0xbc4,0x382,0x382,0x382,0x6b4,0x382,0xbc9,0x382,0x382,0xbd1,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xbb7,0x382,0x382,0x382,0x382,0x382,0x382,
+0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xbd6,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xbbf,0x5b1,
+0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xbde,0x5b1,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x8b9,0xbc7,0xbce,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0xbd5,0xbdd,0xbe3,0x382,0x382,0x382,0x382,0xbeb,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xbf3,0xbfb,0xc00,0xc06,0xc0e,
-0xc16,0xc1e,0xbf7,0xc26,0xc2e,0xc36,0xc3d,0xbf8,0xbf3,0xbfb,0xbf6,0xc06,0xbf9,0xbf4,0xc45,0xbf7,
-0xc4d,0xc55,0xc5d,0xc64,0xc50,0xc58,0xc60,0xc67,0xc53,0xc6f,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x8b9,0xc77,0x8b9,0xc7e,0xc85,0xc8d,
+0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x8c5,0xbe6,0xbed,0x382,0x382,0x382,0x382,0x382,
+0x382,0x382,0x382,0x382,0x382,0x382,0xbf4,0xbfc,0xc02,0x382,0x382,0x382,0x382,0xc0a,0x382,0x382,
+0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xc12,0xc1a,0xc1f,0xc25,0xc2d,
+0xc35,0xc3d,0xc16,0xc45,0xc4d,0xc55,0xc5c,0xc17,0xc12,0xc1a,0xc15,0xc25,0xc18,0xc13,0xc64,0xc16,
+0xc6c,0xc74,0xc7c,0xc83,0xc6f,0xc77,0xc7f,0xc86,0xc72,0xc8e,0x382,0x382,0x382,0x382,0x382,0x382,
+0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x8c5,0xc96,0x8c5,0xc9d,0xca4,0xcac,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0xc9d,0xca5,0x382,0x382,0x382,0x382,0x382,0x382,0xc95,0xcad,0xcc0,0xcb3,0xcb8,0x382,
-0x382,0x382,0x382,0xcc8,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xad5,
-0x382,0xa37,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0xcd0,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xcd5,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xcd9,0x382,0xce1,0xce9,0xcf0,0x382,
+0x382,0x382,0xcbc,0xcc4,0x382,0x382,0x382,0x382,0x382,0x382,0xcb4,0xccc,0xcdf,0xcd2,0xcd7,0x382,
+0x382,0x382,0x382,0xce7,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xae1,
+0x382,0xa43,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
+0x382,0xcef,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xcf4,0x382,0x382,0x382,0x382,0x382,0x382,
+0x382,0xcfc,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xd04,0x382,0xd0c,0xd14,0xd1b,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0xbef,0xcf8,0xcf8,0xcfe,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xa8d,0x382,0x382,0x382,0x382,0x382,
+0x382,0x382,0x382,0x382,0xc0e,0xd23,0xd23,0xd29,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
+0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0xa99,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
-0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x574,0x8b9,0x8b9,0x8b9,0x382,
-0x382,0x382,0x382,0x8b9,0x8b9,0x8b9,0x8b9,0x8b9,0x8b9,0x8b9,0xd06,0x382,0x382,0x382,0x382,0x382,
+0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x574,0x8c5,0x8c5,0x8c5,0x382,
+0x382,0x382,0x382,0x8c5,0x8c5,0x8c5,0x8c5,0x8c5,0x8c5,0x8c5,0xd31,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,
 0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x382,0x362,0,0,0,0,
@@ -272,7 +272,7 @@ static const uint16_t ucase_props_trieIndex[13372]={
 1,0x9b11,1,0x9a91,0x869,1,1,1,0x9991,0x889,1,0x9891,0x8a9,0x8c9,0x8e9,1,
 0x97b1,0x9691,0x8e9,0x909,0x929,1,1,0x9691,1,0x949,0x9591,1,1,0x9511,1,1,
 1,1,1,1,1,0x969,1,1,0x9311,1,0x989,0x9311,1,1,1,0x9a9,
-0x9311,0xdd91,0x9391,0x9391,0xdc91,1,1,1,1,1,0x9291,1,0,1,1,1,
+0x9311,0xdd91,0x9391,0x9391,0xdc91,1,1,1,1,1,0x9291,1,0,0,1,1,
 1,1,1,1,1,0x9c9,0x9e9,1,1,1,1,1,1,1,1,1,
 1,1,1,1,1,1,1,1,5,5,0x25,5,5,5,5,5,
 5,4,4,4,0x14,4,0x14,4,5,5,4,4,4,4,4,4,
@@ -458,8 +458,11 @@ static const uint16_t ucase_props_trieIndex[13372]={
 4,4,4,4,4,0,0,0,0,0,0,4,4,0x44,0x44,0x44,
 0x44,0x44,0x44,0x44,0x44,0,0,0x64,0,0,0,0,0,0,0,4,
 0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,
-0x64,0x64,0x64,0x44,0x44,0x64,4,0x64,0x64,0x44,0x44,0x64,0x64,0x44,0x44,0x44,
-0x44,0x44,0x64,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,
+0x64,0x64,0x64,0x44,0x44,0x64,4,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44,
+0x44,0x44,0x44,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0x64,0x44,0x44,0x64,0x64,0x44,0x44,0x44,
+0x44,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
+0x44,0x44,0x44,0x44,0x44,0x64,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0x64,0,4,4,
 4,4,4,0,4,0,0,0,0,0,4,0,0x60,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -581,9 +584,9 @@ static const uint16_t ucase_props_trieIndex[13372]={
 4,4,4,0x92,0xff91,0x513a,1,0,0x92,0xff91,0x92,0xff91,0x1811,1,0x92,0xff91,
 0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x515a,0x517a,0x519a,0x51ba,0x515a,1,
 0x51da,0x51fa,0x521a,0x523a,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
-0xe812,0x525a,0x527a,0x92,0xff91,0x92,0xff91,0x529a,0x92,0xff91,0,0,0x92,0xff91,0,1,
-0,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x52ba,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,5,
+0xe812,0x525a,0x527a,0x92,0xff91,0x92,0xff91,0x529a,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,
+0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x52ba,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,
 5,0x92,0xff91,0,5,5,1,0,0,0,0,0,0,0,4,0,
 0,0,0x64,0,0,0,0,4,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,
@@ -677,7 +680,7 @@ static const uint16_t ucase_props_trieIndex[13372]={
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,4,0x64,0x64,0x64,0,0,0,0,0,0,0x64,0x64,
+0,0,0x64,0x64,4,0x64,0x64,0x64,0,0,0,0,0,0,0x64,0x64,
 0x44,0x44,0x44,0x64,0x44,0x64,0x64,0x64,0x64,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0x44,0x64,0x44,0x64,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
@@ -739,117 +742,125 @@ static const uint16_t ucase_props_trieIndex[13372]={
 0,0,0,0x64,0,0,0,0,0,0,0,0,0,4,4,4,
 4,4,4,0,0,4,4,4,0,0,0,0,0,0,0,0,
 0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,0,
-4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,4,4,4,4,4,4,4,0,4,4,4,4,
-4,4,0,0x64,4,4,4,4,4,4,4,4,0,0,4,4,
-4,4,4,4,4,0,4,4,0,4,4,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,
-4,4,4,0,0,0,4,0,4,4,0,4,4,4,0x64,4,
-0x64,0x64,0,4,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0,
-0,4,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,4,4,4,4,4,0,0,0,0,0,4,0x60,0x64,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
-4,4,4,4,4,4,4,4,4,0,0,0,0,0,0,4,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,
-0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
-4,4,0,0,0,4,4,0x64,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x64,0,0,0,
+4,0x64,0,0,0,0,0,0,4,0,4,4,4,0,4,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,4,
-4,4,4,4,4,4,4,4,4,4,0,4,4,0,0,0,
-0,0,0,0,0,0,0,0,0x60,0x60,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
-0,4,4,4,4,4,4,4,0,4,4,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,0,
+4,4,4,4,4,4,0,0x64,4,4,4,4,4,4,4,4,
+0,0,4,4,4,4,4,4,4,0,4,4,0,4,4,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,4,0x64,0,4,4,4,4,
-4,4,4,4,4,4,4,4,4,4,0,0,4,4,4,4,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,
+0,4,4,4,4,4,4,0,0,0,4,0,4,4,0,4,
+4,4,0x64,4,0x64,0x64,0,4,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0x60,0x60,0x64,0x64,0x64,0,0,
-0,0x60,0x60,0x60,0x60,0x60,0x60,4,4,4,4,4,4,4,4,0x64,
-0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,
+4,4,0,0,0,4,0,0x64,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,4,4,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0x44,0x44,0x44,0x44,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,
-0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,
+0,0,0,0,0,0,4,4,4,4,4,0,0,0,0,0,
+4,0x60,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
+4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,
+0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
+4,4,4,4,4,4,0,0,0,4,4,0x64,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,
+0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,
+0,0,0,0,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,
+0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0xd92,0,0,0xf291,
+0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,0xf291,
+0xf291,0xf291,0xf291,0xf291,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,4,4,4,4,0,4,4,0,0,0,0,0,0,0,
+0,0,0,0,0x60,0x60,4,4,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,4,4,4,0,4,4,4,
+4,4,4,4,0,4,4,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,4,0x64,0,4,4,4,4,4,4,4,4,
+4,4,4,4,4,4,0,0,4,4,4,4,4,4,4,4,
+4,4,4,4,4,4,4,4,4,4,4,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0x60,0x60,0x64,0x64,0x64,0,0,0,0x60,0x60,0x60,
+0x60,0x60,0x60,4,4,4,4,4,4,4,4,0x64,0x64,0x64,0x64,0x64,
+0x64,0x64,0x64,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
+1,0,0x21,0x21,1,1,1,1,1,1,1,1,2,2,2,2,
 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
 2,2,2,2,2,2,1,1,1,1,1,1,1,1,0x21,0x21,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,
-1,1,1,1,1,0,0x21,0x21,1,1,1,1,1,1,1,1,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,
-1,1,0x21,0x21,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,2,0,2,2,0,0,2,0,0,2,2,0,
-0,2,2,2,2,0,2,2,2,2,2,2,2,2,1,1,
-1,1,0,1,0,1,0x21,0x21,1,1,1,1,0,1,1,1,
-1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,1,1,1,1,2,2,0,2,
-2,2,2,0,0,2,2,2,2,2,2,2,2,0,2,2,
+2,0,2,2,0,0,2,0,0,2,2,0,0,2,2,2,
+2,0,2,2,2,2,2,2,2,2,1,1,1,1,0,1,
+0,1,0x21,0x21,1,1,1,1,0,1,1,1,1,1,1,1,
+1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,1,1,1,1,2,2,0,2,2,2,2,0,
+0,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,
+2,0,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,2,2,0,2,
+2,2,2,0,2,2,2,2,2,0,2,0,0,0,2,2,
 2,2,2,2,2,0,1,1,1,1,1,1,1,1,0x21,0x21,
+1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-2,2,0,2,2,2,2,0,2,2,2,2,2,0,2,0,
-0,0,2,2,2,2,2,2,2,0,1,1,1,1,1,1,
-1,1,0x21,0x21,1,1,1,1,1,1,1,1,1,1,1,1,
 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,1,1,1,1,1,1,0,0,2,2,2,2,
+1,1,1,1,1,1,0,0,2,2,2,2,2,2,2,2,
 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,0,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
-1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,
+2,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,
+1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,0,1,1,1,1,
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-1,1,1,1,1,0,1,1,1,1,1,1,2,2,2,2,
-2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,0,
-1,1,1,1,1,1,2,1,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,
-4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
-4,4,4,0,0,0,0,4,4,4,4,4,4,4,4,4,
-4,4,4,4,4,0,0,0,0,0,0,0,0,4,0,0,
-0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,
-4,4,4,4,0,4,4,4,4,4,4,4,4,4,4,4,
-4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0x44,0x44,0x44,0x44,
-0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0x44,
-0x44,0x44,0x44,0x44,1,1,1,1,1,1,1,1,1,1,0,1,
-1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x21,1,
-1,1,1,0,0,0,0,0,0,1,1,1,1,1,1,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x44,0x44,0,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0,
-0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5,
-5,5,5,5,0x25,5,5,5,5,5,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-5,5,5,5,5,5,5,5,5,5,5,5,0x25,0x25,5,5,
-5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+1,0,1,1,1,1,1,1,2,2,2,2,2,2,2,2,
+2,2,2,2,2,2,2,2,1,1,1,0,1,1,1,1,
+1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
+4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,0,
+0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,
+4,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,
+0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,
+0,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0x44,0x44,0x44,0x44,0x44,0x44,0x44,4,4,4,4,4,4,4,0,0,
-0,0,0,0,0,0,0,0,0,0,0,4,0x64,0x64,0x64,0x44,
+0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,
+0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0x44,0x44,0x44,0x44,0x44,
+1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,0x21,1,1,1,1,0,
+0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0x44,0x64,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,0,0,
-0,0,0,0,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,
+0x44,0x44,0,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,
+5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+0x25,5,5,5,5,5,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0x44,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5,
+5,5,5,5,5,5,5,5,0x25,0x25,5,5,5,5,5,5,
+5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,
+0x44,0x44,0x44,4,4,4,4,4,4,4,0,0,0,0,0,0,
+0,0,0,0,0,0,0,4,0x64,0x64,0x64,0x44,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x64,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0x44,0,0,0x44,0,0,0,0,0,0,0,0x44,0x44,
+0,0,0,0,0,0x44,0,0,0,0,0,0,0,0,0,4,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,
 0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,
-0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,
+0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,
+0x1112,0x1112,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,
 0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,
-0xef11,0xef11,0xef11,0xef11,0x44,0x44,0x44,0x44,0x44,0x44,0x64,4,0,0,0,0,
+0x44,0x44,0x44,0x44,0x44,0x44,0x64,4,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,
+2,2,2,2,2,2,0,0,0,0,0,0,2,2,2,2,
+2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,
-2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,
-4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,
-0,0,0,0,0,0,0,0,0,0,0,0
+0,0,0,0,4,4,4,4,4,4,4,4,4,4,4,4,
+4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0
 };
 
 static const uint16_t ucase_props_exceptions[1672]={
@@ -997,13 +1008,13 @@ static const UCaseProps ucase_props_singleton={
     ucase_props_trieIndex+3468,
     nullptr,
     3468,
-    9904,
+    10076,
     0x188,
     0xe08,
     0x0,
     0x0,
     0xe0800,
-    0x3438,
+    0x34e4,
     nullptr, 0, false, false, 0, nullptr
   },
   { 4,0,0,0 }

+ 3 - 3
thirdparty/icu4c/common/ucasemap.cpp

@@ -102,9 +102,9 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
         return;
     }
 
-    int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
-    if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
-        *pErrorCode=U_ZERO_ERROR;
+    UErrorCode bufferStatus = U_ZERO_ERROR;
+    int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), &bufferStatus);
+    if(bufferStatus==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) {
         /* we only really need the language code for case mappings */
         length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode);
     }

+ 27 - 0
thirdparty/icu4c/common/uchar.cpp

@@ -616,6 +616,33 @@ uscript_getScriptExtensions(UChar32 c,
     return length;
 }
 
+namespace {  // file-local scope for the enumeration callback below
+
+UBool U_CALLCONV  // range callback: handed to utrie2_enum() by uprv_addScriptExtensionsCodePoints()
+_scxRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {  // context: a const USetAdder*; value: trie value shared by start..end
+    // From u_getUnicodeProperties(start, 0): the trie value is used directly as the index into propsVectors.
+    uint32_t vecWord = propsVectors[value];  // vecIndex=value, column 0
+    uint32_t scriptX = vecWord & UPROPS_SCRIPT_X_MASK;  // isolate the bits selected by UPROPS_SCRIPT_X_MASK
+    if (scriptX >= UPROPS_SCRIPT_X_WITH_COMMON) {
+        // Code points start..end have Script_Extensions.
+        const USetAdder* sa = static_cast<const USetAdder*>(context);
+        sa->addRange(sa->set, start, end);  // report the whole range through the adder's callback
+    }
+    (void) value;  // NOTE(review): value is already read above, so this cast looks redundant — confirm before removing
+    return true;  // always true; presumably tells utrie2_enum() to keep enumerating — confirm against utrie2.h
+}
+
+}  // namespace
+
+// for icuexportdata: enumerates propsVectorsTrie and adds to sa each range that _scxRange() accepts as having Script_Extensions.
+U_CAPI void U_EXPORT2
+uprv_addScriptExtensionsCodePoints(const USetAdder *sa, UErrorCode *pErrorCode) {  // sa: receives ranges via sa->addRange; pErrorCode: only read here, never written
+    if(U_FAILURE(*pErrorCode)) {
+        return;  // caller already has a failure status: add nothing
+    }
+    utrie2_enum(&propsVectorsTrie, nullptr, _scxRange, sa);  // nullptr: no value-mapping callback (TODO confirm against utrie2.h); sa is passed to _scxRange as its context
+}
+
 U_CAPI UBlockCode U_EXPORT2
 ublock_getCode(UChar32 c) {
     // We store Block values indexed by the code point shifted right 4 bits

File diff suppressed because it is too large
+ 629 - 636
thirdparty/icu4c/common/uchar_props_data.h


+ 18 - 10
thirdparty/icu4c/common/ucnv.cpp

@@ -1752,20 +1752,24 @@ ucnv_fromUChars(UConverter *cnv,
         destLimit=dest+destCapacity;
 
         /* perform the conversion */
-        ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, pErrorCode);
+        UErrorCode bufferStatus = U_ZERO_ERROR;
+        ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
         destLength=(int32_t)(dest-originalDest);
 
         /* if an overflow occurs, then get the preflighting length */
-        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+        if(bufferStatus==U_BUFFER_OVERFLOW_ERROR) {
             char buffer[1024];
 
             destLimit=buffer+sizeof(buffer);
             do {
                 dest=buffer;
-                *pErrorCode=U_ZERO_ERROR;
-                ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, pErrorCode);
+                bufferStatus=U_ZERO_ERROR;
+                ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
                 destLength+=(int32_t)(dest-buffer);
-            } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
+            } while(bufferStatus==U_BUFFER_OVERFLOW_ERROR);
+        }
+        if (U_FAILURE(bufferStatus)) {
+            *pErrorCode = bufferStatus;
         }
     } else {
         destLength=0;
@@ -1808,22 +1812,26 @@ ucnv_toUChars(UConverter *cnv,
         destLimit=dest+destCapacity;
 
         /* perform the conversion */
-        ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, pErrorCode);
+        UErrorCode bufferStatus = U_ZERO_ERROR;
+        ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
         destLength=(int32_t)(dest-originalDest);
 
         /* if an overflow occurs, then get the preflighting length */
-        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
+        if(bufferStatus==U_BUFFER_OVERFLOW_ERROR)
         {
             char16_t buffer[1024];
 
             destLimit=buffer+UPRV_LENGTHOF(buffer);
             do {
                 dest=buffer;
-                *pErrorCode=U_ZERO_ERROR;
-                ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, pErrorCode);
+                bufferStatus=U_ZERO_ERROR;
+                ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
                 destLength+=(int32_t)(dest-buffer);
             }
-            while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
+            while(bufferStatus==U_BUFFER_OVERFLOW_ERROR);
+        }
+        if (U_FAILURE(bufferStatus)) {
+            *pErrorCode = bufferStatus;
         }
     } else {
         destLength=0;

+ 1 - 1
thirdparty/icu4c/common/ucnv2022.cpp

@@ -597,7 +597,7 @@ _ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
             /* open the required converters and cache them */
             myConverterData->myConverterArray[GB2312_1] =
                 ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode);
-            if(version==1) {
+            if(version>=1) {
                 myConverterData->myConverterArray[ISO_IR_165] =
                     ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode);
             }

+ 32 - 9
thirdparty/icu4c/common/ucnv_io.cpp

@@ -40,6 +40,7 @@
 #include "uarrsort.h"
 #include "uassert.h"
 #include "udataswp.h"
+#include "udatamem.h"
 #include "cstring.h"
 #include "cmemory.h"
 #include "ucnv_io.h"
@@ -235,23 +236,29 @@ static void U_CALLCONV initAliasData(UErrorCode &errCode) {
     const uint32_t *sectionSizes;
     uint32_t tableStart;
     uint32_t currOffset;
+    int32_t sizeOfData;
+    int32_t sizeOfTOC;
 
     ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup);
 
     U_ASSERT(gAliasData == nullptr);
     data = udata_openChoice(nullptr, DATA_TYPE, DATA_NAME, isAcceptable, nullptr, &errCode);
-    if(U_FAILURE(errCode)) {
+    if (U_FAILURE(errCode)) {
         return;
     }
 
     sectionSizes = static_cast<const uint32_t*>(udata_getMemory(data));
+    int32_t dataLength = udata_getLength(data); // This is the length minus the UDataInfo size
+    if (dataLength <= int32_t(sizeof(sectionSizes[0]))) {
+        // We don't even have a TOC!
+        goto invalidFormat;
+    }
     table = reinterpret_cast<const uint16_t*>(sectionSizes);
-
-    tableStart      = sectionSizes[0];
-    if (tableStart < minTocLength) {
-        errCode = U_INVALID_FORMAT_ERROR;
-        udata_close(data);
-        return;
+    tableStart = sectionSizes[0];
+    sizeOfTOC = int32_t((tableStart + 1) * sizeof(sectionSizes[0]));
+    if (tableStart < minTocLength || dataLength <= sizeOfTOC) {
+        // We don't have a whole TOC!
+        goto invalidFormat;
     }
     gAliasData = data;
 
@@ -264,11 +271,21 @@ static void U_CALLCONV initAliasData(UErrorCode &errCode) {
     gMainTable.optionTableSize        = sectionSizes[7];
     gMainTable.stringTableSize        = sectionSizes[8];
 
-    if (tableStart > 8) {
+    if (tableStart > minTocLength) {
         gMainTable.normalizedStringTableSize = sectionSizes[9];
     }
 
-    currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t));
+    sizeOfData = sizeOfTOC;
+    for (uint32_t section = 1; section <= tableStart; section++) {
+        sizeOfData += sectionSizes[section] * sizeof(table[0]);
+    }
+    if (dataLength < sizeOfData) {
+        // Truncated file!
+        goto invalidFormat;
+    }
+    // There may be some extra padding at the end, or this is a new file format with extra data that we can't read yet.
+
+    currOffset = (tableStart + 1) * (sizeof(uint32_t)/sizeof(uint16_t));
     gMainTable.converterList = table + currOffset;
 
     currOffset += gMainTable.converterListSize;
@@ -306,6 +323,12 @@ static void U_CALLCONV initAliasData(UErrorCode &errCode) {
     currOffset += gMainTable.stringTableSize;
     gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED)
         ? gMainTable.stringTable : (table + currOffset));
+
+    return;
+
+invalidFormat:
+    errCode = U_INVALID_FORMAT_ERROR;
+    udata_close(data);
 }
 
 

+ 1 - 0
thirdparty/icu4c/common/udata.cpp

@@ -1004,6 +1004,7 @@ static UDataMemory *doLoadFromIndividualFiles(const char *pkgName,
                 *  and return it.   */
                 pEntryData->mapAddr = dataMemory.mapAddr;
                 pEntryData->map     = dataMemory.map;
+                pEntryData->length  = dataMemory.length;
 
 #ifdef UDATA_DEBUG
                 fprintf(stderr, "** Mapped file: %s\n", pathBuffer);

+ 63 - 52
thirdparty/icu4c/common/uidna.cpp

@@ -248,9 +248,10 @@ _internal_toASCII(const char16_t* src, int32_t srcLength,
     if(srcIsASCII == false){
         
         // step 2    
-        b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
+        UErrorCode bufferStatus = U_ZERO_ERROR;
+        b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, &bufferStatus);
 
-        if(*status == U_BUFFER_OVERFLOW_ERROR){
+        if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
             // redo processing of string
             // we do not have enough room so grow the buffer
             if(b1 != b1Stack){
@@ -262,9 +263,12 @@ _internal_toASCII(const char16_t* src, int32_t srcLength,
                 goto CLEANUP;
             }
 
-            *status = U_ZERO_ERROR; // reset error
-            
-            b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
+            bufferStatus = U_ZERO_ERROR; // reset error
+
+            b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, &bufferStatus);
+        }
+        if (U_FAILURE(bufferStatus)) {
+            *status = bufferStatus;
         }
     }
     // error bail out
@@ -333,9 +337,10 @@ _internal_toASCII(const char16_t* src, int32_t srcLength,
             // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool));
             // uprv_memset(caseFlags,true,b1Len);
 
-            b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);
+            UErrorCode bufferStatus = U_ZERO_ERROR;
+            b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags,&bufferStatus);
 
-            if(*status == U_BUFFER_OVERFLOW_ERROR){
+            if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
                 // redo processing of string
                 /* we do not have enough room so grow the buffer*/
                 b2 = static_cast<char16_t*>(uprv_malloc(b2Len * U_SIZEOF_UCHAR));
@@ -344,12 +349,13 @@ _internal_toASCII(const char16_t* src, int32_t srcLength,
                     goto CLEANUP;
                 }
 
-                *status = U_ZERO_ERROR; // reset error
-                
-                b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
+                bufferStatus = U_ZERO_ERROR; // reset error
+
+                b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags,&bufferStatus);
             }
             //error bail out
-            if(U_FAILURE(*status)){
+            if(U_FAILURE(bufferStatus)){
+                *status = bufferStatus;
                 goto CLEANUP;
             }
             // TODO : Reconsider while implementing the case preserve RFE
@@ -454,8 +460,9 @@ _internal_toUnicode(const char16_t* src, int32_t srcLength,
     
     if(srcIsASCII == false){
         // step 2: process the string
-        b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
-        if(*status == U_BUFFER_OVERFLOW_ERROR){
+        UErrorCode bufferStatus = U_ZERO_ERROR;
+        b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, &bufferStatus);
+        if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
             // redo processing of string
             /* we do not have enough room so grow the buffer*/
             b1 = static_cast<char16_t*>(uprv_malloc(b1Len * U_SIZEOF_UCHAR));
@@ -464,12 +471,13 @@ _internal_toUnicode(const char16_t* src, int32_t srcLength,
                 goto CLEANUP;
             }
 
-            *status = U_ZERO_ERROR; // reset error
-            
-            b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
+            bufferStatus = U_ZERO_ERROR; // reset error
+
+            b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, &bufferStatus);
         }
         //bail out on error
-        if(U_FAILURE(*status)){
+        if(U_FAILURE(bufferStatus)){
+            *status = bufferStatus;
             goto CLEANUP;
         }
     }else{
@@ -493,9 +501,10 @@ _internal_toUnicode(const char16_t* src, int32_t srcLength,
         b1PrimeLen  = b1Len - ACE_PREFIX_LENGTH;
 
         //step 5: Decode using punycode
-        b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);
+        UErrorCode bufferStatus = U_ZERO_ERROR;
+        b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags, &bufferStatus);
 
-        if(*status == U_BUFFER_OVERFLOW_ERROR){
+        if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
             // redo processing of string
             /* we do not have enough room so grow the buffer*/
             b2 = static_cast<char16_t*>(uprv_malloc(b2Len * U_SIZEOF_UCHAR));
@@ -504,16 +513,16 @@ _internal_toUnicode(const char16_t* src, int32_t srcLength,
                 goto CLEANUP;
             }
 
-            *status = U_ZERO_ERROR; // reset error
+            bufferStatus = U_ZERO_ERROR; // reset error
 
-            b2Len =  u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
+            b2Len =  u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, &bufferStatus);
         }
 
 
         //step 6:Apply toASCII
-        b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status);
+        b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, &bufferStatus);
 
-        if(*status == U_BUFFER_OVERFLOW_ERROR){
+        if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
             // redo processing of string
             /* we do not have enough room so grow the buffer*/
             b3 = static_cast<char16_t*>(uprv_malloc(b3Len * U_SIZEOF_UCHAR));
@@ -522,13 +531,14 @@ _internal_toUnicode(const char16_t* src, int32_t srcLength,
                 goto CLEANUP;
             }
 
-            *status = U_ZERO_ERROR; // reset error
+            bufferStatus = U_ZERO_ERROR; // reset error
 
-            b3Len =  uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
+            b3Len =  uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError,&bufferStatus);
 
         }
         //bail out on error
-        if(U_FAILURE(*status)){
+        if(U_FAILURE(bufferStatus)){
+            *status = bufferStatus;
             goto CLEANUP;
         }
 
@@ -706,24 +716,21 @@ uidna_IDNToASCII(  const char16_t *src, int32_t srcLength,
         labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
         labelReqLength = 0;
         if(!(labelLen==0 && done)){// make sure this is not a root label separator.
-        
-            labelReqLength = _internal_toASCII( labelStart, labelLen, 
-                                                currentDest, remainingDestCapacity, 
-                                                options, nameprep, 
-                                                parseError, status);
-    
-            if(*status == U_BUFFER_OVERFLOW_ERROR){
-                
-                *status = U_ZERO_ERROR; // reset error
+
+            UErrorCode bufferStatus = U_ZERO_ERROR;
+            labelReqLength = _internal_toASCII( labelStart, labelLen,
+                                                currentDest, remainingDestCapacity,
+                                                options, nameprep,
+                                                parseError, &bufferStatus);
+
+            if (bufferStatus == U_BUFFER_OVERFLOW_ERROR) {
                 remainingDestCapacity = 0;
+            } else if (U_FAILURE(bufferStatus)) {
+                *status = bufferStatus;
+                break;
             }
         }
 
-    
-        if(U_FAILURE(*status)){
-            break;
-        }
-        
         reqLength +=labelReqLength;
         // adjust the destination pointer
         if(labelReqLength < remainingDestCapacity){
@@ -877,8 +884,9 @@ uidna_compare(  const char16_t *s1, int32_t length1,
     
     UParseError parseError;
 
-    b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
-    if(*status == U_BUFFER_OVERFLOW_ERROR){
+    UErrorCode bufferStatus = U_ZERO_ERROR;
+    b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, &bufferStatus);
+    if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
         // redo processing of string
         b1 = (char16_t*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
         if(b1==nullptr){
@@ -886,14 +894,13 @@ uidna_compare(  const char16_t *s1, int32_t length1,
             goto CLEANUP;
         }
 
-        *status = U_ZERO_ERROR; // reset error
-        
-        b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
-        
+        bufferStatus = U_ZERO_ERROR; // reset error
+
+        b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, &bufferStatus);
     }
 
-    b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status);
-    if(*status == U_BUFFER_OVERFLOW_ERROR){
+    b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, &bufferStatus);
+    if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
         // redo processing of string
         b2 = (char16_t*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
         if(b2==nullptr){
@@ -901,11 +908,15 @@ uidna_compare(  const char16_t *s1, int32_t length1,
             goto CLEANUP;
         }
 
-        *status = U_ZERO_ERROR; // reset error
-        
-        b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status);
-        
+        bufferStatus = U_ZERO_ERROR; // reset error
+
+        b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, &bufferStatus);
+    }
+
+    if (U_FAILURE(bufferStatus)) {
+        *status = bufferStatus;
     }
+
     // when toASCII is applied all label separators are replaced with FULL_STOP
     result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
 

+ 16 - 2
thirdparty/icu4c/common/uloc.cpp

@@ -627,7 +627,7 @@ ulocimp_getKeywords(std::string_view localeID,
         do {
             bool duplicate = false;
             /* skip leading spaces */
-            while (localeID.front() == ' ') {
+            while (!localeID.empty() && localeID.front() == ' ') {
                 localeID.remove_prefix(1);
             }
             if (localeID.empty()) { /* handle trailing "; " */
@@ -1102,7 +1102,21 @@ ulocimp_setKeywordValue(std::string_view keywords,
         /* if input key/value specified removal of a keyword not present in locale, or
          * there was an error in CharString.append, leave original locale alone. */
         U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
-        return static_cast<int32_t>(keywords.size());
+        // The sink is expected to be a buffer which already contains the full
+        // locale string, so when it isn't going to be modified there's no need
+        // to actually write any data to it, as the data is already there. Only
+        // the first character needs to be overwritten (changing '\0' to '@').
+        needLen = static_cast<int32_t>(keywords.size());
+        int32_t capacity = 0;
+        char* buffer = sink.GetAppendBuffer(
+                needLen, needLen, nullptr, needLen, &capacity);
+        if (capacity < needLen || buffer == nullptr) {
+            status = U_BUFFER_OVERFLOW_ERROR;
+        } else {
+            *buffer = '@';
+            sink.Append(buffer, needLen);
+        }
+        return needLen;
     }
 
     needLen = updatedKeysAndValues.length();

+ 20 - 24
thirdparty/icu4c/common/uloc_keytype.cpp

@@ -14,9 +14,9 @@
 #include "unicode/unistr.h"
 #include "unicode/uobject.h"
 
-#include "charstr.h"
 #include "cmemory.h"
 #include "cstring.h"
+#include "fixedstring.h"
 #include "uassert.h"
 #include "ucln_cmn.h"
 #include "uhash.h"
@@ -53,7 +53,7 @@ struct TypeAlias : public icu::UMemory {
     std::string_view from;
 };
 
-static icu::MemoryPool<icu::CharString>* gKeyTypeStringPool = nullptr;
+static icu::MemoryPool<icu::FixedString>* gKeyTypeStringPool = nullptr;
 static icu::MemoryPool<LocExtKeyData>* gLocExtKeyDataEntries = nullptr;
 static icu::MemoryPool<LocExtType>* gLocExtTypeEntries = nullptr;
 static icu::MemoryPool<TypeAlias>* gTypeAliasEntries = nullptr;
@@ -108,7 +108,7 @@ initFromResourceBundle(UErrorCode& sts) {
     LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", nullptr, &tmpSts));
 
     // initialize pools storing dynamically allocated objects
-    gKeyTypeStringPool = new icu::MemoryPool<icu::CharString>;
+    gKeyTypeStringPool = new icu::MemoryPool<icu::FixedString>;
     if (gKeyTypeStringPool == nullptr) {
         sts = U_MEMORY_ALLOCATION_ERROR;
         return;
@@ -146,12 +146,12 @@ initFromResourceBundle(UErrorCode& sts) {
         // empty value indicates that BCP key is same with the legacy key.
         const char* bcpKeyId = legacyKeyId;
         if (!uBcpKeyId.isEmpty()) {
-            icu::CharString* bcpKeyIdBuf = gKeyTypeStringPool->create();
+            icu::FixedString* bcpKeyIdBuf = gKeyTypeStringPool->create();
             if (bcpKeyIdBuf == nullptr) {
                 sts = U_MEMORY_ALLOCATION_ERROR;
                 break;
             }
-            bcpKeyIdBuf->appendInvariantChars(uBcpKeyId, sts);
+            copyInvariantChars(uBcpKeyId, *bcpKeyIdBuf, sts);
             if (U_FAILURE(sts)) {
                 break;
             }
@@ -220,18 +220,16 @@ initFromResourceBundle(UErrorCode& sts) {
                     // a timezone key uses a colon instead of a slash in the resource.
                     // e.g. America:Los_Angeles
                     if (uprv_strchr(legacyTypeId, ':') != nullptr) {
-                        icu::CharString* legacyTypeIdBuf =
-                                gKeyTypeStringPool->create(legacyTypeId, sts);
-                        if (legacyTypeIdBuf == nullptr) {
+                        U_ASSERT(legacyTypeId != nullptr && *legacyTypeId != '\0');
+                        std::string_view legacyTypeIdView = legacyTypeId;
+                        icu::FixedString* legacyTypeIdBuf = gKeyTypeStringPool->create(legacyTypeIdView);
+                        if (legacyTypeIdBuf == nullptr || legacyTypeIdBuf->isEmpty()) {
                             sts = U_MEMORY_ALLOCATION_ERROR;
                             break;
                         }
-                        if (U_FAILURE(sts)) {
-                            break;
-                        }
                         std::replace(
-                                legacyTypeIdBuf->data(),
-                                legacyTypeIdBuf->data() + legacyTypeIdBuf->length(),
+                                legacyTypeIdBuf->getAlias(),
+                                legacyTypeIdBuf->getAlias() + legacyTypeIdView.length(),
                                 ':', '/');
                         legacyTypeId = legacyTypeIdBuf->data();
                     }
@@ -245,12 +243,12 @@ initFromResourceBundle(UErrorCode& sts) {
                 // empty value indicates that BCP type is same with the legacy type.
                 const char* bcpTypeId = legacyTypeId;
                 if (!uBcpTypeId.isEmpty()) {
-                    icu::CharString* bcpTypeIdBuf = gKeyTypeStringPool->create();
+                    icu::FixedString* bcpTypeIdBuf = gKeyTypeStringPool->create();
                     if (bcpTypeIdBuf == nullptr) {
                         sts = U_MEMORY_ALLOCATION_ERROR;
                         break;
                     }
-                    bcpTypeIdBuf->appendInvariantChars(uBcpTypeId, sts);
+                    copyInvariantChars(uBcpTypeId, *bcpTypeIdBuf, sts);
                     if (U_FAILURE(sts)) {
                         break;
                     }
@@ -302,20 +300,18 @@ initFromResourceBundle(UErrorCode& sts) {
                             if (isTZ) {
                                 // replace colon with slash if necessary
                                 if (uprv_strchr(from, ':') != nullptr) {
-                                    icu::CharString* fromBuf =
-                                            gKeyTypeStringPool->create(from, sts);
-                                    if (fromBuf == nullptr) {
+                                    U_ASSERT(from != nullptr && *from != '\0');
+                                    std::string_view fromView = from;
+                                    icu::FixedString* fromBuf = gKeyTypeStringPool->create(fromView);
+                                    if (fromBuf == nullptr || fromBuf->isEmpty()) {
                                         sts = U_MEMORY_ALLOCATION_ERROR;
                                         break;
                                     }
-                                    if (U_FAILURE(sts)) {
-                                        break;
-                                    }
                                     std::replace(
-                                            fromBuf->data(),
-                                            fromBuf->data() + fromBuf->length(),
+                                            fromBuf->getAlias(),
+                                            fromBuf->getAlias() + fromView.length(),
                                             ':', '/');
-                                    alias->from = fromBuf->toStringPiece();
+                                    alias->from = {fromBuf->data(), fromView.length()};
                                 }
                             }
                             uhash_put(typeDataMap, &alias->from, t, &sts);

+ 39 - 39
thirdparty/icu4c/common/ulocimp.h

@@ -55,95 +55,95 @@ uloc_getCurrentCountryID(const char* oldID);
 U_CFUNC const char* 
 uloc_getCurrentLanguageID(const char* oldID);
 
-U_EXPORT std::optional<std::string_view>
+U_COMMON_API std::optional<std::string_view>
 ulocimp_toBcpKeyWithFallback(std::string_view keyword);
 
-U_EXPORT std::optional<std::string_view>
+U_COMMON_API std::optional<std::string_view>
 ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value);
 
-U_EXPORT std::optional<std::string_view>
+U_COMMON_API std::optional<std::string_view>
 ulocimp_toLegacyKeyWithFallback(std::string_view keyword);
 
-U_EXPORT std::optional<std::string_view>
+U_COMMON_API std::optional<std::string_view>
 ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value);
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_getKeywords(std::string_view localeID,
                     char prev,
                     bool valuesToo,
                     UErrorCode& status);
 
-U_EXPORT void
+U_COMMON_API void
 ulocimp_getKeywords(std::string_view localeID,
                     char prev,
                     icu::ByteSink& sink,
                     bool valuesToo,
                     UErrorCode& status);
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_getName(std::string_view localeID,
                 UErrorCode& err);
 
-U_EXPORT void
+U_COMMON_API void
 ulocimp_getName(std::string_view localeID,
                 icu::ByteSink& sink,
                 UErrorCode& err);
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_getBaseName(std::string_view localeID,
                     UErrorCode& err);
 
-U_EXPORT void
+U_COMMON_API void
 ulocimp_getBaseName(std::string_view localeID,
                     icu::ByteSink& sink,
                     UErrorCode& err);
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_canonicalize(std::string_view localeID,
                      UErrorCode& err);
 
-U_EXPORT void
+U_COMMON_API void
 ulocimp_canonicalize(std::string_view localeID,
                      icu::ByteSink& sink,
                      UErrorCode& err);
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_getKeywordValue(const char* localeID,
                         std::string_view keywordName,
                         UErrorCode& status);
 
-U_EXPORT void
+U_COMMON_API void
 ulocimp_getKeywordValue(const char* localeID,
                         std::string_view keywordName,
                         icu::ByteSink& sink,
                         UErrorCode& status);
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_getLanguage(std::string_view localeID, UErrorCode& status);
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_getScript(std::string_view localeID, UErrorCode& status);
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_getRegion(std::string_view localeID, UErrorCode& status);
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_getVariant(std::string_view localeID, UErrorCode& status);
 
-U_EXPORT void
+U_COMMON_API void
 ulocimp_setKeywordValue(std::string_view keywordName,
                         std::string_view keywordValue,
                         icu::CharString& localeID,
                         UErrorCode& status);
 
-U_EXPORT int32_t
+U_COMMON_API int32_t
 ulocimp_setKeywordValue(std::string_view keywords,
                         std::string_view keywordName,
                         std::string_view keywordValue,
                         icu::ByteSink& sink,
                         UErrorCode& status);
 
-U_EXPORT void
+U_COMMON_API void
 ulocimp_getSubtags(
         std::string_view localeID,
         icu::CharString* language,
@@ -153,7 +153,7 @@ ulocimp_getSubtags(
         const char** pEnd,
         UErrorCode& status);
 
-U_EXPORT void
+U_COMMON_API void
 ulocimp_getSubtags(
         std::string_view localeID,
         icu::ByteSink* language,
@@ -182,16 +182,16 @@ ulocimp_getSubtags(
             status);
 }
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_getParent(const char* localeID,
                   UErrorCode& err);
 
-U_EXPORT void
+U_COMMON_API void
 ulocimp_getParent(const char* localeID,
                   icu::ByteSink& sink,
                   UErrorCode& err);
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_toLanguageTag(const char* localeID,
                       bool strict,
                       UErrorCode& status);
@@ -215,13 +215,13 @@ ulocimp_toLanguageTag(const char* localeID,
  *
  * @internal ICU 64
  */
-U_EXPORT void
+U_COMMON_API void
 ulocimp_toLanguageTag(const char* localeID,
                       icu::ByteSink& sink,
                       bool strict,
                       UErrorCode& err);
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_forLanguageTag(const char* langtag,
                        int32_t tagLen,
                        int32_t* parsedLength,
@@ -253,7 +253,7 @@ ulocimp_forLanguageTag(const char* langtag,
  *                  failed.
  * @internal ICU 63
  */
-U_EXPORT void
+U_COMMON_API void
 ulocimp_forLanguageTag(const char* langtag,
                        int32_t tagLen,
                        icu::ByteSink& sink,
@@ -280,11 +280,11 @@ ulocimp_forLanguageTag(const char* langtag,
  *     The region code found, empty if none found.
  * @internal ICU 57
  */
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
                                      UErrorCode& status);
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_addLikelySubtags(const char* localeID,
                          UErrorCode& status);
 
@@ -317,12 +317,12 @@ ulocimp_addLikelySubtags(const char* localeID,
  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
  * @internal ICU 64
  */
-U_EXPORT void
+U_COMMON_API void
 ulocimp_addLikelySubtags(const char* localeID,
                          icu::ByteSink& sink,
                          UErrorCode& err);
 
-U_EXPORT icu::CharString
+U_COMMON_API icu::CharString
 ulocimp_minimizeSubtags(const char* localeID,
                         bool favorScript,
                         UErrorCode& status);
@@ -357,7 +357,7 @@ ulocimp_minimizeSubtags(const char* localeID,
  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
  * @internal ICU 64
  */
-U_EXPORT void
+U_COMMON_API void
 ulocimp_minimizeSubtags(const char* localeID,
                         icu::ByteSink& sink,
                         bool favorScript,
@@ -405,24 +405,24 @@ ultag_isVariantSubtags(const char* s, int32_t len);
 const char*
 ultag_getTKeyStart(const char* localeID);
 
-U_EXPORT std::optional<std::string_view>
+U_COMMON_API std::optional<std::string_view>
 ulocimp_toBcpKey(std::string_view key);
 
-U_EXPORT std::optional<std::string_view>
+U_COMMON_API std::optional<std::string_view>
 ulocimp_toLegacyKey(std::string_view key);
 
-U_EXPORT std::optional<std::string_view>
+U_COMMON_API std::optional<std::string_view>
 ulocimp_toBcpType(std::string_view key, std::string_view type);
 
-U_EXPORT std::optional<std::string_view>
+U_COMMON_API std::optional<std::string_view>
 ulocimp_toLegacyType(std::string_view key, std::string_view type);
 
 /* Function for testing purpose */
-U_EXPORT const char* const*
+U_COMMON_API const char* const*
 ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length);
 
 // Return true if the value is already canonicalized.
-U_EXPORT bool
+U_COMMON_API bool
 ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
 
 #ifdef __cplusplus

+ 7 - 0
thirdparty/icu4c/common/umapfile.cpp

@@ -119,6 +119,7 @@ typedef HANDLE MemoryMap;
 
         HANDLE map = nullptr;
         HANDLE file = INVALID_HANDLE_VALUE;
+        DWORD fileLength = 0;
 
         UDataMemory_init(pData); /* Clear the output struct.        */
 
@@ -159,6 +160,8 @@ typedef HANDLE MemoryMap;
             return false;
         }
 
+        fileLength = GetFileSize(file, nullptr);
+
         // Note: We use nullptr/nullptr for lpAttributes parameter below.
         // This means our handle cannot be inherited and we will get the default security descriptor.
         /* create an unnamed Windows file-mapping object for the specified file */
@@ -181,6 +184,8 @@ typedef HANDLE MemoryMap;
             return false;
         }
         pData->map = map;
+        pData->length = fileLength;
+
         return true;
     }
 
@@ -237,6 +242,7 @@ typedef HANDLE MemoryMap;
         pData->map = (char *)data + length;
         pData->pHeader=(const DataHeader *)data;
         pData->mapAddr = data;
+        pData->length = length;
 #if U_PLATFORM == U_PF_IPHONE || U_PLATFORM == U_PF_ANDROID
     // Apparently supported from Android 23 and higher:
     //   https://github.com/ggml-org/llama.cpp/pull/3631
@@ -320,6 +326,7 @@ typedef HANDLE MemoryMap;
         pData->map=p;
         pData->pHeader=(const DataHeader *)p;
         pData->mapAddr=p;
+        pData->length = fileLength;
         return true;
     }
 

+ 25 - 39
thirdparty/icu4c/common/umutex.h

@@ -37,31 +37,6 @@
 #error U_USER_ATOMICS and U_USER_MUTEX_H are not supported
 #endif
 
-// Export an explicit template instantiation of std::atomic<int32_t>. 
-// When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class.
-// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
-//
-// Similar story for std::atomic<std::mutex *>, and the exported UMutex class.
-#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN)
-#if defined(__clang__) || defined(_MSC_VER)
-  #if defined(__clang__)
-    // Suppress the warning that the explicit instantiation after explicit specialization has no effect.
-    #pragma clang diagnostic push
-    #pragma clang diagnostic ignored "-Winstantiation-after-specialization"
-  #endif
-template struct U_COMMON_API std::atomic<int32_t>;
-template struct U_COMMON_API std::atomic<std::mutex *>;
-  #if defined(__clang__)
-    #pragma clang diagnostic pop
-  #endif
-#elif defined(__GNUC__)
-// For GCC this class is already exported/visible, so no need for U_COMMON_API.
-template struct std::atomic<int32_t>;
-template struct std::atomic<std::mutex *>;
-#endif
-#endif
-
-
 U_NAMESPACE_BEGIN
 
 /****************************************************************************
@@ -95,11 +70,22 @@ inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) {
  *
  *************************************************************************************************/
 
-struct U_COMMON_API UInitOnce {
-    u_atomic_int32_t   fState {0};
-    UErrorCode       fErrCode {U_ZERO_ERROR};
-    void reset() {fState = 0;}
-    UBool isReset() {return umtx_loadAcquire(fState) == 0;}
+struct U_COMMON_API_CLASS UInitOnce {
+private:
+    friend U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce&);
+    friend U_COMMON_API void U_EXPORT2 umtx_initImplPostInit(UInitOnce&);
+    template <typename T> friend void umtx_initOnce(UInitOnce&, T*, void (T::*)());
+    friend void umtx_initOnce(UInitOnce&, void (*)());
+    friend void umtx_initOnce(UInitOnce&, void (*)(UErrorCode&), UErrorCode&);
+    template <typename T> friend void umtx_initOnce(UInitOnce&, void (*)(T), T);
+    template <typename T> friend void umtx_initOnce(UInitOnce&, void (*)(T, UErrorCode&), T, UErrorCode&);
+
+    u_atomic_int32_t fState{0};
+    UErrorCode fErrCode{U_ZERO_ERROR};
+
+public:
+    U_COMMON_API void reset() { fState = 0; }
+    U_COMMON_API UBool isReset() { return umtx_loadAcquire(fState) == 0; }
 // Note: isReset() is used by service registration code.
 //                 Thread safety of this usage needs review.
 };
@@ -216,24 +202,24 @@ template<class T> void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T, UE
  *    }         // myMutex is released when lock goes out of scope.
  */
 
-class U_COMMON_API UMutex {
+class U_COMMON_API_CLASS UMutex {
 public:
-    UMUTEX_CONSTEXPR UMutex() {}
-    ~UMutex() = default;
+    U_COMMON_API UMUTEX_CONSTEXPR UMutex() {}
+    U_COMMON_API ~UMutex() = default;
 
-    UMutex(const UMutex &other) = delete;
-    UMutex &operator =(const UMutex &other) = delete;
-    void *operator new(size_t) = delete;
+    U_COMMON_API UMutex(const UMutex& other) = delete;
+    U_COMMON_API UMutex& operator=(const UMutex& other) = delete;
+    U_COMMON_API void* operator new(size_t) = delete;
 
     // requirements for C++ BasicLockable, allows UMutex to work with std::lock_guard
-    void lock() {
+    U_COMMON_API void lock() {
         std::mutex *m = fMutex.load(std::memory_order_acquire);
         if (m == nullptr) { m = getMutex(); }
         m->lock();
     }
-    void unlock() { fMutex.load(std::memory_order_relaxed)->unlock(); }
+    U_COMMON_API void unlock() { fMutex.load(std::memory_order_relaxed)->unlock(); }
 
-    static void cleanup();
+    U_COMMON_API static void cleanup();
 
 private:
     alignas(std::mutex) char fStorage[sizeof(std::mutex)] {};

+ 56 - 57
thirdparty/icu4c/common/unicode/brkiter.h

@@ -58,8 +58,6 @@ U_NAMESPACE_END
 
 U_NAMESPACE_BEGIN
 
-class CharString;
-
 /**
  * The BreakIterator class implements methods for finding the location
  * of boundaries in text. BreakIterator is an abstract base class.
@@ -105,13 +103,13 @@ class CharString;
  * and in the sample program icu/source/samples/break/break.cpp
  *
  */
-class U_COMMON_API BreakIterator : public UObject {
+class U_COMMON_API_CLASS BreakIterator : public UObject {
 public:
     /**
      *  destructor
      *  @stable ICU 2.0
      */
-    virtual ~BreakIterator();
+    U_COMMON_API virtual ~BreakIterator();
 
     /**
      * Return true if another object is semantically equal to this
@@ -126,7 +124,7 @@ public:
      * object, and styles are not considered.
      * @stable ICU 2.0
      */
-    virtual bool operator==(const BreakIterator&) const = 0;
+    U_COMMON_API virtual bool operator==(const BreakIterator&) const = 0;
 
     /**
      * Returns the complement of the result of operator==
@@ -134,27 +132,27 @@ public:
      * @return the complement of the result of operator==
      * @stable ICU 2.0
      */
-    bool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
+    U_COMMON_API bool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
 
     /**
      * Return a polymorphic copy of this object.  This is an abstract
      * method which subclasses implement.
      * @stable ICU 2.0
      */
-    virtual BreakIterator* clone() const = 0;
+    U_COMMON_API virtual BreakIterator* clone() const = 0;
 
     /**
      * Return a polymorphic class ID for this object. Different subclasses
      * will return distinct unequal values.
      * @stable ICU 2.0
      */
-    virtual UClassID getDynamicClassID() const override = 0;
+    U_COMMON_API virtual UClassID getDynamicClassID() const override = 0;
 
     /**
      * Return a CharacterIterator over the text being analyzed.
      * @stable ICU 2.0
      */
-    virtual CharacterIterator& getText() const = 0;
+    U_COMMON_API virtual CharacterIterator& getText() const = 0;
 
     /**
       *  Get a UText for the text being analyzed.
@@ -170,7 +168,7 @@ public:
       *           UText was provided, it will always be returned.
       * @stable ICU 3.4
       */
-     virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
+    U_COMMON_API virtual UText* getUText(UText* fillIn, UErrorCode& status) const = 0;
 
     /**
      * Change the text over which this operates. The text boundary is
@@ -183,7 +181,7 @@ public:
      * @param text The UnicodeString used to change the text.
      * @stable ICU 2.0
      */
-    virtual void  setText(const UnicodeString &text) = 0;
+    U_COMMON_API virtual void setText(const UnicodeString& text) = 0;
 
     /**
      * Reset the break iterator to operate over the text represented by
@@ -203,7 +201,7 @@ public:
      * @param status receives any error codes.
      * @stable ICU 3.4
      */
-    virtual void  setText(UText *text, UErrorCode &status) = 0;
+    U_COMMON_API virtual void setText(UText* text, UErrorCode& status) = 0;
 
     /**
      * Change the text over which this operates. The text boundary is
@@ -213,7 +211,7 @@ public:
      * @param it The CharacterIterator used to change the text.
      * @stable ICU 2.0
      */
-    virtual void  adoptText(CharacterIterator* it) = 0;
+    U_COMMON_API virtual void adoptText(CharacterIterator* it) = 0;
 
     enum {
         /**
@@ -229,14 +227,14 @@ public:
      * @return The offset of the beginning of the text, zero.
      * @stable ICU 2.0
      */
-    virtual int32_t first() = 0;
+    U_COMMON_API virtual int32_t first() = 0;
 
     /**
      * Set the iterator position to the index immediately BEYOND the last character in the text being scanned.
      * @return The index immediately BEYOND the last character in the text being scanned.
      * @stable ICU 2.0
      */
-    virtual int32_t last() = 0;
+    U_COMMON_API virtual int32_t last() = 0;
 
     /**
      * Set the iterator position to the boundary preceding the current boundary.
@@ -244,7 +242,7 @@ public:
      * boundaries have been returned.
      * @stable ICU 2.0
      */
-    virtual int32_t previous() = 0;
+    U_COMMON_API virtual int32_t previous() = 0;
 
     /**
      * Advance the iterator to the boundary following the current boundary.
@@ -252,14 +250,14 @@ public:
      * boundaries have been returned.
      * @stable ICU 2.0
      */
-    virtual int32_t next() = 0;
+    U_COMMON_API virtual int32_t next() = 0;
 
     /**
      * Return character index of the current iterator position within the text.
      * @return The boundary most recently returned.
      * @stable ICU 2.0
      */
-    virtual int32_t current() const = 0;
+    U_COMMON_API virtual int32_t current() const = 0;
 
     /**
      * Advance the iterator to the first boundary following the specified offset.
@@ -269,7 +267,7 @@ public:
      * @return The first boundary after the specified offset.
      * @stable ICU 2.0
      */
-    virtual int32_t following(int32_t offset) = 0;
+    U_COMMON_API virtual int32_t following(int32_t offset) = 0;
 
     /**
      * Set the iterator position to the first boundary preceding the specified offset.
@@ -279,7 +277,7 @@ public:
      * @return The first boundary before the specified offset.
      * @stable ICU 2.0
      */
-    virtual int32_t preceding(int32_t offset) = 0;
+    U_COMMON_API virtual int32_t preceding(int32_t offset) = 0;
 
     /**
      * Return true if the specified position is a boundary position.
@@ -289,7 +287,7 @@ public:
      * @return True if "offset" is a boundary position.
      * @stable ICU 2.0
      */
-    virtual UBool isBoundary(int32_t offset) = 0;
+    U_COMMON_API virtual UBool isBoundary(int32_t offset) = 0;
 
     /**
      * Set the iterator position to the nth boundary from the current boundary
@@ -300,9 +298,9 @@ public:
      * DONE if there are fewer than |n| boundaries in the specified direction.
      * @stable ICU 2.0
      */
-    virtual int32_t next(int32_t n) = 0;
+    U_COMMON_API virtual int32_t next(int32_t n) = 0;
 
-   /**
+    /**
      * For RuleBasedBreakIterators, return the status tag from the break rule
      * that determined the boundary at the current iteration position.
      * <p>
@@ -315,7 +313,7 @@ public:
      * @see UWordBreak
      * @stable ICU 52
      */
-    virtual int32_t getRuleStatus() const;
+    U_COMMON_API virtual int32_t getRuleStatus() const;
 
    /**
     * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
@@ -345,7 +343,9 @@ public:
     * @see getRuleStatus
     * @stable ICU 52
     */
-    virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
+    U_COMMON_API virtual int32_t getRuleStatusVec(int32_t* fillInVec,
+                                                  int32_t capacity,
+                                                  UErrorCode& status);
 
     /**
      * Create BreakIterator for word-breaks using the given locale.
@@ -366,7 +366,7 @@ public:
      * The caller owns the returned object and is responsible for deleting it.
      * @stable ICU 2.0
      */
-    static BreakIterator* U_EXPORT2
+    U_COMMON_API static BreakIterator* U_EXPORT2
     createWordInstance(const Locale& where, UErrorCode& status);
 
     /**
@@ -390,7 +390,7 @@ public:
      * The caller owns the returned object and is responsible for deleting it.
      * @stable ICU 2.0
      */
-    static BreakIterator* U_EXPORT2
+    U_COMMON_API static BreakIterator* U_EXPORT2
     createLineInstance(const Locale& where, UErrorCode& status);
 
     /**
@@ -412,7 +412,7 @@ public:
      * The caller owns the returned object and is responsible for deleting it.
      * @stable ICU 2.0
      */
-    static BreakIterator* U_EXPORT2
+    U_COMMON_API static BreakIterator* U_EXPORT2
     createCharacterInstance(const Locale& where, UErrorCode& status);
 
     /**
@@ -433,7 +433,7 @@ public:
      * The caller owns the returned object and is responsible for deleting it.
      * @stable ICU 2.0
      */
-    static BreakIterator* U_EXPORT2
+    U_COMMON_API static BreakIterator* U_EXPORT2
     createSentenceInstance(const Locale& where, UErrorCode& status);
 
 #ifndef U_HIDE_DEPRECATED_API
@@ -459,7 +459,7 @@ public:
      * The caller owns the returned object and is responsible for deleting it.
      * @deprecated ICU 64 Use createWordInstance instead.
      */
-    static BreakIterator* U_EXPORT2
+    U_COMMON_API static BreakIterator* U_EXPORT2
     createTitleInstance(const Locale& where, UErrorCode& status);
 #endif /* U_HIDE_DEPRECATED_API */
 
@@ -472,7 +472,7 @@ public:
      * @return available locales
      * @stable ICU 2.0
      */
-    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
+    U_COMMON_API static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
 
     /**
      * Get name of the object for the desired Locale, in the desired language.
@@ -483,9 +483,9 @@ public:
      * @return user-displayable name
      * @stable ICU 2.0
      */
-    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
-                                         const Locale& displayLocale,
-                                         UnicodeString& name);
+    U_COMMON_API static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
+                                                                const Locale& displayLocale,
+                                                                UnicodeString& name);
 
     /**
      * Get name of the object for the desired Locale, in the language of the
@@ -495,8 +495,8 @@ public:
      * @return user-displayable name
      * @stable ICU 2.0
      */
-    static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
-                                         UnicodeString& name);
+    U_COMMON_API static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
+                                                                UnicodeString& name);
 
 #ifndef U_FORCE_HIDE_DEPRECATED_API
     /**
@@ -518,9 +518,9 @@ public:
      *
      * @deprecated ICU 52. Use clone() instead.
      */
-    virtual BreakIterator *  createBufferClone(void *stackBuffer,
-                                               int32_t &BufferSize,
-                                               UErrorCode &status) = 0;
+    U_COMMON_API virtual BreakIterator* createBufferClone(void* stackBuffer,
+                                                          int32_t& BufferSize,
+                                                          UErrorCode& status) = 0;
 #endif  // U_FORCE_HIDE_DEPRECATED_API
 
 #ifndef U_HIDE_DEPRECATED_API
@@ -531,7 +531,7 @@ public:
      *   must be closed by an explicit call to the destructor (not delete).
      * @deprecated ICU 52. Always delete the BreakIterator.
      */
-    inline UBool isBufferClone();
+    U_COMMON_API inline UBool isBufferClone();
 
 #endif /* U_HIDE_DEPRECATED_API */
 
@@ -551,10 +551,10 @@ public:
      * @return a registry key that can be used to unregister this instance
      * @stable ICU 2.4
      */
-    static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
-                                        const Locale& locale,
-                                        UBreakIteratorType kind,
-                                        UErrorCode& status);
+    U_COMMON_API static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
+                                                                const Locale& locale,
+                                                                UBreakIteratorType kind,
+                                                                UErrorCode& status);
 
     /**
      * Unregister a previously-registered BreakIterator using the key returned from the
@@ -568,7 +568,7 @@ public:
      * @return true if the iterator for the key was successfully unregistered
      * @stable ICU 2.4
      */
-    static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
+    U_COMMON_API static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
 
     /**
      * Return a StringEnumeration over the locales available at the time of the call,
@@ -576,7 +576,7 @@ public:
      * @return a StringEnumeration over the locales available at the time of the call
      * @stable ICU 2.4
      */
-    static StringEnumeration* U_EXPORT2 getAvailableLocales();
+    U_COMMON_API static StringEnumeration* U_EXPORT2 getAvailableLocales();
 #endif
 
     /**
@@ -584,7 +584,7 @@ public:
      * actual locale.
      * @stable ICU 2.8
      */
-    Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+    U_COMMON_API Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
 
 #ifndef U_HIDE_INTERNAL_API
     /** Get the locale for this break iterator object. You can choose between valid and actual locale.
@@ -593,7 +593,7 @@ public:
      *  @return the locale
      *  @internal
      */
-    const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
+    U_COMMON_API const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
 #endif  /* U_HIDE_INTERNAL_API */
 
     /**
@@ -621,7 +621,7 @@ public:
      *
      * @stable ICU 49
      */
-    virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
+    U_COMMON_API virtual BreakIterator& refreshInputText(UText* input, UErrorCode& status) = 0;
 
  private:
     static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
@@ -635,22 +635,21 @@ protected:
     // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
     // or else the compiler will create a public ones.
     /** @internal */
-    BreakIterator();
+    U_COMMON_API BreakIterator();
     /** @internal */
-    BreakIterator (const BreakIterator &other);
+    U_COMMON_API BreakIterator(const BreakIterator& other);
 #ifndef U_HIDE_INTERNAL_API
     /** @internal */
-    BreakIterator (const Locale& valid, const Locale &actual);
+    U_COMMON_API BreakIterator(const Locale& valid, const Locale& actual);
     /** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
-    BreakIterator &operator = (const BreakIterator &other);
+    U_COMMON_API BreakIterator& operator=(const BreakIterator& other);
 #endif  /* U_HIDE_INTERNAL_API */
 
 private:
 
-    /** @internal (private) */
-    CharString* actualLocale = nullptr;
-    CharString* validLocale = nullptr;
-    CharString* requestLocale = nullptr;
+    Locale actualLocale;
+    Locale validLocale;
+    Locale requestLocale;
 };
 
 #ifndef U_HIDE_DEPRECATED_API

+ 33 - 2
thirdparty/icu4c/common/unicode/bytestream.h

@@ -41,6 +41,8 @@
 
 #if U_SHOW_CPLUSPLUS_API
 
+#include <type_traits>
+
 #include "unicode/uobject.h"
 #include "unicode/std_string.h"
 
@@ -258,13 +260,36 @@ private:
   CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
 };
 
+namespace prv {
+/** @internal */
+template<typename StringClass, typename = void>
+struct value_type_or_char {
+  /** @internal */
+  using type = char;
+};
+/** @internal */
+template<typename StringClass>
+struct value_type_or_char<StringClass, std::void_t<typename StringClass::value_type>> {
+  /** @internal */
+  using type = typename StringClass::value_type;
+};
+/** @internal */
+template<typename StringClass>
+using value_type_or_char_t = typename value_type_or_char<StringClass>::type;
+}
+
 /** 
  * Implementation of ByteSink that writes to a "string".
- * The StringClass is usually instantiated with a std::string.
+ * The StringClass is usually instantiated with a std::string or a std::u8string.
+ * StringClass must have public member functions reserve(integer type), capacity(), length(), and
+ * append(pointer to value type, integer type) with the same semantics as those of
+ * std::basic_string, and must have an 8-bit value type.  If the value type is not char, it must
+ * be exposed as the public member type StringClass::value_type.
  * @stable ICU 4.2
  */
 template<typename StringClass>
 class StringByteSink : public ByteSink {
+  using Unit = typename prv::value_type_or_char_t<StringClass>;
  public:
   /**
    * Constructs a ByteSink that will append bytes to the dest string.
@@ -291,7 +316,13 @@ class StringByteSink : public ByteSink {
    * @param n the number of bytes; must be non-negative
    * @stable ICU 4.2
    */
-  virtual void Append(const char* data, int32_t n) override { dest_->append(data, n); }
+  virtual void Append(const char* data, int32_t n) override {
+    if constexpr (std::is_same_v<Unit, char>) {
+      dest_->append(data, n);
+    } else {
+      dest_->append(reinterpret_cast<const Unit*>(data), n);
+    }
+  }
  private:
   StringClass* dest_;
 

+ 0 - 3
thirdparty/icu4c/common/unicode/bytestriebuilder.h

@@ -150,9 +150,6 @@ private:
     virtual int32_t getMinLinearMatch() const override { return BytesTrie::kMinLinearMatch; }
     virtual int32_t getMaxLinearMatchLength() const override { return BytesTrie::kMaxLinearMatchLength; }
 
-    /**
-     * @internal (private)
-     */
     class BTLinearMatchNode : public LinearMatchNode {
     public:
         BTLinearMatchNode(const char *units, int32_t len, Node *nextNode);

+ 0 - 2
thirdparty/icu4c/common/unicode/caniter.h

@@ -156,13 +156,11 @@ private:
 
     /**
      * Copy constructor. Private for now.
-     * @internal (private)
      */
     CanonicalIterator(const CanonicalIterator& other) = delete;
 
     /**
      * Assignment operator. Private for now.
-     * @internal (private)
      */
     CanonicalIterator& operator=(const CanonicalIterator& other) = delete;
 

+ 4 - 2
thirdparty/icu4c/common/unicode/docmain.h

@@ -75,7 +75,8 @@
  *   <tr>
  *     <td>Strings and Character Iteration</td>
  *     <td>ustring.h, utf8.h, utf16.h, icu::StringPiece, UText, UCharIterator, icu::ByteSink</td>
- *     <td>icu::UnicodeString, icu::CharacterIterator, icu::Appendable, icu::StringPiece,icu::ByteSink</td>
+ *     <td>icu::UnicodeString, utfiterator.h (ICU 78+), icu::CharacterIterator, icu::Appendable,<br>
+ *         icu::StringPiece, icu::ByteSink</td>
  *   </tr>
  *   <tr>
  *     <td>Unicode Character<br/>Properties and Names</td>
@@ -160,7 +161,8 @@
  *   <tr>
  *     <td>Number Formatting<br/>(includes currency and unit formatting)</td>
  *     <td>unumberformatter.h, unum.h, usimplenumberformatter.h</td>
- *     <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)<br>icu::number::SimpleNumberFormatter (ICU 73+)</td>
+ *     <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)<br>
+ *         icu::number::SimpleNumberFormatter (ICU 73+)</td>
  *   </tr>
  *   <tr>
  *     <td>Number Range Formatting<br />(includes currency and unit ranges)</td>

+ 2 - 2
thirdparty/icu4c/common/unicode/localebuilder.h

@@ -18,7 +18,7 @@
  */
 
 U_NAMESPACE_BEGIN
-class CharString;
+class FixedString;
 
 /**
  * <code>LocaleBuilder</code> is used to build instances of <code>Locale</code>
@@ -297,7 +297,7 @@ private:
     char language_[9];
     char script_[5];
     char region_[4];
-    CharString *variant_;  // Pointer not object so we need not #include internal charstr.h.
+    FixedString *variant_;  // Pointer not object so we need not #include internal fixedstring.h.
     icu::Locale *extensions_;  // Pointer not object. Storage for all other fields.
 
 };

+ 0 - 2
thirdparty/icu4c/common/unicode/localpointer.h

@@ -70,9 +70,7 @@ public:
     // No heap allocation. Use only on the stack.
     static void* U_EXPORT2 operator new(size_t) = delete;
     static void* U_EXPORT2 operator new[](size_t) = delete;
-#if U_HAVE_PLACEMENT_NEW
     static void* U_EXPORT2 operator new(size_t, void*) = delete;
-#endif
 
     /**
      * Constructor takes ownership.

+ 246 - 138
thirdparty/icu4c/common/unicode/locid.h

@@ -35,6 +35,9 @@
 
 #if U_SHOW_CPLUSPLUS_API
 
+#include <cstdint>
+#include <string_view>
+
 #include "unicode/bytestream.h"
 #include "unicode/localpointer.h"
 #include "unicode/strenum.h"
@@ -192,53 +195,53 @@ class UnicodeString;
  * @stable ICU 2.0
  * @see ResourceBundle
  */
-class U_COMMON_API Locale : public UObject {
+class U_COMMON_API_CLASS Locale : public UObject {
 public:
     /** Useful constant for the Root locale. @stable ICU 4.4 */
-    static const Locale& U_EXPORT2 getRoot();
+    U_COMMON_API static const Locale& U_EXPORT2 getRoot();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getEnglish();
+    U_COMMON_API static const Locale& U_EXPORT2 getEnglish();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getFrench();
+    U_COMMON_API static const Locale& U_EXPORT2 getFrench();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getGerman();
+    U_COMMON_API static const Locale& U_EXPORT2 getGerman();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getItalian();
+    U_COMMON_API static const Locale& U_EXPORT2 getItalian();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getJapanese();
+    U_COMMON_API static const Locale& U_EXPORT2 getJapanese();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getKorean();
+    U_COMMON_API static const Locale& U_EXPORT2 getKorean();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getChinese();
+    U_COMMON_API static const Locale& U_EXPORT2 getChinese();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getSimplifiedChinese();
+    U_COMMON_API static const Locale& U_EXPORT2 getSimplifiedChinese();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getTraditionalChinese();
+    U_COMMON_API static const Locale& U_EXPORT2 getTraditionalChinese();
 
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getFrance();
+    U_COMMON_API static const Locale& U_EXPORT2 getFrance();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getGermany();
+    U_COMMON_API static const Locale& U_EXPORT2 getGermany();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getItaly();
+    U_COMMON_API static const Locale& U_EXPORT2 getItaly();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getJapan();
+    U_COMMON_API static const Locale& U_EXPORT2 getJapan();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getKorea();
+    U_COMMON_API static const Locale& U_EXPORT2 getKorea();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getChina();
+    U_COMMON_API static const Locale& U_EXPORT2 getChina();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getPRC();
+    U_COMMON_API static const Locale& U_EXPORT2 getPRC();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getTaiwan();
+    U_COMMON_API static const Locale& U_EXPORT2 getTaiwan();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getUK();
+    U_COMMON_API static const Locale& U_EXPORT2 getUK();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getUS();
+    U_COMMON_API static const Locale& U_EXPORT2 getUS();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getCanada();
+    U_COMMON_API static const Locale& U_EXPORT2 getCanada();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale& U_EXPORT2 getCanadaFrench();
+    U_COMMON_API static const Locale& U_EXPORT2 getCanadaFrench();
 
     /**
      * Construct a default locale object, a Locale for the default locale ID.
@@ -247,7 +250,7 @@ public:
      * @see uloc_getDefault
      * @stable ICU 2.0
      */
-    Locale();
+    U_COMMON_API Locale();
 
     /**
      * Construct a locale from language, country, variant.
@@ -273,10 +276,10 @@ public:
      * @see uloc_getDefault
      * @stable ICU 2.0
      */
-    Locale(const char* language,
-           const char* country = nullptr,
-           const char* variant = nullptr,
-           const char* keywordsAndValues = nullptr);
+    U_COMMON_API Locale(const char* language,
+                        const char* country = nullptr,
+                        const char* variant = nullptr,
+                        const char* keywordsAndValues = nullptr);
 
     /**
      * Initializes a Locale object from another Locale object.
@@ -284,7 +287,7 @@ public:
      * @param other The Locale object being copied in.
      * @stable ICU 2.0
      */
-    Locale(const    Locale& other);
+    U_COMMON_API Locale(const Locale& other);
 
     /**
      * Move constructor; might leave source in bogus state.
@@ -293,13 +296,13 @@ public:
      * @param other The Locale object being moved in.
      * @stable ICU 63
      */
-    Locale(Locale&& other) noexcept;
+    U_COMMON_API Locale(Locale&& other) noexcept;
 
     /**
      * Destructor
      * @stable ICU 2.0
      */
-    virtual ~Locale() ;
+    U_COMMON_API virtual ~Locale();
 
     /**
      * Replaces the entire contents of *this with the specified value.
@@ -308,7 +311,7 @@ public:
      * @return      *this
      * @stable ICU 2.0
      */
-    Locale& operator=(const Locale& other);
+    U_COMMON_API Locale& operator=(const Locale& other);
 
     /**
      * Move assignment operator; might leave source in bogus state.
@@ -319,7 +322,7 @@ public:
      * @return      *this
      * @stable ICU 63
      */
-    Locale& operator=(Locale&& other) noexcept;
+    U_COMMON_API Locale& operator=(Locale&& other) noexcept;
 
     /**
      * Checks if two locale keys are the same.
@@ -328,7 +331,7 @@ public:
      * @return      true if the two locale keys are the same, false otherwise.
      * @stable ICU 2.0
      */
-    bool    operator==(const    Locale&     other) const;
+    U_COMMON_API bool operator==(const Locale& other) const;
 
     /**
      * Checks if two locale keys are not the same.
@@ -338,7 +341,7 @@ public:
      *              otherwise.
      * @stable ICU 2.0
      */
-    inline bool    operator!=(const    Locale&     other) const;
+    U_COMMON_API inline bool operator!=(const Locale& other) const;
 
     /**
      * Clone this object.
@@ -351,7 +354,7 @@ public:
      * @see getDynamicClassID
      * @stable ICU 2.8
      */
-    Locale *clone() const;
+    U_COMMON_API Locale* clone() const;
 
 #ifndef U_HIDE_SYSTEM_API
     /**
@@ -369,7 +372,7 @@ public:
      * @system
      * @stable ICU 2.0
      */
-    static const Locale& U_EXPORT2 getDefault();
+    U_COMMON_API static const Locale& U_EXPORT2 getDefault();
 
     /**
      * Sets the default. Normally set once at the beginning of a process,
@@ -383,8 +386,7 @@ public:
      * @system
      * @stable ICU 2.0
      */
-    static void U_EXPORT2 setDefault(const Locale& newLocale,
-                                     UErrorCode&   success);
+    U_COMMON_API static void U_EXPORT2 setDefault(const Locale& newLocale, UErrorCode& success);
 #endif  /* U_HIDE_SYSTEM_API */
 
     /**
@@ -408,7 +410,7 @@ public:
      * @return        the Locale for the specified BCP47 language tag.
      * @stable ICU 63
      */
-    static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status);
+    U_COMMON_API static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status);
 
     /**
      * Returns a well-formed language tag for this Locale.
@@ -423,7 +425,7 @@ public:
      * @param status  error information if creating the language tag failed.
      * @stable ICU 63
      */
-    void toLanguageTag(ByteSink& sink, UErrorCode& status) const;
+    U_COMMON_API void toLanguageTag(ByteSink& sink, UErrorCode& status) const;
 
     /**
      * Returns a well-formed language tag for this Locale.
@@ -447,11 +449,11 @@ public:
      * @stable ICU 2.0
      * @see uloc_getName
      */
-    static Locale U_EXPORT2 createFromName(const char *name);
+    U_COMMON_API static Locale U_EXPORT2 createFromName(const char* name);
 
 #ifndef U_HIDE_INTERNAL_API
     /** @internal */
-    static Locale U_EXPORT2 createFromName(StringPiece name);
+    U_COMMON_API static Locale U_EXPORT2 createFromName(StringPiece name);
 #endif  /* U_HIDE_INTERNAL_API */
 
     /**
@@ -462,14 +464,14 @@ public:
      * @stable ICU 3.0
      * @see uloc_canonicalize
      */
-    static Locale U_EXPORT2 createCanonical(const char* name);
+    U_COMMON_API static Locale U_EXPORT2 createCanonical(const char* name);
 
     /**
      * Returns the locale's ISO-639 language code.
      * @return      An alias to the code
      * @stable ICU 2.0
      */
-    inline const char *  getLanguage( ) const;
+    U_COMMON_API const char* getLanguage() const;
 
     /**
      * Returns the locale's ISO-15924 abbreviation script code.
@@ -478,21 +480,21 @@ public:
      * @see uscript_getCode
      * @stable ICU 2.8
      */
-    inline const char *  getScript( ) const;
+    U_COMMON_API const char* getScript() const;
 
     /**
      * Returns the locale's ISO-3166 country code.
      * @return      An alias to the code
      * @stable ICU 2.0
      */
-    inline const char *  getCountry( ) const;
+    U_COMMON_API const char* getCountry() const;
 
     /**
      * Returns the locale's variant code.
      * @return      An alias to the code
      * @stable ICU 2.0
      */
-    inline const char *  getVariant( ) const;
+    U_COMMON_API const char* getVariant() const;
 
     /**
      * Returns the programmatic name of the entire locale, with the language,
@@ -502,7 +504,7 @@ public:
      * @return      A pointer to "name".
      * @stable ICU 2.0
      */
-    inline const char * getName() const;
+    U_COMMON_API const char* getName() const;
 
     /**
      * Returns the programmatic name of the entire locale as getName() would return,
@@ -511,7 +513,7 @@ public:
      * @see getName
      * @stable ICU 2.8
      */
-    const char * getBaseName() const;
+    U_COMMON_API const char* getBaseName() const;
 
     /**
      * Add the likely subtags for this Locale, per the algorithm described
@@ -542,7 +544,7 @@ public:
      *                U_ILLEGAL_ARGUMENT_ERROR.
      * @stable ICU 63
      */
-    void addLikelySubtags(UErrorCode& status);
+    U_COMMON_API void addLikelySubtags(UErrorCode& status);
 
     /**
      * Minimize the subtags for this Locale, per the algorithm described
@@ -573,7 +575,7 @@ public:
      *                U_ILLEGAL_ARGUMENT_ERROR.
      * @stable ICU 63
      */
-    void minimizeSubtags(UErrorCode& status);
+    U_COMMON_API void minimizeSubtags(UErrorCode& status);
 
     /**
      * Canonicalize the locale ID of this object according to CLDR.
@@ -581,7 +583,7 @@ public:
      * @stable ICU 67
      * @see createCanonical
      */
-    void canonicalize(UErrorCode& status);
+    U_COMMON_API void canonicalize(UErrorCode& status);
 
     /**
      * Gets the list of keywords for the specified locale.
@@ -592,7 +594,7 @@ public:
      * @see getKeywords
      * @stable ICU 2.8
      */
-    StringEnumeration * createKeywords(UErrorCode &status) const;
+    U_COMMON_API StringEnumeration* createKeywords(UErrorCode& status) const;
 
     /**
      * Gets the list of Unicode keywords for the specified locale.
@@ -603,7 +605,7 @@ public:
      * @see getUnicodeKeywords
      * @stable ICU 63
      */
-    StringEnumeration * createUnicodeKeywords(UErrorCode &status) const;
+    U_COMMON_API StringEnumeration* createUnicodeKeywords(UErrorCode& status) const;
 
     /**
      * Gets the set of keywords for this Locale.
@@ -649,7 +651,10 @@ public:
      *
      * @stable ICU 2.8
      */
-    int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufferCapacity, UErrorCode &status) const;
+    U_COMMON_API int32_t getKeywordValue(const char* keywordName,
+                                         char* buffer,
+                                         int32_t bufferCapacity,
+                                         UErrorCode& status) const;
 
     /**
      * Gets the value for a keyword.
@@ -664,7 +669,7 @@ public:
      * @param status       error information if getting the value failed.
      * @stable ICU 63
      */
-    void getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
+    U_COMMON_API void getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
 
     /**
      * Gets the value for a keyword.
@@ -695,7 +700,9 @@ public:
      * @param status       error information if getting the value failed.
      * @stable ICU 63
      */
-    void getUnicodeKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
+    U_COMMON_API void getUnicodeKeywordValue(StringPiece keywordName,
+                                             ByteSink& sink,
+                                             UErrorCode& status) const;
 
     /**
      * Gets the Unicode value for a Unicode keyword.
@@ -732,7 +739,9 @@ public:
      *
      * @stable ICU 49
      */
-    void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status) {
+    U_COMMON_API void setKeywordValue(const char* keywordName,
+                                      const char* keywordValue,
+                                      UErrorCode& status) {
         setKeywordValue(StringPiece{keywordName}, StringPiece{keywordValue}, status);
     }
 
@@ -754,7 +763,9 @@ public:
      * @param status Returns any error information while performing this operation.
      * @stable ICU 63
      */
-    void setKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status);
+    U_COMMON_API void setKeywordValue(StringPiece keywordName,
+                                      StringPiece keywordValue,
+                                      UErrorCode& status);
 
     /**
      * Sets or removes the Unicode value for a Unicode keyword.
@@ -774,7 +785,9 @@ public:
      * @param status Returns any error information while performing this operation.
      * @stable ICU 63
      */
-    void setUnicodeKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status);
+    U_COMMON_API void setUnicodeKeywordValue(StringPiece keywordName,
+                                             StringPiece keywordValue,
+                                             UErrorCode& status);
 
     /**
      * returns the locale's three-letter language code, as specified
@@ -782,14 +795,14 @@ public:
      * @return      An alias to the code, or an empty string
      * @stable ICU 2.0
      */
-    const char * getISO3Language() const;
+    U_COMMON_API const char* getISO3Language() const;
 
     /**
      * Returns the locale's three-letter ISO-3166 country code.
      * @return      An alias to the code, or an empty string
      * @stable ICU 2.0
      */
-    const char * getISO3Country() const;
+    U_COMMON_API const char* getISO3Country() const;
 
     /**
      * Returns the Windows LCID value corresponding to this locale.
@@ -798,7 +811,7 @@ public:
      * there is no Windows LCID value that corresponds to this locale, returns 0.
      * @stable ICU 2.0
      */
-    uint32_t getLCID() const;
+    U_COMMON_API uint32_t getLCID() const;
 
     /**
      * Returns whether this locale's script is written right-to-left.
@@ -813,7 +826,7 @@ public:
      * @return true if the locale's script is written right-to-left
      * @stable ICU 54
      */
-    UBool isRightToLeft() const;
+    U_COMMON_API UBool isRightToLeft() const;
 
     /**
      * Fills in "dispLang" with the name of this locale's language in a format suitable for
@@ -824,7 +837,7 @@ public:
      * @return          A reference to "dispLang".
      * @stable ICU 2.0
      */
-    UnicodeString&  getDisplayLanguage(UnicodeString&   dispLang) const;
+    U_COMMON_API UnicodeString& getDisplayLanguage(UnicodeString& dispLang) const;
 
     /**
      * Fills in "dispLang" with the name of this locale's language in a format suitable for
@@ -839,8 +852,8 @@ public:
      * @return          A reference to "dispLang".
      * @stable ICU 2.0
      */
-    UnicodeString&  getDisplayLanguage( const   Locale&         displayLocale,
-                                                UnicodeString&  dispLang) const;
+    U_COMMON_API UnicodeString& getDisplayLanguage(const Locale& displayLocale,
+                                                   UnicodeString& dispLang) const;
 
     /**
      * Fills in "dispScript" with the name of this locale's script in a format suitable
@@ -851,7 +864,7 @@ public:
      * @return              A reference to "dispScript".
      * @stable ICU 2.8
      */
-    UnicodeString&  getDisplayScript(          UnicodeString& dispScript) const;
+    U_COMMON_API UnicodeString& getDisplayScript(UnicodeString& dispScript) const;
 
     /**
      * Fills in "dispScript" with the name of this locale's script in a format suitable
@@ -867,8 +880,8 @@ public:
      * @return              A reference to "dispScript".
      * @stable ICU 2.8
      */
-    UnicodeString&  getDisplayScript(  const   Locale&         displayLocale,
-                                               UnicodeString&  dispScript) const;
+    U_COMMON_API UnicodeString& getDisplayScript(const Locale& displayLocale,
+                                                 UnicodeString& dispScript) const;
 
     /**
      * Fills in "dispCountry" with the name of this locale's country in a format suitable
@@ -879,7 +892,7 @@ public:
      * @return              A reference to "dispCountry".
      * @stable ICU 2.0
      */
-    UnicodeString&  getDisplayCountry(          UnicodeString& dispCountry) const;
+    U_COMMON_API UnicodeString& getDisplayCountry(UnicodeString& dispCountry) const;
 
     /**
      * Fills in "dispCountry" with the name of this locale's country in a format suitable
@@ -895,8 +908,8 @@ public:
      * @return              A reference to "dispCountry".
      * @stable ICU 2.0
      */
-    UnicodeString&  getDisplayCountry(  const   Locale&         displayLocale,
-                                                UnicodeString&  dispCountry) const;
+    U_COMMON_API UnicodeString& getDisplayCountry(const Locale& displayLocale,
+                                                  UnicodeString& dispCountry) const;
 
     /**
      * Fills in "dispVar" with the name of this locale's variant code in a format suitable
@@ -905,7 +918,7 @@ public:
      * @return          A reference to "dispVar".
      * @stable ICU 2.0
      */
-    UnicodeString&  getDisplayVariant(      UnicodeString& dispVar) const;
+    U_COMMON_API UnicodeString& getDisplayVariant(UnicodeString& dispVar) const;
 
     /**
      * Fills in "dispVar" with the name of this locale's variant code in a format
@@ -915,8 +928,8 @@ public:
      * @return          A reference to "dispVar".
      * @stable ICU 2.0
      */
-    UnicodeString&  getDisplayVariant(  const   Locale&         displayLocale,
-                                                UnicodeString&  dispVar) const;
+    U_COMMON_API UnicodeString& getDisplayVariant(const Locale& displayLocale,
+                                                  UnicodeString& dispVar) const;
 
     /**
      * Fills in "name" with the name of this locale in a format suitable for user display
@@ -929,7 +942,7 @@ public:
      * @return      A reference to "name".
      * @stable ICU 2.0
      */
-    UnicodeString&  getDisplayName(         UnicodeString&  name) const;
+    U_COMMON_API UnicodeString& getDisplayName(UnicodeString& name) const;
 
     /**
      * Fills in "name" with the name of this locale in a format suitable for user display
@@ -943,14 +956,13 @@ public:
      * @return          A reference to "name".
      * @stable ICU 2.0
      */
-    UnicodeString&  getDisplayName( const   Locale&         displayLocale,
-                                            UnicodeString&  name) const;
+    U_COMMON_API UnicodeString& getDisplayName(const Locale& displayLocale, UnicodeString& name) const;
 
     /**
      * Generates a hash code for the locale.
      * @stable ICU 2.0
      */
-    int32_t hashCode() const;
+    U_COMMON_API int32_t hashCode() const;
 
     /**
      * Sets the locale to bogus
@@ -960,14 +972,14 @@ public:
      * instantiated from a locale and from a rule set).
      * @stable ICU 2.1
      */
-    void setToBogus();
+    U_COMMON_API void setToBogus();
 
     /**
      * Gets the bogus state. Locale object can be bogus if it doesn't exist
      * @return false if it is a real locale, true if it is a bogus locale
      * @stable ICU 2.1
      */
-    inline UBool isBogus() const;
+    U_COMMON_API inline UBool isBogus() const;
 
     /**
      * Returns a list of all installed locales.
@@ -977,7 +989,7 @@ public:
      *              get ownership of this list, and must NOT delete it.
      * @stable ICU 2.0
      */
-    static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
+    U_COMMON_API static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
 
     /**
      * Gets a list of all available 2-letter country codes defined in ISO 3166.  This is a
@@ -987,7 +999,7 @@ public:
      * @return a list of all available country codes
      * @stable ICU 2.0
      */
-    static const char* const* U_EXPORT2 getISOCountries();
+    U_COMMON_API static const char* const* U_EXPORT2 getISOCountries();
 
     /**
      * Returns a list of all unique language codes defined in ISO 639.
@@ -1000,21 +1012,21 @@ public:
      * @return a list of all available language codes
      * @stable ICU 2.0
      */
-    static const char* const* U_EXPORT2 getISOLanguages();
+    U_COMMON_API static const char* const* U_EXPORT2 getISOLanguages();
 
     /**
      * ICU "poor man's RTTI", returns a UClassID for this class.
      *
      * @stable ICU 2.2
      */
-    static UClassID U_EXPORT2 getStaticClassID();
+    U_COMMON_API static UClassID U_EXPORT2 getStaticClassID();
 
     /**
      * ICU "poor man's RTTI", returns a UClassID for the actual class.
      *
      * @stable ICU 2.2
      */
-    virtual UClassID getDynamicClassID() const override;
+    U_COMMON_API virtual UClassID getDynamicClassID() const override;
 
     /**
      * A Locale iterator interface similar to a Java Iterator<Locale>.
@@ -1118,7 +1130,7 @@ protected: /* only protected for testing purposes. DO NOT USE. */
      * Set this from a single POSIX style locale string.
      * @internal
      */
-    void setFromPOSIXID(const char *posixID);
+    U_COMMON_API void setFromPOSIXID(const char* posixID);
     /**
      * Minimize the subtags for this Locale, per the algorithm described
      * @param favorScript favor to keep script if true, to keep region if false.
@@ -1127,7 +1139,7 @@ protected: /* only protected for testing purposes. DO NOT USE. */
      *                U_ILLEGAL_ARGUMENT_ERROR.
      * @internal
      */
-    void minimizeSubtags(bool favorScript, UErrorCode& status);
+    U_COMMON_API void minimizeSubtags(bool favorScript, UErrorCode& status);
 #endif  /* U_HIDE_INTERNAL_API */
 
 private:
@@ -1139,7 +1151,6 @@ private:
      * @param canonicalize whether to call uloc_canonicalize on cLocaleID
      */
     Locale& init(const char* localeID, UBool canonicalize);
-    /** @internal */
     Locale& init(StringPiece localeID, UBool canonicalize);
 
     /*
@@ -1147,8 +1158,10 @@ private:
      *   NO side effects.   (Default constructor tries to get
      *   the default locale.)
      */
-    enum ELocaleType {
-        eBOGUS
+    enum ELocaleType : uint8_t {  // one-byte tag; also used as the first member of Nest and Heap
+        eBOGUS,  // the state that Payload::isBogus() tests for
+        eNEST,  // default value of Nest::type
+        eHEAP,  // NOTE(review): presumably the tag stored in Heap::type; Heap's constructor is not visible here
     };
     Locale(ELocaleType);
 
@@ -1157,33 +1170,158 @@ private:
      */
     static Locale* getLocaleCache();
 
-    char language[ULOC_LANG_CAPACITY];
-    char script[ULOC_SCRIPT_CAPACITY];
-    char country[ULOC_COUNTRY_CAPACITY];
-    int32_t variantBegin;
-    char* fullName;
-    char fullNameBuffer[ULOC_FULLNAME_CAPACITY];
-    // name without keywords
-    char* baseName;
-    void initBaseName(UErrorCode& status);
+    union Payload;
+    struct Nest;
+    struct Heap;
+
+    /**
+     * Locale data that can be nested directly within the union Payload object.
+     */
+    struct Nest {
+        static constexpr size_t SIZE = 32;  // required sizeof(Nest); checked by the static_assert after the struct
+
+        ELocaleType type = eNEST;  // tag; first member, so it lines up with Payload::type
+        char language[4];  // fits() admits at most 3 characters
+        char script[5];  // fits() admits at most 4 characters
+        char region[4];  // fits() admits at most 3 characters
+        uint8_t variantBegin;  // offset of the variant inside baseName; 0 means "no variant" (see getVariant())
+        char baseName[SIZE -
+                      sizeof type -
+                      sizeof language -
+                      sizeof script -
+                      sizeof region -
+                      sizeof variantBegin];  // whatever is left of SIZE: 32 - 1 - 4 - 5 - 4 - 1 = 17 bytes
+
+        const char* getLanguage() const { return language; }
+        const char* getScript() const { return script; }
+        const char* getRegion() const { return region; }
+        const char* getVariant() const { return variantBegin == 0 ? "" : getBaseName() + variantBegin; }  // empty string when there is no variant
+        const char* getBaseName() const { return baseName; }
+
+        // Doesn't inherit from UMemory, shouldn't be heap allocated.
+        static void* U_EXPORT2 operator new(size_t) noexcept = delete;
+        static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
+
+        Nest() : language{'\0'}, script{'\0'}, region{'\0'}, variantBegin{0}, baseName{'\0'} {}  // every buffer starts out as an empty string
+
+        void init(std::string_view language,
+                  std::string_view script,
+                  std::string_view region,
+                  uint8_t variantBegin);  // defined out of line; NOTE(review): presumably copies the subtags into the buffers - confirm in locid.cpp
+
+        static bool fits(int32_t length,
+                         std::string_view language,
+                         std::string_view script,
+                         std::string_view region) {
+            return length < static_cast<int32_t>(sizeof Nest::baseName) &&  // strict '<' throughout: presumably leaves room for a terminating NUL
+                   language.size() < sizeof Nest::language &&
+                   script.size() < sizeof Nest::script &&
+                   region.size() < sizeof Nest::region;
+        }
+
+      private:
+        friend union Payload;  // gives Payload access to the private constructor below
+        Nest(Heap&& heap, uint8_t variantBegin);  // defined out of line; NOTE(review): looks like a Heap-to-Nest conversion - confirm in locid.cpp
+    };
+    static_assert(sizeof(Nest) == Nest::SIZE);  // the members must add up to exactly SIZE, with no padding
+
+    /**
+     * Locale data that needs to be heap allocated in the union Payload object.
+     */
+    struct Heap {
+        struct Alloc;  // only forward-declared here; the definition is not part of this header hunk
+
+        ELocaleType type;  // tag; first member, so it lines up with Payload::type (no default initializer, unlike Nest::type)
+        char language[ULOC_LANG_CAPACITY];
+        char script[ULOC_SCRIPT_CAPACITY];
+        char region[ULOC_COUNTRY_CAPACITY];
+        Alloc* ptr;  // NOTE(review): presumably owns the separately allocated name storage - confirm in locid.cpp
+
+        const char* getLanguage() const { return language; }
+        const char* getScript() const { return script; }
+        const char* getRegion() const { return region; }
+        const char* getVariant() const;  // these three getters are defined out of line
+        const char* getFullName() const;
+        const char* getBaseName() const;
+
+        // Doesn't inherit from UMemory, shouldn't be heap allocated.
+        static void* U_EXPORT2 operator new(size_t) noexcept = delete;
+        static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
+
+        Heap(std::string_view language,
+             std::string_view script,
+             std::string_view region,
+             int32_t variantBegin);  // note: int32_t here, whereas Nest stores variantBegin as uint8_t
+        ~Heap();
+
+        Heap& operator=(const Heap& other);  // no copy or move constructor is declared here, only the two assignments
+        Heap& operator=(Heap&& other) noexcept;
+    };
+    static_assert(sizeof(Heap) <= sizeof(Nest));  // Heap must not be larger than Nest (Nest::SIZE bytes)
+
+    /**
+     * This is kind of std::variant but customized to not waste any space on the
+     * discriminator or on any padding, and to copy any heap allocated object.
+     */
+    union Payload {
+      private:
+        Nest nest;  // alternative whose type member defaults to eNEST
+        Heap heap;  // the other alternative
+        ELocaleType type;  // overlays the leading ELocaleType member of both Nest and Heap
+
+        void copy(const Payload& other);  // defined out of line
+        void move(Payload&& other) noexcept;  // defined out of line
+
+      public:
+        // Doesn't inherit from UMemory, shouldn't be heap allocated.
+        static void* U_EXPORT2 operator new(size_t) noexcept = delete;
+        static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
+
+        Payload() : type{eBOGUS} {}  // a default-constructed payload is bogus
+        ~Payload();
+
+        Payload(const Payload& other);
+        Payload(Payload&& other) noexcept;
+
+        Payload& operator=(const Payload& other);
+        Payload& operator=(Payload&& other) noexcept;
 
-    UBool fIsBogus;
+        void setToBogus();
+        bool isBogus() const { return type == eBOGUS; }  // the tag alone decides bogus-ness
+
+        template <typename T, typename... Args> T& emplace(Args&&... args);  // defined elsewhere; NOTE(review): presumably constructs alternative T in place - confirm
+
+        template <typename T> T* get();  // defined elsewhere; NOTE(review): presumably null when T is not the active alternative - confirm
+
+        template <typename BogusFn, typename NestFn, typename HeapFn, typename... Args>
+        auto visit(BogusFn bogusFn, NestFn nestFn, HeapFn heapFn, Args... args) const;  // defined elsewhere; NOTE(review): presumably calls the callable matching the current tag - confirm
+    } payload;
+
+    /**
+     * Call a field getter function on either Nest or Heap in payload.
+     * (This is kind of std::visit but simpler and without exceptions.)
+     *
+     * @tparam NEST Pointer to the Nest getter function.
+     * @tparam HEAP Pointer to the Heap getter function.
+     * @return the result from the getter, or the empty string if isBogus().
+     */
+    template <const char* (Nest::*const NEST)() const,
+              const char* (Heap::*const HEAP)() const>
+    const char* getField() const;
 
     static const Locale &getLocale(int locid);
 
     /**
      * A friend to allow the default locale to be set by either the C or C++ API.
-     * @internal (private)
      */
     friend Locale *locale_set_default_internal(const char *, UErrorCode& status);
 
     /**
-     * @internal (private)
      */
     friend void U_CALLCONV locale_available_init();
 };
 
-inline bool
+U_COMMON_API inline bool
 Locale::operator!=(const    Locale&     other) const
 {
     return !operator==(other);
@@ -1199,36 +1337,6 @@ Locale::toLanguageTag(UErrorCode& status) const
     return result;
 }
 
-inline const char *
-Locale::getCountry() const
-{
-    return country;
-}
-
-inline const char *
-Locale::getLanguage() const
-{
-    return language;
-}
-
-inline const char *
-Locale::getScript() const
-{
-    return script;
-}
-
-inline const char *
-Locale::getVariant() const
-{
-    return fIsBogus ? "" : &baseName[variantBegin];
-}
-
-inline const char *
-Locale::getName() const
-{
-    return fullName;
-}
-
 template<typename StringClass, typename OutputIterator> inline void
 Locale::getKeywords(OutputIterator iterator, UErrorCode& status) const
 {
@@ -1285,9 +1393,9 @@ Locale::getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) cons
     return result;
 }
 
-inline UBool
+U_COMMON_API inline UBool
 Locale::isBogus() const {
-    return fIsBogus;
+    return payload.isBogus();  // the bogus state now lives in the payload's tag (type == eBOGUS) instead of a separate flag
 }
 
 U_NAMESPACE_END

+ 8 - 17
thirdparty/icu4c/common/unicode/platform.h

@@ -368,19 +368,6 @@
 #   define U_IS_BIG_ENDIAN 0
 #endif
 
-/**
- * \def U_HAVE_PLACEMENT_NEW
- * Determines whether to override placement new and delete for STL.
- * @stable ICU 2.6
- */
-#ifdef U_HAVE_PLACEMENT_NEW
-    /* Use the predefined value. */
-#elif defined(__BORLANDC__)
-#   define U_HAVE_PLACEMENT_NEW 0
-#else
-#   define U_HAVE_PLACEMENT_NEW 1
-#endif
-
 /**
  * \def U_HAVE_DEBUG_LOCATION_NEW 
  * Define this to define the MFC debug version of the operator new.
@@ -479,6 +466,12 @@
     /* Otherwise use the predefined value. */
 #elif !defined(__cplusplus)
 #   define U_CPLUSPLUS_VERSION 0
+// The value of _MSVC_LANG for C++23 preview is undocumented, except that it is larger than 202002.
+// As of this writing, it is 202004.
+#elif __cplusplus >= 202302L || (defined(_MSVC_LANG) && _MSVC_LANG > 202002L)
+#   define U_CPLUSPLUS_VERSION 23
+#elif __cplusplus >= 202002L || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
+#   define U_CPLUSPLUS_VERSION 20
 #elif __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
 #   define U_CPLUSPLUS_VERSION 17
 #elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
@@ -493,12 +486,10 @@
 /**
  * \def U_FALLTHROUGH
  * Annotate intentional fall-through between switch labels.
- * http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
+ * https://clang.llvm.org/docs/AttributeReference.html#fallthrough
  * @internal
  */
-#ifndef __cplusplus
-    // Not for C.
-#elif defined(U_FALLTHROUGH)
+#if defined(U_FALLTHROUGH)
     // Use the predefined value.
 #elif defined(__clang__)
     // Test for compiler vs. feature separately.

+ 0 - 13
thirdparty/icu4c/common/unicode/rbbi.h

@@ -122,7 +122,6 @@ class U_COMMON_API RuleBasedBreakIterator /*final*/ : public BreakIterator {
 private:
     /**
      * The UText through which this BreakIterator accesses the text
-     * @internal (private)
      */
     UText  fText = UTEXT_INITIALIZER;
 
@@ -172,7 +171,6 @@ private:
      * If present, UStack of LanguageBreakEngine objects that might handle
      * dictionary characters. Searched from top to bottom to find an object to
      * handle a given character.
-     * @internal (private)
      */
     UStack              *fLanguageBreakEngines = nullptr;
 
@@ -181,14 +179,12 @@ private:
      * If present, the special LanguageBreakEngine used for handling
      * characters that are in the dictionary set, but not handled by any
      * LanguageBreakEngine.
-     * @internal (private)
      */
     UnhandledEngine     *fUnhandledBreakEngine = nullptr;
 
     /**
      * Counter for the number of characters encountered with the "dictionary"
      *   flag set.
-     * @internal (private)
      */
     uint32_t            fDictionaryCharCount = 0;
 
@@ -233,7 +229,6 @@ private:
      *
      *             The break iterator adopts the memory, and will
      *             free it when done.
-     * @internal (private)
      */
     RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
 
@@ -248,20 +243,16 @@ private:
      * @param isPhraseBreaking true if phrase based breaking is required, otherwise false.
      * @see udata_open
      * @see #getBinaryRules
-     * @internal (private)
      */
     RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status);
 
-    /** @internal */
     friend class RBBIRuleBuilder;
-    /** @internal */
     friend class BreakIterator;
 
     /**
      * Default constructor with an error code parameter.
      * Aside from error handling, otherwise identical to the default constructor.
      * Internally, handles common initialization for other constructors.
-     * @internal (private)
      */
     RuleBasedBreakIterator(UErrorCode *status);
 
@@ -732,7 +723,6 @@ private:
      * will operate correctly. A Safe Position is not necessarily a boundary itself.
      *
      * @param fromPosition the position in the input text to begin the iteration.
-     * @internal (private)
      */
     int32_t handleSafePrevious(int32_t fromPosition);
 
@@ -745,8 +735,6 @@ private:
      *    fDictionaryCharCount the number of dictionary characters encountered.
      *                         If > 0, the segment will be further subdivided
      *    fRuleStatusIndex     Info from the state table indicating which rules caused the boundary.
-     *
-     * @internal (private)
      */
     int32_t handleNext();
 
@@ -778,7 +766,6 @@ private:
      * given character c.
      * @param c         A character in the dictionary set
      * @param locale    The locale.
-     * @internal (private)
      */
     const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c, const char* locale);
 

+ 1 - 1
thirdparty/icu4c/common/unicode/stringpiece.h

@@ -333,7 +333,7 @@ class U_COMMON_API StringPiece : public UMemory {
  * @return true if the string data is equal
  * @stable ICU 4.8
  */
-U_EXPORT UBool U_EXPORT2 
+U_COMMON_API UBool U_EXPORT2 
 operator==(const StringPiece& x, const StringPiece& y);
 
 /**

+ 34 - 17
thirdparty/icu4c/common/unicode/uchar.h

@@ -61,7 +61,7 @@ U_CDECL_BEGIN
  * @see u_getUnicodeVersion
  * @stable ICU 2.0
  */
-#define U_UNICODE_VERSION "16.0"
+#define U_UNICODE_VERSION "17.0"
 
 /**
  * \file
@@ -552,14 +552,12 @@ typedef enum UProperty {
      * @stable ICU 74
      */
     UCHAR_ID_COMPAT_MATH_CONTINUE=74,
-#ifndef U_HIDE_DRAFT_API
     /**
      * Binary property Modifier_Combining_Mark.
      * Used by the AMTRA algorithm in UAX #53.
-     * @draft ICU 76
+     * @stable ICU 76
      */
     UCHAR_MODIFIER_COMBINING_MARK=75,
-#endif  // U_HIDE_DRAFT_API
 #ifndef U_HIDE_DEPRECATED_API
     /**
      * One more than the last constant for binary Unicode properties.
@@ -682,14 +680,12 @@ typedef enum UProperty {
      * @stable ICU 75
      */
     UCHAR_IDENTIFIER_STATUS=0x1019,
-#ifndef U_HIDE_DRAFT_API
     /**
      * Enumerated property Indic_Conjunct_Break.
      * Used in the grapheme cluster break algorithm in UAX #29.
-     * @draft ICU 76
+     * @stable ICU 76
      */
     UCHAR_INDIC_CONJUNCT_BREAK=0x101A,
-#endif  // U_HIDE_DRAFT_API
 #ifndef U_HIDE_DEPRECATED_API
     /**
      * One more than the last constant for enumerated/integer Unicode properties.
@@ -1985,6 +1981,25 @@ enum UBlockCode {
     /** @stable ICU 76 */
     UBLOCK_TULU_TIGALARI = 338, /*[11380]*/
 
+    // New blocks in Unicode 17.0.0
+
+    /** @stable ICU 78 */
+    UBLOCK_BERIA_ERFE = 339, /*[16EA0]*/
+    /** @stable ICU 78 */
+    UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_J = 340, /*[323B0]*/
+    /** @stable ICU 78 */
+    UBLOCK_MISCELLANEOUS_SYMBOLS_SUPPLEMENT = 341, /*[1CEC0]*/
+    /** @stable ICU 78 */
+    UBLOCK_SHARADA_SUPPLEMENT = 342, /*[11B60]*/
+    /** @stable ICU 78 */
+    UBLOCK_SIDETIC = 343, /*[10940]*/
+    /** @stable ICU 78 */
+    UBLOCK_TAI_YO = 344, /*[1E6C0]*/
+    /** @stable ICU 78 */
+    UBLOCK_TANGUT_COMPONENTS_SUPPLEMENT = 345, /*[18D80]*/
+    /** @stable ICU 78 */
+    UBLOCK_TOLONG_SIKI = 346, /*[11DB0]*/
+
 #ifndef U_HIDE_DEPRECATED_API
     /**
      * One more than the highest normal UBlockCode value.
@@ -1992,7 +2007,7 @@ enum UBlockCode {
      *
      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
      */
-    UBLOCK_COUNT = 339,
+    UBLOCK_COUNT = 347,
 #endif  // U_HIDE_DEPRECATED_API
 
     /** @stable ICU 2.0 */
@@ -2289,6 +2304,8 @@ typedef enum UJoiningGroup {
 
     U_JG_KASHMIRI_YEH,  /**< @stable ICU 76 */
 
+    U_JG_THIN_NOON,  /**< @stable ICU 78 */
+
 #ifndef U_HIDE_DEPRECATED_API
     /**
      * One more than the highest normal UJoiningGroup value.
@@ -2536,6 +2553,8 @@ typedef enum ULineBreak {
     U_LB_VIRAMA_FINAL = 46,      /*[VF]*/
     /** @stable ICU 74 */
     U_LB_VIRAMA = 47,            /*[VI]*/
+    /** @stable ICU 78 */
+    U_LB_UNAMBIGUOUS_HYPHEN = 48,/*[HH]*/
 #ifndef U_HIDE_DEPRECATED_API
     /**
      * One more than the highest normal ULineBreak value.
@@ -2543,7 +2562,7 @@ typedef enum ULineBreak {
      *
      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
      */
-    U_LB_COUNT = 48
+    U_LB_COUNT = 49
 #endif  // U_HIDE_DEPRECATED_API
 } ULineBreak;
 
@@ -2741,12 +2760,11 @@ typedef enum UIndicSyllabicCategory {
     U_INSC_REORDERING_KILLER,
 } UIndicSyllabicCategory;
 
-#ifndef U_HIDE_DRAFT_API
 /**
  * Indic Conjunct Break constants.
  *
  * @see UCHAR_INDIC_CONJUNCT_BREAK
- * @draft ICU 76
+ * @stable ICU 76
  */
 typedef enum UIndicConjunctBreak {
     /*
@@ -2755,16 +2773,15 @@ typedef enum UIndicConjunctBreak {
     *     U_INCB_<Unicode Indic_Conjunct_Break value name>
     */
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     U_INCB_NONE,
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     U_INCB_CONSONANT,
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     U_INCB_EXTEND,
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     U_INCB_LINKER,
 } UIndicConjunctBreak;
-#endif  // U_HIDE_DRAFT_API
 
 /**
  * Vertical Orientation constants.
@@ -2918,7 +2935,7 @@ u_stringHasBinaryProperty(const UChar *s, int32_t length, UProperty which);
  * @return the property as a set
  * @see UProperty
  * @see u_hasBinaryProperty
- * @see Unicode::fromUSet
+ * @see UnicodeSet::fromUSet
  * @stable ICU 63
  */
 U_CAPI const USet * U_EXPORT2

+ 22 - 0
thirdparty/icu4c/common/unicode/umachine.h

@@ -119,6 +119,28 @@
 /** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API  */
 #define U_INTERNAL U_CAPI
 
+/**
+ * \def U_FORCE_INLINE
+ * Forces function inlining on compilers that are known to support it.
+ * Place this before specifiers like "static" and "explicit".
+ *
+ * This does not replace the "inline" keyword which suspends the One Definition Rule (ODR)
+ * in addition to optionally serving as an inlining hint to the compiler.
+ *
+ * @internal
+ */
+#ifdef U_FORCE_INLINE
+    // already defined
+#elif defined(U_IN_DOXYGEN)  // NOTE(review): presumably set only while generating the API docs - confirm
+#  define U_FORCE_INLINE inline
+#elif (defined(__clang__) && __clang__) || U_GCC_MAJOR_MINOR != 0  // Clang or GCC: use the gnu:: attribute spelling
+#  define U_FORCE_INLINE [[gnu::always_inline]]
+#elif defined(U_REAL_MSVC)  // MSVC: use its __forceinline keyword
+#  define U_FORCE_INLINE __forceinline
+#else  // any other compiler: fall back to a plain inline hint
+#  define U_FORCE_INLINE inline
+#endif
+
 // Before ICU 65, function-like, multi-statement ICU macros were just defined as
 // series of statements wrapped in { } blocks and the caller could choose to
 // either treat them as if they were actual functions and end the invocation

+ 25 - 27
thirdparty/icu4c/common/unicode/uniset.h

@@ -495,7 +495,7 @@ public:
      * @return <tt>true</tt> if the specified set is equal to this set.
      * @stable ICU 2.0
      */
-    virtual bool operator==(const UnicodeSet& o) const;
+    bool operator==(const UnicodeSet& o) const;
 
     /**
      * Compares the specified object with this set for equality.  Returns
@@ -522,7 +522,7 @@ public:
      * @see Object#hashCode()
      * @stable ICU 2.0
      */
-    virtual int32_t hashCode() const;
+    int32_t hashCode() const;
 
     /**
      * Get a UnicodeSet pointer from a USet
@@ -792,7 +792,7 @@ public:
      * @stable ICU 2.0
      * @see getRangeCount
      */
-    virtual int32_t size() const;
+    int32_t size() const;
 
     /**
      * Returns <tt>true</tt> if this set contains no elements.
@@ -800,7 +800,7 @@ public:
      * @return <tt>true</tt> if this set contains no elements.
      * @stable ICU 2.0
      */
-    virtual UBool isEmpty() const;
+    UBool isEmpty() const;
 
     /**
      * @return true if this set contains multi-character strings or the empty string.
@@ -825,7 +825,7 @@ public:
      * @return true if the test condition is met
      * @stable ICU 2.0
      */
-    virtual UBool contains(UChar32 start, UChar32 end) const;
+    UBool contains(UChar32 start, UChar32 end) const;
 
     /**
      * Returns <tt>true</tt> if this set contains the given
@@ -843,7 +843,7 @@ public:
      * @return true if the test condition is met
      * @stable ICU 2.4
      */
-    virtual UBool containsAll(const UnicodeSet& c) const;
+    UBool containsAll(const UnicodeSet& c) const;
 
     /**
      * Returns true if this set contains all the characters
@@ -1021,7 +1021,7 @@ public:
      * Implement UnicodeMatcher::matches()
      * @stable ICU 2.4
      */
-    virtual UMatchDegree matches(const Replaceable& text,
+    UMatchDegree matches(const Replaceable& text,
                          int32_t& offset,
                          int32_t limit,
                          UBool incremental) override;
@@ -1102,7 +1102,6 @@ public:
      */
     UChar32 charAt(int32_t index) const;
 
-#ifndef U_HIDE_DRAFT_API
     /**
      * Returns a C++ "range" for iterating over the code points of this set.
      *
@@ -1114,7 +1113,7 @@ public:
      * \endcode
      *
      * @return a "range" object for iterating over the code points of this set.
-     * @draft ICU 76
+     * @stable ICU 76
      * @see ranges
      * @see strings
      * @see begin
@@ -1140,7 +1139,7 @@ public:
      * \endcode
      *
      * @return a "range" object for iterating over the code point ranges of this set.
-     * @draft ICU 76
+     * @stable ICU 76
      * @see codePoints
      * @see strings
      * @see begin
@@ -1164,7 +1163,7 @@ public:
      * \endcode
      *
      * @return a "range" object for iterating over the strings of this set.
-     * @draft ICU 76
+     * @stable ICU 76
      * @see codePoints
      * @see ranges
      * @see begin
@@ -1173,7 +1172,6 @@ public:
     inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const {
         return U_HEADER_NESTED_NAMESPACE::USetStrings(toUSet());
     }
-#endif  // U_HIDE_DRAFT_API
 
 #ifndef U_HIDE_DRAFT_API
     /**
@@ -1231,7 +1229,7 @@ public:
      * to this set.
      * @stable ICU 2.0
      */
-    virtual UnicodeSet& add(UChar32 start, UChar32 end);
+    UnicodeSet& add(UChar32 start, UChar32 end);
 
     /**
      * Adds the specified character to this set if it is not already
@@ -1337,7 +1335,7 @@ public:
      * @param end last character, inclusive, of range
      * @stable ICU 2.0
      */
-    virtual UnicodeSet& retain(UChar32 start, UChar32 end);
+    UnicodeSet& retain(UChar32 start, UChar32 end);
 
 
     /**
@@ -1375,7 +1373,7 @@ public:
      * from this set.
      * @stable ICU 2.0
      */
-    virtual UnicodeSet& remove(UChar32 start, UChar32 end);
+    UnicodeSet& remove(UChar32 start, UChar32 end);
 
     /**
      * Removes the specified character from this set if it is present.
@@ -1412,7 +1410,7 @@ public:
      * A frozen set will not be modified.
      * @stable ICU 2.0
      */
-    virtual UnicodeSet& complement();
+    UnicodeSet& complement();
 
     /**
      * Complements the specified range in this set.  Any character in
@@ -1426,7 +1424,7 @@ public:
      * @param end last character, inclusive, of range
      * @stable ICU 2.0
      */
-    virtual UnicodeSet& complement(UChar32 start, UChar32 end);
+    UnicodeSet& complement(UChar32 start, UChar32 end);
 
     /**
      * Complements the specified character in this set.  The character
@@ -1463,7 +1461,7 @@ public:
      * @see #add(UChar32, UChar32)
      * @stable ICU 2.0
      */
-    virtual UnicodeSet& addAll(const UnicodeSet& c);
+    UnicodeSet& addAll(const UnicodeSet& c);
 
     /**
      * Retains only the elements in this set that are contained in the
@@ -1476,7 +1474,7 @@ public:
      * @param c set that defines which elements this set will retain.
      * @stable ICU 2.0
      */
-    virtual UnicodeSet& retainAll(const UnicodeSet& c);
+    UnicodeSet& retainAll(const UnicodeSet& c);
 
     /**
      * Removes from this set all of its elements that are contained in the
@@ -1489,7 +1487,7 @@ public:
      *          this set.
      * @stable ICU 2.0
      */
-    virtual UnicodeSet& removeAll(const UnicodeSet& c);
+    UnicodeSet& removeAll(const UnicodeSet& c);
 
     /**
      * Complements in this set all elements contained in the specified
@@ -1501,7 +1499,7 @@ public:
      *          this set.
      * @stable ICU 2.4
      */
-    virtual UnicodeSet& complementAll(const UnicodeSet& c);
+    UnicodeSet& complementAll(const UnicodeSet& c);
 
     /**
      * Removes all of the elements from this set.  This set will be
@@ -1509,7 +1507,7 @@ public:
      * A frozen set will not be modified.
      * @stable ICU 2.0
      */
-    virtual UnicodeSet& clear();
+    UnicodeSet& clear();
 
     /**
      * Close this set over the given attribute.  For the attribute
@@ -1546,7 +1544,7 @@ public:
      * @return a reference to this set.
      * @stable ICU 4.2
      */
-    virtual UnicodeSet &removeAllStrings();
+    UnicodeSet &removeAllStrings();
 
     /**
      * Iteration method that returns the number of ranges contained in
@@ -1555,7 +1553,7 @@ public:
      * @see #getRangeEnd
      * @stable ICU 2.4
      */
-    virtual int32_t getRangeCount() const;
+    int32_t getRangeCount() const;
 
     /**
      * Iteration method that returns the first character in the
@@ -1564,7 +1562,7 @@ public:
      * @see #getRangeEnd
      * @stable ICU 2.4
      */
-    virtual UChar32 getRangeStart(int32_t index) const;
+    UChar32 getRangeStart(int32_t index) const;
 
     /**
      * Iteration method that returns the last character in the
@@ -1573,7 +1571,7 @@ public:
      * @see #getRangeEnd
      * @stable ICU 2.4
      */
-    virtual UChar32 getRangeEnd(int32_t index) const;
+    UChar32 getRangeEnd(int32_t index) const;
 
     /**
      * Serializes this set into an array of 16-bit integers.  Serialization
@@ -1631,7 +1629,7 @@ public:
      * A frozen set will not be modified.
      * @stable ICU 2.4
      */
-    virtual UnicodeSet& compact();
+    UnicodeSet& compact();
 
     /**
      * Return the class ID for this class.  This is useful only for

+ 89 - 30
thirdparty/icu4c/common/unicode/unistr.h

@@ -215,6 +215,10 @@ class UnicodeStringAppendable;  // unicode/appendable.h
  *
  * The UnicodeString equivalent of std::string’s clear() is remove().
  *
+ * Starting with ICU 78, a UnicodeString is a C++ "range" of char16_t code units.
+ * utfStringCodePoints() and unsafeUTFStringCodePoints() can be used to iterate over
+ * the code points.
+ *
  * A UnicodeString may "alias" an external array of characters
  * (that is, point to it, rather than own the array)
  * whose lifetime must then at least match the lifetime of the aliasing object.
@@ -289,12 +293,17 @@ class UnicodeStringAppendable;  // unicode/appendable.h
  * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#maximizing-performance-with-the-unicodestring-storage-model).
  *
  * @see utf.h
+ * @see utfiterator.h
+ * @see utfStringCodePoints
+ * @see unsafeUTFStringCodePoints
  * @see CharacterIterator
  * @stable ICU 2.0
  */
 class U_COMMON_API UnicodeString : public Replaceable
 {
 public:
+  /** C++ boilerplate @internal */
+  using value_type = char16_t;
 
   /**
    * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
@@ -327,7 +336,6 @@ public:
    */
   inline bool operator== (const UnicodeString& text) const;
 
-#ifndef U_HIDE_DRAFT_API
   /**
    * Equality operator. Performs only bitwise comparison with `text`
    * which is, or which is implicitly convertible to,
@@ -341,7 +349,7 @@ public:
    * \endcode
    * @param text The string view to compare to this string.
    * @return true if `text` contains the same characters as this one, false otherwise.
-   * @draft ICU 76
+   * @stable ICU 76
    */
   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
   inline bool operator==(const S &text) const {
@@ -349,7 +357,6 @@ public:
     uint32_t len;  // unsigned to avoid a compiler warning
     return !isBogus() && (len = length()) == sv.length() && doEquals(sv.data(), len);
   }
-#endif  // U_HIDE_DRAFT_API
 
   /**
    * Inequality operator. Performs only bitwise comparison.
@@ -360,7 +367,6 @@ public:
    */
   inline bool operator!= (const UnicodeString& text) const;
 
-#ifndef U_HIDE_DRAFT_API
   /**
    * Inequality operator. Performs only bitwise comparison with `text`
    * which is, or which is implicitly convertible to,
@@ -376,13 +382,12 @@ public:
    * \endcode
    * @param text The string view to compare to this string.
    * @return false if `text` contains the same characters as this one, true otherwise.
-   * @draft ICU 76
+   * @stable ICU 76
    */
   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
   inline bool operator!=(const S &text) const {
     return !operator==(text);
   }
-#endif  // U_HIDE_DRAFT_API
 
   /**
    * Greater than operator. Performs only bitwise comparison.
@@ -1767,7 +1772,8 @@ public:
    * Unpaired surrogates are replaced with U+FFFD.
    * Calls toUTF8().
    *
-   * @param result A standard string (or a compatible object)
+   * @tparam StringClass A std::string or a std::u8string (or a compatible type)
+   * @param result A std::string or a std::u8string (or a compatible object)
    *        to which the UTF-8 version of the string is appended.
    * @return The string object.
    * @stable ICU 4.2
@@ -1780,6 +1786,27 @@ public:
     return result;
   }
 
+#ifndef U_HIDE_DRAFT_API
+  /**
+   * Convert the UnicodeString to a UTF-8 string.
+   * Unpaired surrogates are replaced with U+FFFD.
+   * Calls toUTF8().
+   *
+   * @tparam StringClass A std::string or a std::u8string (or a compatible type)
+   * @return A std::string or a std::u8string (or a compatible object)
+   *        with the UTF-8 version of the string.
+   * @draft ICU 78
+   * @see toUTF8
+   */
+  template<typename StringClass>
+  StringClass toUTF8String() const {
+    StringClass result;
+    StringByteSink<StringClass> sbs(&result, length());
+    toUTF8(sbs);
+    return result;
+  }
+#endif  // U_HIDE_DRAFT_API
+
   /**
    * Convert the UnicodeString to UTF-32.
    * Unpaired surrogates are replaced with U+FFFD.
@@ -1892,6 +1919,42 @@ public:
    */
   inline UBool isBogus() const;
 
+#ifndef U_HIDE_DRAFT_API
+private:
+  // These type aliases are private; there is no guarantee that they will remain
+  // aliases to the same types in subsequent versions of ICU.
+  // Note that whether `std::u16string_view::const_iterator` is a pointer or a
+  // class that models contiguous_iterator is platform-dependent.
+  using unspecified_iterator = std::u16string_view::const_iterator;
+  using unspecified_reverse_iterator = std::u16string_view::const_reverse_iterator;
+
+public:
+  /**
+   * @return an iterator to the first code unit in this string.
+   *     The iterator may be a pointer or a contiguous-iterator object.
+   * @draft ICU 78
+   */
+  unspecified_iterator begin() const { return std::u16string_view(*this).begin(); }
+  /**
+   * @return an iterator to just past the last code unit in this string.
+   *     The iterator may be a pointer or a contiguous-iterator object.
+   * @draft ICU 78
+   */
+  unspecified_iterator end() const { return std::u16string_view(*this).end(); }
+  /**
+   * @return a reverse iterator to the last code unit in this string.
+   *     Unlike begin()/end(), this is always a reverse-iterator object, never a pointer.
+   * @draft ICU 78
+   */
+  unspecified_reverse_iterator rbegin() const { return std::u16string_view(*this).rbegin(); }
+  /**
+   * @return a reverse iterator to just before the first code unit in this string.
+   *     Unlike begin()/end(), this is always a reverse-iterator object, never a pointer.
+   * @draft ICU 78
+   */
+  unspecified_reverse_iterator rend() const { return std::u16string_view(*this).rend(); }
+#endif  // U_HIDE_DRAFT_API
+
   //========================================
   // Write operations
   //========================================
@@ -1945,7 +2008,6 @@ public:
    */
   UnicodeString &fastCopyFrom(const UnicodeString &src);
 
-#ifndef U_HIDE_DRAFT_API
   /**
    * Assignment operator. Replaces the characters in this UnicodeString
    * with a copy of the characters from the `src`
@@ -1954,14 +2016,13 @@ public:
    *
    * @param src The string view containing the characters to copy.
    * @return a reference to this
-   * @draft ICU 76
+   * @stable ICU 76
    */
   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
   inline UnicodeString &operator=(const S &src) {
     unBogus();
     return doReplace(0, length(), internal::toU16StringView(src));
   }
-#endif  // U_HIDE_DRAFT_API
 
   /**
    * Move assignment operator; might leave src in bogus state.
@@ -2212,7 +2273,6 @@ public:
    */
   inline UnicodeString& operator+= (const UnicodeString& srcText);
 
-#ifndef U_HIDE_DRAFT_API
   /**
    * Append operator. Appends the characters in `src`
    * which is, or which is implicitly convertible to,
@@ -2221,13 +2281,12 @@ public:
    *
    * @param src the source for the new characters
    * @return a reference to this
-   * @draft ICU 76
+   * @stable ICU 76
    */
   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
   inline UnicodeString& operator+=(const S &src) {
     return doAppend(internal::toU16StringView(src));
   }
-#endif  // U_HIDE_DRAFT_API
 
   /**
    * Append the characters
@@ -2285,7 +2344,6 @@ public:
   inline UnicodeString& append(ConstChar16Ptr srcChars,
             int32_t srcLength);
 
-#ifndef U_HIDE_DRAFT_API
   /**
    * Appends the characters in `src`
    * which is, or which is implicitly convertible to,
@@ -2294,13 +2352,12 @@ public:
    *
    * @param src the source for the new characters
    * @return a reference to this
-   * @draft ICU 76
+   * @stable ICU 76
    */
   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
   inline UnicodeString& append(const S &src) {
     return doAppend(internal::toU16StringView(src));
   }
-#endif  // U_HIDE_DRAFT_API
 
   /**
    * Append the code unit `srcChar` to the UnicodeString object.
@@ -2318,6 +2375,16 @@ public:
    */
   UnicodeString& append(UChar32 srcChar);
 
+#ifndef U_HIDE_DRAFT_API
+  /**
+   * Appends the code unit `c` to the UnicodeString object.
+   * Same as append(c) except does not return *this.
+   *
+   * @param c the code unit to append
+   * @draft ICU 78
+   */
+  inline void push_back(char16_t c) { append(c); }
+#endif  // U_HIDE_DRAFT_API
 
   /* Insert operations */
 
@@ -3025,12 +3092,11 @@ public:
    */
   const char16_t *getTerminatedBuffer();
 
-#ifndef U_HIDE_DRAFT_API
   /**
    * Converts to a std::u16string_view.
    *
    * @return a string view of the contents of this string
-   * @draft ICU 76
+   * @stable ICU 76
    */
   inline operator std::u16string_view() const {
     return {getBuffer(), static_cast<std::u16string_view::size_type>(length())};
@@ -3044,7 +3110,7 @@ public:
    * about char16_t vs. wchar_t become clearer.
    *
    * @return a string view of the contents of this string
-   * @draft ICU 76
+   * @stable ICU 76
    */
   inline operator std::wstring_view() const {
     const char16_t *p = getBuffer();
@@ -3054,7 +3120,6 @@ public:
     return { reinterpret_cast<const wchar_t *>(p), (std::wstring_view::size_type)length() };
   }
 #endif  // U_SIZEOF_WCHAR_T
-#endif  // U_HIDE_DRAFT_API
 
   //========================================
   // Constructors
@@ -3257,7 +3322,6 @@ public:
    */
   inline UnicodeString(const std::nullptr_t text, int32_t textLength);
 
-#ifndef U_HIDE_DRAFT_API
   /**
    * Constructor from `text`
    * which is, or which is implicitly convertible to,
@@ -3268,14 +3332,13 @@ public:
    * then you can call the UnicodeString::readOnlyAlias() function instead of this constructor.
    *
    * @param text UTF-16 string
-   * @draft ICU 76
+   * @stable ICU 76
    */
   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text) {
     fUnion.fFields.fLengthAndFlags = kShortString;
     doAppend(internal::toU16StringViewNullable(text));
   }
-#endif  // U_HIDE_DRAFT_API
 
   /**
    * Readonly-aliasing char16_t* constructor.
@@ -3573,7 +3636,6 @@ public:
    */
   virtual ~UnicodeString();
 
-#ifndef U_HIDE_DRAFT_API
   /**
    * Readonly-aliasing factory method.
    * Aliases the same buffer as the input `text`
@@ -3594,7 +3656,7 @@ public:
    * so that both strings then alias the same readonly-text.
    *
    * @param text The string view to alias for the UnicodeString.
-   * @draft ICU 76
+   * @stable ICU 76
    */
   template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
   static inline UnicodeString readOnlyAlias(const S &text) {
@@ -3618,12 +3680,11 @@ public:
    * so that both strings then alias the same readonly-text.
    *
    * @param text The UnicodeString to alias.
-   * @draft ICU 76
+   * @stable ICU 76
    */
   static inline UnicodeString readOnlyAlias(const UnicodeString &text) {
     return readOnlyAliasFromUnicodeString(text);
   }
-#endif  // U_HIDE_DRAFT_API
 
   /**
    * Create a UnicodeString from a UTF-8 string.
@@ -4102,7 +4163,6 @@ private:
 U_COMMON_API UnicodeString U_EXPORT2
 operator+ (const UnicodeString &s1, const UnicodeString &s2);
 
-#ifndef U_HIDE_DRAFT_API
 /**
  * Creates a new UnicodeString from the concatenation of a UnicodeString and `s2`
  * which is, or which is implicitly convertible to,
@@ -4111,13 +4171,12 @@ operator+ (const UnicodeString &s1, const UnicodeString &s2);
  * @param s1 The string to be copied to the new one.
  * @param s2 The string view to be copied to the new string, after s1.
  * @return UnicodeString(s1).append(s2)
- * @draft ICU 76
+ * @stable ICU 76
  */
 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
 inline UnicodeString operator+(const UnicodeString &s1, const S &s2) {
   return unistr_internalConcat(s1, internal::toU16StringView(s2));
 }
-#endif  // U_HIDE_DRAFT_API
 
 #ifndef U_FORCE_HIDE_INTERNAL_API
 /** @internal */

+ 1 - 2
thirdparty/icu4c/common/unicode/uobject.h

@@ -157,7 +157,6 @@ public:
      */
     static void U_EXPORT2 operator delete[](void *p) noexcept;
 
-#if U_HAVE_PLACEMENT_NEW
     /**
      * Override for ICU4C C++ memory management for STL.
      * See new().
@@ -171,7 +170,7 @@ public:
      * @stable ICU 2.6
      */
     static inline void U_EXPORT2 operator delete(void *, void *) noexcept {}
-#endif /* U_HAVE_PLACEMENT_NEW */
+
 #if U_HAVE_DEBUG_LOCATION_NEW
     /**
       * This method overrides the MFC debug version of the operator new

+ 5 - 3
thirdparty/icu4c/common/unicode/urename.h

@@ -33,8 +33,9 @@
 
 #if !U_DISABLE_RENAMING
 
-// Disable Renaming for Visual Studio's IntelliSense feature, so that 'Go-to-Definition' (F12) will work.
-#if !(defined(_MSC_VER) && defined(__INTELLISENSE__))
+// Disable Renaming for Visual Studio's IntelliSense feature and for LLVM's Clang-Tidy tool, so that
+// 'Go-to-Definition' (F12) and 'include-cleaner' respectively will work.
+#if !(defined(_MSC_VER) && defined(__INTELLISENSE__)) && !defined(__clang_analyzer__)
 
 /* We need the U_ICU_ENTRY_POINT_RENAME definition. There's a default one in unicode/uvernum.h we can use, but we will give
    the platform a chance to define it first.
@@ -1392,6 +1393,7 @@
 #define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource)
 #define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts)
 #define uprv_add32_overflow U_ICU_ENTRY_POINT_RENAME(uprv_add32_overflow)
+#define uprv_addScriptExtensionsCodePoints U_ICU_ENTRY_POINT_RENAME(uprv_addScriptExtensionsCodePoints)
 #define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy)
 #define uprv_asciiFromEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_asciiFromEbcdic)
 #define uprv_asciitolower U_ICU_ENTRY_POINT_RENAME(uprv_asciitolower)
@@ -2037,7 +2039,7 @@
 #define ztrans_setTime U_ICU_ENTRY_POINT_RENAME(ztrans_setTime)
 #define ztrans_setTo U_ICU_ENTRY_POINT_RENAME(ztrans_setTo)
 
-#endif /* !(defined(_MSC_VER) && defined(__INTELLISENSE__)) */
+#endif /* !(defined(_MSC_VER) && defined(__INTELLISENSE__)) && !defined(__clang_analyzer__) */
 #endif /* U_DISABLE_RENAMING */
 #endif /* URENAME_H */
 

+ 12 - 1
thirdparty/icu4c/common/unicode/uscript.h

@@ -518,6 +518,17 @@ typedef enum UScriptCode {
       /** @stable ICU 76 */
       USCRIPT_TULU_TIGALARI                 = 207, /* Tutg */
 
+      /** @stable ICU 78 */
+      USCRIPT_BERIA_ERFE                    = 208, /* Berf */
+      /** @stable ICU 78 */
+      USCRIPT_SIDETIC                       = 209, /* Sidt */
+      /** @stable ICU 78 */
+      USCRIPT_TAI_YO                        = 210, /* Tayo */
+      /** @stable ICU 78 */
+      USCRIPT_TOLONG_SIKI                   = 211, /* Tols */
+      /** @stable ICU 78 */
+      USCRIPT_TRADITIONAL_HAN_WITH_LATIN    = 212, /* Hntl */
+
 #ifndef U_HIDE_DEPRECATED_API
     /**
      * One more than the highest normal UScriptCode value.
@@ -525,7 +536,7 @@ typedef enum UScriptCode {
      *
      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
      */
-    USCRIPT_CODE_LIMIT    = 208
+    USCRIPT_CODE_LIMIT    = 213
 #endif  // U_HIDE_DEPRECATED_API
 } UScriptCode;
 

+ 56 - 61
thirdparty/icu4c/common/unicode/uset.h

@@ -987,12 +987,10 @@ uset_size(const USet* set);
 U_CAPI int32_t U_EXPORT2
 uset_getRangeCount(const USet *set);
 
-#ifndef U_HIDE_DRAFT_API
-
 /**
  * @param set the set
  * @return the number of strings in this set.
- * @draft ICU 76
+ * @stable ICU 76
  * @see uset_getRangeCount
  * @see uset_getItemCount
  * @see uset_size
@@ -1009,14 +1007,12 @@ uset_getStringCount(const USet *set);
  * @param index the string index, 0 .. uset_getStringCount() - 1
  * @param pLength the output string length; must not be NULL
  * @return the pointer to the string; NULL if the index is out of range or pLength is NULL
- * @draft ICU 76
+ * @stable ICU 76
  * @see uset_getStringCount
  */
 U_CAPI const UChar* U_EXPORT2
 uset_getString(const USet *set, int32_t index, int32_t *pLength);
 
-#endif  // U_HIDE_DRAFT_API
-
 /**
  * Returns the number of items in this set.  An item is either a range
  * of characters or a single multicharacter string.
@@ -1327,7 +1323,6 @@ uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
                         UChar32* pStart, UChar32* pEnd);
 
 #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
-#ifndef U_HIDE_DRAFT_API
 
 namespace U_HEADER_ONLY_NAMESPACE {
 
@@ -1336,14 +1331,14 @@ namespace U_HEADER_ONLY_NAMESPACE {
 
 /**
  * Iterator returned by USetCodePoints.
- * @draft ICU 76
+ * @stable ICU 76
  */
 class USetCodePointIterator {
 public:
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     USetCodePointIterator(const USetCodePointIterator &other) = default;
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     bool operator==(const USetCodePointIterator &other) const {
         // No need to compare rangeCount & end given private constructor
         // and assuming we don't compare iterators across the set being modified.
@@ -1353,15 +1348,15 @@ public:
         return uset == other.uset && c == other.c;
     }
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     bool operator!=(const USetCodePointIterator &other) const { return !operator==(other); }
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     UChar32 operator*() const { return c; }
 
     /**
      * Pre-increment.
-     * @draft ICU 76
+     * @stable ICU 76
      */
     USetCodePointIterator &operator++() {
         if (c < end) {
@@ -1382,7 +1377,7 @@ public:
 
     /**
      * Post-increment.
-     * @draft ICU 76
+     * @stable ICU 76
      */
     USetCodePointIterator operator++(int) {
         USetCodePointIterator result(*this);
@@ -1419,7 +1414,7 @@ private:
  *
  * C++ UnicodeSet has member functions for iteration, including codePoints().
  *
- * @draft ICU 76
+ * @stable ICU 76
  * @see USetRanges
  * @see USetStrings
  * @see USetElements
@@ -1428,19 +1423,19 @@ class USetCodePoints {
 public:
     /**
      * Constructs a C++ "range" object over the code points of the USet.
-     * @draft ICU 76
+     * @stable ICU 76
      */
     USetCodePoints(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     USetCodePoints(const USetCodePoints &other) = default;
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     USetCodePointIterator begin() const {
         return USetCodePointIterator(uset, 0, rangeCount);
     }
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     USetCodePointIterator end() const {
         return USetCodePointIterator(uset, rangeCount, rangeCount);
     }
@@ -1455,25 +1450,25 @@ private:
  * Returned by USetRangeIterator which is returned by USetRanges.
  * Both the rangeStart and rangeEnd are in the range.
  * (end() returns an iterator corresponding to rangeEnd+1.)
- * @draft ICU 76
+ * @stable ICU 76
  */
 struct CodePointRange {
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     struct iterator {
-        /** @draft ICU 76 */
+        /** @stable ICU 76 */
         iterator(UChar32 aC) : c(aC) {}
 
-        /** @draft ICU 76 */
+        /** @stable ICU 76 */
         bool operator==(const iterator &other) const { return c == other.c; }
-        /** @draft ICU 76 */
+        /** @stable ICU 76 */
         bool operator!=(const iterator &other) const { return !operator==(other); }
 
-        /** @draft ICU 76 */
+        /** @stable ICU 76 */
         UChar32 operator*() const { return c; }
 
         /**
          * Pre-increment.
-         * @draft ICU 76
+         * @stable ICU 76
          */
         iterator &operator++() {
             ++c;
@@ -1482,7 +1477,7 @@ struct CodePointRange {
 
         /**
          * Post-increment.
-         * @draft ICU 76
+         * @stable ICU 76
          */
         iterator operator++(int) {
             return c++;
@@ -1490,44 +1485,44 @@ struct CodePointRange {
 
         /**
          * The current code point in the range.
-         * @draft ICU 76
+         * @stable ICU 76
          */
         UChar32 c;
     };
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     CodePointRange(UChar32 start, UChar32 end) : rangeStart(start), rangeEnd(end) {}
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     CodePointRange(const CodePointRange &other) = default;
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     size_t size() const { return (rangeEnd + 1) - rangeStart; }
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     iterator begin() const { return rangeStart; }
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     iterator end() const { return rangeEnd + 1; }
 
     /**
      * Start of a USet/UnicodeSet range of code points.
-     * @draft ICU 76
+     * @stable ICU 76
      */
     UChar32 rangeStart;
     /**
      * Inclusive end of a USet/UnicodeSet range of code points.
-     * @draft ICU 76
+     * @stable ICU 76
      */
     UChar32 rangeEnd;
 };
 
 /**
  * Iterator returned by USetRanges.
- * @draft ICU 76
+ * @stable ICU 76
  */
 class USetRangeIterator {
 public:
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     USetRangeIterator(const USetRangeIterator &other) = default;
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     bool operator==(const USetRangeIterator &other) const {
         // No need to compare rangeCount given private constructor
         // and assuming we don't compare iterators across the set being modified.
@@ -1536,10 +1531,10 @@ public:
         return uset == other.uset && rangeIndex == other.rangeIndex;
     }
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     bool operator!=(const USetRangeIterator &other) const { return !operator==(other); }
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     CodePointRange operator*() const {
         if (rangeIndex < rangeCount) {
             UChar32 start, end;
@@ -1554,7 +1549,7 @@ public:
 
     /**
      * Pre-increment.
-     * @draft ICU 76
+     * @stable ICU 76
      */
     USetRangeIterator &operator++() {
         ++rangeIndex;
@@ -1563,7 +1558,7 @@ public:
 
     /**
      * Post-increment.
-     * @draft ICU 76
+     * @stable ICU 76
      */
     USetRangeIterator operator++(int) {
         USetRangeIterator result(*this);
@@ -1600,7 +1595,7 @@ private:
  *
  * C++ UnicodeSet has member functions for iteration, including ranges().
  *
- * @draft ICU 76
+ * @stable ICU 76
  * @see USetCodePoints
  * @see USetStrings
  * @see USetElements
@@ -1609,19 +1604,19 @@ class USetRanges {
 public:
     /**
      * Constructs a C++ "range" object over the code point ranges of the USet.
-     * @draft ICU 76
+     * @stable ICU 76
      */
     USetRanges(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {}
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     USetRanges(const USetRanges &other) = default;
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     USetRangeIterator begin() const {
         return USetRangeIterator(uset, 0, rangeCount);
     }
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     USetRangeIterator end() const {
         return USetRangeIterator(uset, rangeCount, rangeCount);
     }
@@ -1633,14 +1628,14 @@ private:
 
 /**
  * Iterator returned by USetStrings.
- * @draft ICU 76
+ * @stable ICU 76
  */
 class USetStringIterator {
 public:
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     USetStringIterator(const USetStringIterator &other) = default;
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     bool operator==(const USetStringIterator &other) const {
         // No need to compare count given private constructor
         // and assuming we don't compare iterators across the set being modified.
@@ -1649,10 +1644,10 @@ public:
         return uset == other.uset && index == other.index;
     }
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     bool operator!=(const USetStringIterator &other) const { return !operator==(other); }
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     std::u16string_view operator*() const {
         if (index < count) {
             int32_t length;
@@ -1665,7 +1660,7 @@ public:
 
     /**
      * Pre-increment.
-     * @draft ICU 76
+     * @stable ICU 76
      */
     USetStringIterator &operator++() {
         ++index;
@@ -1674,7 +1669,7 @@ public:
 
     /**
      * Post-increment.
-     * @draft ICU 76
+     * @stable ICU 76
      */
     USetStringIterator operator++(int) {
         USetStringIterator result(*this);
@@ -1710,7 +1705,7 @@ private:
  *
  * C++ UnicodeSet has member functions for iteration, including strings().
  *
- * @draft ICU 76
+ * @stable ICU 76
  * @see USetCodePoints
  * @see USetRanges
  * @see USetElements
@@ -1719,19 +1714,19 @@ class USetStrings {
 public:
     /**
      * Constructs a C++ "range" object over the strings of the USet.
-     * @draft ICU 76
+     * @stable ICU 76
      */
     USetStrings(const USet *pUset) : uset(pUset), count(uset_getStringCount(pUset)) {}
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     USetStrings(const USetStrings &other) = default;
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     USetStringIterator begin() const {
         return USetStringIterator(uset, 0, count);
     }
 
-    /** @draft ICU 76 */
+    /** @stable ICU 76 */
     USetStringIterator end() const {
         return USetStringIterator(uset, count, count);
     }
@@ -1740,7 +1735,6 @@ private:
     const USet *uset;
     int32_t count;
 };
-#endif  // U_HIDE_DRAFT_API
 
 #ifndef U_HIDE_DRAFT_API
 /**
@@ -1900,9 +1894,10 @@ private:
     int32_t rangeCount, stringCount;
 };
 
+#endif  // U_HIDE_DRAFT_API
+
 }  // namespace U_HEADER_ONLY_NAMESPACE
 
-#endif  // U_HIDE_DRAFT_API
 #endif  // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
 
 #endif  // __USET_H__

+ 32 - 1
thirdparty/icu4c/common/unicode/utf.h

@@ -121,8 +121,39 @@
 
 /* single-code point definitions -------------------------------------------- */
 
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Is c a Unicode code point U+0000..U+10FFFF?
+ * https://www.unicode.org/glossary/#code_point
+ *
+ * @param c 32-bit code point
+ * @return true or false
+ * @draft ICU 78
+ * @see AllCodePoints
+ * @see U_IS_SCALAR_VALUE
+ */
+#define U_IS_CODE_POINT(c) ((uint32_t)(c)<=0x10ffff)
+
+/**
+ * Is c a Unicode scalar value, that is, a non-surrogate code point?
+ * Only scalar values can be represented in well-formed UTF-8/16/32.
+ * https://www.unicode.org/glossary/#unicode_scalar_value
+ *
+ * @param c 32-bit code point; might be evaluated multiple times
+ * @return true or false
+ * @draft ICU 78
+ * @see AllScalarValues
+ * @see U_IS_CODE_POINT
+ */
+#define U_IS_SCALAR_VALUE(c) ((uint32_t)(c)<0xd800 || (0xe000<=(c) && (c)<=0x10ffff))
+
+#endif  // U_HIDE_DRAFT_API
+
 /**
  * Is this code point a Unicode noncharacter?
+ * https://www.unicode.org/glossary/#noncharacter
+ *
  * @param c 32-bit code point
  * @return true or false
  * @stable ICU 2.4
@@ -150,7 +181,7 @@
  */
 #define U_IS_UNICODE_CHAR(c) \
     ((uint32_t)(c)<0xd800 || \
-        (0xdfff<(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c)))
+        (0xe000<=(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c)))
 
 /**
  * Is this code point a BMP code point (U+0000..U+ffff)?

+ 29 - 3
thirdparty/icu4c/common/unicode/utf8.h

@@ -170,7 +170,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
  * @return true or false
  * @stable ICU 2.4
  */
-#define U8_IS_SINGLE(c) (((c)&0x80)==0)
+#define U8_IS_SINGLE(c) ((int8_t)(c)>=0)
 
 /**
  * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
@@ -214,6 +214,32 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
  */
 #define U8_MAX_LENGTH 4
 
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Returns the length of a well-formed UTF-8 byte sequence according to its lead byte.
+ * Returns 1 for 0..0xc1 as well as for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @return 1..4
+ * @draft ICU 78
+ */
+#define U8_LENGTH_FROM_LEAD_BYTE(leadByte) (U8_COUNT_TRAIL_BYTES(leadByte) + 1)
+
+/**
+ * Returns the length of a well-formed UTF-8 byte sequence according to its lead byte.
+ * Returns 1 for 0..0xc1. Undefined for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @return 1..4
+ * @draft ICU 78
+ */
+#define U8_LENGTH_FROM_LEAD_BYTE_UNSAFE(leadByte) (U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) + 1)
+
+#endif  // U_HIDE_DRAFT_API
+
 /**
  * Get a code point from a string at a random-access offset,
  * without changing the offset.
@@ -517,7 +543,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
             if(U8_IS_TRAIL(__t1)) { \
                 ++(i); \
             } \
-        } else /* c>=0xf0 */ { \
+        } else /* b>=0xf0 */ { \
             if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
                     ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
                     ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
@@ -683,7 +709,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
  */
 #define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
     (c)=(uint8_t)(s)[--(i)]; \
-    if(U8_IS_TRAIL(c)) { \
+    if(!U8_IS_SINGLE(c)) { \
         uint8_t __b, __count=1, __shift=6; \
 \
         /* c is a trail byte */ \

+ 2 - 0
thirdparty/icu4c/common/unicode/utf_old.h

@@ -385,8 +385,10 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[];
         /* each following branch falls through to the next one */ \
         case 3: \
             (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+            U_FALLTHROUGH; \
         case 2: \
             (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+            U_FALLTHROUGH; \
         case 1: \
             (c)=((c)<<6)|((s)[(i)++]&0x3f); \
         /* no other branches to optimize switch() */ \

+ 2677 - 0
thirdparty/icu4c/common/unicode/utfiterator.h

@@ -0,0 +1,2677 @@
+// © 2024 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+// utfiterator.h
+// created: 2024aug12 Markus W. Scherer
+
+#ifndef __UTFITERATOR_H__
+#define __UTFITERATOR_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API || !defined(UTYPES_H)
+
+#include <iterator>
+#if defined(__cpp_lib_ranges)
+#include <ranges>
+#endif
+#include <string>
+#include <string_view>
+#include <type_traits>
+#include "unicode/utf16.h"
+#include "unicode/utf8.h"
+#include "unicode/uversion.h"
+
+/**
+ * \file
+ * \brief C++ header-only API: C++ iterators over Unicode strings (=UTF-8/16/32 if well-formed).
+ *
+ * Sample code:
+ * \code
+ * #include <string_view>
+ * #include <iostream>
+ * #include "unicode/utypes.h"
+ * #include "unicode/utfiterator.h"
+ *
+ * using icu::header::utfIterator;
+ * using icu::header::utfStringCodePoints;
+ * using icu::header::unsafeUTFIterator;
+ * using icu::header::unsafeUTFStringCodePoints;
+ *
+ * int32_t rangeLoop16(std::u16string_view s) {
+ *     // We are just adding up the code points for minimal-code demonstration purposes.
+ *     int32_t sum = 0;
+ *     for (auto units : utfStringCodePoints<UChar32, UTF_BEHAVIOR_NEGATIVE>(s)) {
+ *         sum += units.codePoint();  // < 0 if ill-formed
+ *     }
+ *     return sum;
+ * }
+ *
+ * int32_t loopIterPlusPlus16(std::u16string_view s) {
+ *     auto range = utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(s);
+ *     int32_t sum = 0;
+ *     for (auto iter = range.begin(), limit = range.end(); iter != limit;) {
+ *         sum += (*iter++).codePoint();  // U+FFFD if ill-formed
+ *     }
+ *     return sum;
+ * }
+ *
+ * int32_t backwardLoop16(std::u16string_view s) {
+ *     auto range = utfStringCodePoints<UChar32, UTF_BEHAVIOR_SURROGATE>(s);
+ *     int32_t sum = 0;
+ *     for (auto start = range.begin(), iter = range.end(); start != iter;) {
+ *         sum += (*--iter).codePoint();  // surrogate code point if unpaired / ill-formed
+ *     }
+ *     return sum;
+ * }
+ *
+ * int32_t reverseLoop8(std::string_view s) {
+ *     auto range = utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(s);
+ *     int32_t sum = 0;
+ *     for (auto iter = range.rbegin(), limit = range.rend(); iter != limit; ++iter) {
+ *         sum += iter->codePoint();  // U+FFFD if ill-formed
+ *     }
+ *     return sum;
+ * }
+ *
+ * int32_t countCodePoints16(std::u16string_view s) {
+ *     auto range = utfStringCodePoints<UChar32, UTF_BEHAVIOR_SURROGATE>(s);
+ *     return std::distance(range.begin(), range.end());
+ * }
+ *
+ * int32_t unsafeRangeLoop16(std::u16string_view s) {
+ *     int32_t sum = 0;
+ *     for (auto units : unsafeUTFStringCodePoints<UChar32>(s)) {
+ *         sum += units.codePoint();
+ *     }
+ *     return sum;
+ * }
+ *
+ * int32_t unsafeReverseLoop8(std::string_view s) {
+ *     auto range = unsafeUTFStringCodePoints<UChar32>(s);
+ *     int32_t sum = 0;
+ *     for (auto iter = range.rbegin(), limit = range.rend(); iter != limit; ++iter) {
+ *         sum += iter->codePoint();
+ *     }
+ *     return sum;
+ * }
+ *
+ * char32_t firstCodePointOrFFFD16(std::u16string_view s) {
+ *     if (s.empty()) { return 0xfffd; }
+ *     auto range = utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(s);
+ *     return range.begin()->codePoint();
+ * }
+ *
+ * std::string_view firstSequence8(std::string_view s) {
+ *     if (s.empty()) { return {}; }
+ *     auto range = utfStringCodePoints<char32_t, UTF_BEHAVIOR_FFFD>(s);
+ *     auto units = *(range.begin());
+ *     if (units.wellFormed()) {
+ *         return units.stringView();
+ *     } else {
+ *         return {};
+ *     }
+ * }
+ *
+ * template<typename InputStream>  // some istream or streambuf
+ * std::u32string cpFromInput(InputStream &in) {
+ *     // This is a single-pass input_iterator.
+ *     std::istreambuf_iterator bufIter(in);
+ *     std::istreambuf_iterator<typename InputStream::char_type> bufLimit;
+ *     auto iter = utfIterator<char32_t, UTF_BEHAVIOR_FFFD>(bufIter);
+ *     auto limit = utfIterator<char32_t, UTF_BEHAVIOR_FFFD>(bufLimit);
+ *     std::u32string s32;
+ *     for (; iter != limit; ++iter) {
+ *         s32.push_back(iter->codePoint());
+ *     }
+ *     return s32;
+ * }
+ *
+ * std::u32string cpFromStdin() { return cpFromInput(std::cin); }
+ * std::u32string cpFromWideStdin() { return cpFromInput(std::wcin); }
+ * \endcode
+ */
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Some defined behaviors for handling ill-formed Unicode strings.
+ * This is a template parameter for UTFIterator and related classes.
+ *
+ * When a validating UTFIterator encounters an ill-formed code unit sequence,
+ * then CodeUnits.codePoint() is a value according to this parameter.
+ * CodeUnits.wellFormed() is false for such a sequence regardless of this parameter.
+ *
+ * @draft ICU 78
+ * @see CodeUnits
+ * @see UTFIterator
+ * @see UTFStringCodePoints
+ */
+typedef enum UTFIllFormedBehavior {
+    /**
+     * Returns a negative value (-1=U_SENTINEL) instead of a code point.
+     * If the CP32 template parameter for the relevant classes is an unsigned type,
+     * then the negative value becomes 0xffffffff=UINT32_MAX.
+     *
+     * @draft ICU 78
+     */
+    UTF_BEHAVIOR_NEGATIVE,
+    /** Returns U+FFFD Replacement Character. @draft ICU 78 */
+    UTF_BEHAVIOR_FFFD,
+    /**
+     * UTF-8: Not allowed (rejected at compile time);
+     * UTF-16: returns the unpaired surrogate;
+     * UTF-32: returns the surrogate code point, or U+FFFD if out of range.
+     *
+     * @draft ICU 78
+     */
+    UTF_BEHAVIOR_SURROGATE
+} UTFIllFormedBehavior;
+
+namespace U_HEADER_ONLY_NAMESPACE {
+
+namespace prv {
+#if U_CPLUSPLUS_VERSION >= 20
+
+/** Value type of the iterator Iter, via std::iter_value_t. @internal */
+template<typename Iter>
+using iter_value_t = typename std::iter_value_t<Iter>;
+
+/** Difference type of the iterator Iter, via std::iter_difference_t. @internal */
+template<typename Iter>
+using iter_difference_t = std::iter_difference_t<Iter>;
+
+/** True if Iter models the std::forward_iterator concept. @internal */
+template<typename Iter>
+constexpr bool forward_iterator = std::forward_iterator<Iter>;
+
+/** True if Iter models the std::bidirectional_iterator concept. @internal */
+template<typename Iter>
+constexpr bool bidirectional_iterator = std::bidirectional_iterator<Iter>;
+
+/** True if Range models the std::ranges::range concept. @internal */
+template<typename Range>
+constexpr bool range = std::ranges::range<Range>;
+
+#else
+
+/** Value type of the iterator Iter, via std::iterator_traits. @internal */
+template<typename Iter>
+using iter_value_t = typename std::iterator_traits<Iter>::value_type;
+
+/** Difference type of the iterator Iter, via std::iterator_traits. @internal */
+template<typename Iter>
+using iter_difference_t = typename std::iterator_traits<Iter>::difference_type;
+
+/** True if the iterator_category of Iter is, or derives from, std::forward_iterator_tag. @internal */
+template<typename Iter>
+constexpr bool forward_iterator =
+    std::is_base_of_v<
+        std::forward_iterator_tag,
+        typename std::iterator_traits<Iter>::iterator_category>;
+
+/** True if the iterator_category of Iter is, or derives from, std::bidirectional_iterator_tag. @internal */
+template<typename Iter>
+constexpr bool bidirectional_iterator =
+    std::is_base_of_v<
+        std::bidirectional_iterator_tag,
+        typename std::iterator_traits<Iter>::iterator_category>;
+
+/** Detection helper, primary template: false unless the specialization below applies. @internal */
+template<typename Range, typename = void>
+struct range_type : std::false_type {};
+
+/** Detection helper: true if calling begin() and end() on a Range object is well-formed. @internal */
+template<typename Range>
+struct range_type<
+    Range,
+    std::void_t<decltype(std::declval<Range>().begin()),
+                decltype(std::declval<Range>().end())>> : std::true_type {};
+
+/** True if Range has begin() and end() member functions, see range_type. @internal */
+template<typename Range>
+constexpr bool range = range_type<Range>::value;
+
+#endif
+
+/** Type trait, primary template: false for any T that is not a std::basic_string_view. @internal */
+template <typename T> struct is_basic_string_view : std::false_type {};
+
+/** Type trait specialization: true for every std::basic_string_view instantiation. @internal */
+template <typename... Args>
+struct is_basic_string_view<std::basic_string_view<Args...>> : std::true_type {};
+
+/** Shorthand for is_basic_string_view<T>::value. @internal */
+template <typename T> constexpr bool is_basic_string_view_v = is_basic_string_view<T>::value;
+
+/**
+ * Iterator that holds a single CP32 value and counts it upward.
+ * Dereferencing returns the current value; incrementing adds one.
+ * If skipSurrogates is true, then incrementing from U+D7FF continues at U+E000.
+ * Two iterators compare equal if they hold the same value.
+ *
+ * @tparam CP32 32-bit code point type
+ * @tparam skipSurrogates whether to skip the surrogate code points U+D800..U+DFFF
+ * @internal
+ */
+template<typename CP32, bool skipSurrogates>
+class CodePointsIterator {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+public:
+    /** C++ iterator boilerplate @internal */
+    using value_type = CP32;
+    /** C++ iterator boilerplate: values are returned by value, not by reference. @internal */
+    using reference = value_type;
+    /** C++ iterator boilerplate @internal */
+    using pointer = CP32 *;
+    /** C++ iterator boilerplate @internal */
+    using difference_type = int32_t;
+    /** C++ iterator boilerplate @internal */
+    using iterator_category = std::forward_iterator_tag;
+
+    /** Starts the iterator at the value c. @internal */
+    inline CodePointsIterator(CP32 c) : c_(c) {}
+    /** @return true if both iterators hold the same value. @internal */
+    inline bool operator==(const CodePointsIterator &other) const { return c_ == other.c_; }
+    /** @return the negation of operator==. @internal */
+    inline bool operator!=(const CodePointsIterator &other) const { return !operator==(other); }
+    /** @return the current value. @internal */
+    inline CP32 operator*() const { return c_; }
+    /** Advances to the next value, optionally skipping the surrogates. @internal */
+    inline CodePointsIterator &operator++() {  // pre-increment
+        ++c_;
+        if (skipSurrogates && c_ == 0xd800) {  // just stepped onto the first surrogate
+            c_ = 0xe000;  // first value after the surrogate range
+        }
+        return *this;
+    }
+    /** Advances like pre-increment but returns a copy of the iterator from before the step. @internal */
+    inline CodePointsIterator operator++(int) {  // post-increment
+        CodePointsIterator result(*this);
+        ++(*this);
+        return result;
+    }
+
+private:
+    CP32 c_;  // current value
+};
+
+}  // namespace prv
+
+/**
+ * A C++ "range" over all Unicode code points U+0000..U+10FFFF.
+ * https://www.unicode.org/glossary/#code_point
+ * The range includes the surrogate code points; see AllScalarValues for a range without them.
+ *
+ * Intended for test and builder code.
+ *
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
+ * @draft ICU 78
+ * @see U_IS_CODE_POINT
+ */
+template<typename CP32>
+class AllCodePoints {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+public:
+    /** Constructor. @draft ICU 78 */
+    AllCodePoints() {}
+    /**
+     * @return an iterator over all Unicode code points, starting at U+0000.
+     *     The iterator returns CP32 integers.
+     * @draft ICU 78
+     */
+    auto begin() const { return prv::CodePointsIterator<CP32, false>(0); }
+    /**
+     * @return an exclusive-end iterator over all Unicode code points.
+     *     It holds 0x110000, one past the last code point U+10FFFF.
+     * @draft ICU 78
+     */
+    auto end() const { return prv::CodePointsIterator<CP32, false>(0x110000); }
+};
+
+/**
+ * A C++ "range" over all Unicode scalar values U+0000..U+D7FF & U+E000..U+10FFFF.
+ * That is, all code points except surrogates.
+ * Only scalar values can be represented in well-formed UTF-8/16/32.
+ * https://www.unicode.org/glossary/#unicode_scalar_value
+ *
+ * Intended for test and builder code.
+ *
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
+ * @draft ICU 78
+ * @see U_IS_SCALAR_VALUE
+ */
+template<typename CP32>
+class AllScalarValues {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+public:
+    /** Constructor. @draft ICU 78 */
+    AllScalarValues() {}
+    /**
+     * @return an iterator over all Unicode scalar values, starting at U+0000.
+     *     The iterator returns CP32 integers and skips the surrogates when incremented.
+     * @draft ICU 78
+     */
+    auto begin() const { return prv::CodePointsIterator<CP32, true>(0); }
+    /**
+     * @return an exclusive-end iterator over all Unicode scalar values.
+     *     It holds 0x110000, one past the last scalar value U+10FFFF.
+     * @draft ICU 78
+     */
+    auto end() const { return prv::CodePointsIterator<CP32, true>(0x110000); }
+};
+
+/**
+ * Result of decoding a code unit sequence for one code point.
+ * Returned from non-validating Unicode string code point iterators.
+ * Base class for class CodeUnits which is returned from validating iterators.
+ *
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if UTF_BEHAVIOR_NEGATIVE
+ * @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
+ *     UTF-8: char or char8_t or uint8_t;
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t;
+ *     UTF-32: char32_t or UChar32=int32_t or (on Linux) wchar_t
+ * @see UnsafeUTFIterator
+ * @see UnsafeUTFStringCodePoints
+ * @draft ICU 78
+ */
+template<typename CP32, typename UnitIter, typename = void>
+class UnsafeCodeUnits {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+    using Unit = typename prv::iter_value_t<UnitIter>;
+public:
+    /**
+     * Stores the decoded code point, the number of code units in its sequence,
+     * and the iterators delimiting that sequence.
+     * @internal
+     */
+    UnsafeCodeUnits(CP32 codePoint, uint8_t length, UnitIter start, UnitIter limit) :
+            c_(codePoint), len_(length), start_(start), limit_(limit) {}
+
+    /** Copy constructor. @draft ICU 78 */
+    UnsafeCodeUnits(const UnsafeCodeUnits &other) = default;
+    /** Copy assignment operator. @draft ICU 78 */
+    UnsafeCodeUnits &operator=(const UnsafeCodeUnits &other) = default;
+
+    /**
+     * @return the Unicode code point decoded from the code unit sequence.
+     *     If the sequence is ill-formed and the iterator validates,
+     *     then this is a replacement value according to the iterator's
+     *     UTFIllFormedBehavior template parameter.
+     * @draft ICU 78
+     */
+    CP32 codePoint() const { return c_; }
+
+    /**
+     * @return the start of the code unit sequence for one code point.
+     * Only enabled if UnitIter is a (multi-pass) forward_iterator or better.
+     * @draft ICU 78
+     */
+    UnitIter begin() const { return start_; }
+
+    /**
+     * @return the limit (exclusive end) of the code unit sequence for one code point.
+     * Only enabled if UnitIter is a (multi-pass) forward_iterator or better.
+     * @draft ICU 78
+     */
+    UnitIter end() const { return limit_; }
+
+    /**
+     * @return the length of the code unit sequence for one code point.
+     * @draft ICU 78
+     */
+    uint8_t length() const { return len_; }
+
+#if U_CPLUSPLUS_VERSION >= 20
+    /**
+     * @return a string_view of the code unit sequence for one code point.
+     * Only works if UnitIter is a pointer or a contiguous_iterator.
+     * The view is constructed from the begin() and end() iterators.
+     * @draft ICU 78
+     */
+    template<std::contiguous_iterator Iter = UnitIter>
+    std::basic_string_view<Unit> stringView() const {
+        return std::basic_string_view<Unit>(begin(), end());
+    }
+#else
+    /**
+     * @return a string_view of the code unit sequence for one code point.
+     * Only enabled if UnitIter is a pointer, or an iterator or const_iterator
+     * of std::basic_string or std::basic_string_view.
+     * The view is constructed from the address of the first code unit and the length.
+     * @draft ICU 78
+     */
+    template<typename Iter = UnitIter, typename Unit = typename std::iterator_traits<Iter>::value_type>
+    std::enable_if_t<std::is_pointer_v<Iter> ||
+                         std::is_same_v<Iter, typename std::basic_string<Unit>::iterator> ||
+                         std::is_same_v<Iter, typename std::basic_string<Unit>::const_iterator> ||
+                         std::is_same_v<Iter, typename std::basic_string_view<Unit>::iterator> ||
+                         std::is_same_v<Iter, typename std::basic_string_view<Unit>::const_iterator>,
+                     std::basic_string_view<Unit>>
+    stringView() const {
+        return std::basic_string_view<Unit>(&*start_, len_);
+    }
+#endif
+
+private:
+    // Order of fields with padding and access frequency in mind.
+    CP32 c_;  // decoded code point, or the replacement value for an ill-formed sequence
+    uint8_t len_;  // number of code units in the sequence
+    UnitIter start_;  // first code unit of the sequence
+    UnitIter limit_;  // exclusive end of the sequence
+};
+
+#ifndef U_IN_DOXYGEN
+// Partial template specialization for single-pass input iterator.
+// Selected when prv::forward_iterator<UnitIter> is false.
+// No UnitIter field, no getter for it, no stringView().
+template<typename CP32, typename UnitIter>
+class UnsafeCodeUnits<
+        CP32,
+        UnitIter,
+        std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+public:
+    UnsafeCodeUnits(CP32 codePoint, uint8_t length) : c_(codePoint), len_(length) {}  // no iterators to store
+
+    UnsafeCodeUnits(const UnsafeCodeUnits &other) = default;
+    UnsafeCodeUnits &operator=(const UnsafeCodeUnits &other) = default;
+
+    CP32 codePoint() const { return c_; }  // the decoded code point
+
+    uint8_t length() const { return len_; }  // number of code units in the sequence
+
+private:
+    // Order of fields with padding and access frequency in mind.
+    CP32 c_;
+    uint8_t len_;
+};
+#endif  // U_IN_DOXYGEN
+
+/**
+ * Result of validating and decoding a code unit sequence for one code point.
+ * Returned from validating Unicode string code point iterators.
+ * Adds function wellFormed() to base class UnsafeCodeUnits.
+ *
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if UTF_BEHAVIOR_NEGATIVE
+ * @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
+ *     UTF-8: char or char8_t or uint8_t;
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t;
+ *     UTF-32: char32_t or UChar32=int32_t or (on Linux) wchar_t
+ * @see UTFIterator
+ * @see UTFStringCodePoints
+ * @draft ICU 78
+ */
+template<typename CP32, typename UnitIter, typename = void>
+class CodeUnits : public UnsafeCodeUnits<CP32, UnitIter> {
+public:
+    /**
+     * Forwards the code point, length and sequence iterators to the base class
+     * and stores the well-formedness flag.
+     * @internal
+     */
+    CodeUnits(CP32 codePoint, uint8_t length, bool wellFormed, UnitIter start, UnitIter limit) :
+            UnsafeCodeUnits<CP32, UnitIter>(codePoint, length, start, limit), ok_(wellFormed) {}
+
+    /** Copy constructor. @draft ICU 78 */
+    CodeUnits(const CodeUnits &other) = default;
+    /** Copy assignment operator. @draft ICU 78 */
+    CodeUnits &operator=(const CodeUnits &other) = default;
+
+    /**
+     * @return true if the decoded code unit sequence is well-formed.
+     * @draft ICU 78
+     */
+    bool wellFormed() const { return ok_; }
+
+private:
+    bool ok_;  // true if the code unit sequence was well-formed
+};
+
+#ifndef U_IN_DOXYGEN
+// Partial template specialization for single-pass input iterator.
+// Selected when prv::forward_iterator<UnitIter> is false.
+// No UnitIter field, no getter for it, no stringView().
+template<typename CP32, typename UnitIter>
+class CodeUnits<
+        CP32,
+        UnitIter,
+        std::enable_if_t<!prv::forward_iterator<UnitIter>>> :
+            public UnsafeCodeUnits<CP32, UnitIter> {
+public:
+    CodeUnits(CP32 codePoint, uint8_t length, bool wellFormed) :
+            UnsafeCodeUnits<CP32, UnitIter>(codePoint, length), ok_(wellFormed) {}  // no iterators to forward
+
+    CodeUnits(const CodeUnits &other) = default;
+    CodeUnits &operator=(const CodeUnits &other) = default;
+
+    bool wellFormed() const { return ok_; }  // true if the code unit sequence was well-formed
+
+private:
+    bool ok_;
+};
+#endif  // U_IN_DOXYGEN
+
+// Validating implementations ---------------------------------------------- ***
+
+#ifndef U_IN_DOXYGEN
+template<typename CP32, UTFIllFormedBehavior behavior,
+         typename UnitIter, typename LimitIter = UnitIter, typename = void>
+class UTFImpl;  // primary template, declared only; the last parameter is the enable_if slot used by the specializations
+
+// Note: readAndInc() functions take both a p0 and a p iterator.
+// They must have the same value.
+// For a multi-pass UnitIter, the caller must copy its p into a local variable p0,
+// and readAndInc() copies p0 and the incremented p into the CodeUnits.
+// For a single-pass UnitIter, which may not be default-constructible nor copyable,
+// the caller can pass p into both references, and readAndInc() does not use p0
+// and constructs CodeUnits without them.
+// Moving the p0 variable into the call site avoids having to declare it inside readAndInc()
+// which may not be possible for a single-pass iterator.
+
+// UTF-8: validating implementation, selected when the code unit type of UnitIter is one byte wide.
+template<typename CP32, UTFIllFormedBehavior behavior, typename UnitIter, typename LimitIter>
+class UTFImpl<
+        CP32, behavior,
+        UnitIter, LimitIter,
+        std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 1>> {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+    static_assert(behavior != UTF_BEHAVIOR_SURROGATE,
+                  "For 8-bit strings, the SURROGATE option does not have an equivalent.");
+public:
+    // Handle ill-formed UTF-8: the code point value reported for an ill-formed sequence.
+    U_FORCE_INLINE static CP32 sub() {
+        switch (behavior) {  // UTF_BEHAVIOR_SURROGATE is excluded by the static_assert above
+            case UTF_BEHAVIOR_NEGATIVE: return U_SENTINEL;
+            case UTF_BEHAVIOR_FFFD: return 0xfffd;
+        }
+    }
+
+    U_FORCE_INLINE static void inc(UnitIter &p, const LimitIter &limit) {
+        // Very similar to U8_FWD_1(). Skips one sequence without computing the code point.
+        uint8_t b = *p;
+        ++p;  // always consume the first byte
+        if (U8_IS_LEAD(b) && p != limit) {
+            uint8_t t1 = *p;  // candidate first trail byte; consumed only if it passes the checks below
+            if ((0xe0 <= b && b < 0xf0)) {
+                if (U8_IS_VALID_LEAD3_AND_T1(b, t1) &&
+                        ++p != limit && U8_IS_TRAIL(*p)) {
+                    ++p;
+                }
+            } else if (b < 0xe0) {
+                if (U8_IS_TRAIL(t1)) {
+                    ++p;
+                }
+            } else /* b >= 0xf0 */ {
+                if (U8_IS_VALID_LEAD4_AND_T1(b, t1) &&
+                        ++p != limit && U8_IS_TRAIL(*p) &&
+                        ++p != limit && U8_IS_TRAIL(*p)) {
+                    ++p;
+                }
+            }
+        }
+    }
+
+    U_FORCE_INLINE static void dec(UnitIter start, UnitIter &p) {
+        // Very similar to U8_BACK_1(). Moves back over one sequence without computing the code point.
+        uint8_t c = *--p;  // always step back over at least one byte
+        if (U8_IS_TRAIL(c) && p != start) {
+            UnitIter p1 = p;  // probe further back; p is only updated once a sequence start is accepted
+            uint8_t b1 = *--p1;
+            if (U8_IS_LEAD(b1)) {
+                if (b1 < 0xe0 ||
+                        (b1 < 0xf0 ?
+                            U8_IS_VALID_LEAD3_AND_T1(b1, c) :
+                            U8_IS_VALID_LEAD4_AND_T1(b1, c))) {
+                    p = p1;
+                    return;
+                }
+            } else if (U8_IS_TRAIL(b1) && p1 != start) {
+                uint8_t b2 = *--p1;
+                if (0xe0 <= b2 && b2 <= 0xf4) {
+                    if (b2 < 0xf0 ?
+                            U8_IS_VALID_LEAD3_AND_T1(b2, b1) :
+                            U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
+                        p = p1;
+                        return;
+                    }
+                } else if (U8_IS_TRAIL(b2) && p1 != start) {
+                    uint8_t b3 = *--p1;
+                    if (0xf0 <= b3 && b3 <= 0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
+                        p = p1;
+                        return;
+                    }
+                }
+            }
+        }
+    }
+
+    U_FORCE_INLINE static CodeUnits<CP32, UnitIter> readAndInc(
+            UnitIter &p0, UnitIter &p, const LimitIter &limit) {
+        constexpr bool isMultiPass = prv::forward_iterator<UnitIter>;  // selects which CodeUnits constructor is used
+        // Very similar to U8_NEXT_OR_FFFD().
+        CP32 c = uint8_t(*p);
+        ++p;
+        if (U8_IS_SINGLE(c)) {
+            if constexpr (isMultiPass) {
+                return {c, 1, true, p0, p};
+            } else {
+                return {c, 1, true};
+            }
+        }
+        uint8_t length = 1;  // code units consumed so far: the first byte
+        uint8_t t = 0;
+        if (p != limit &&
+                // fetch/validate/assemble all but last trail byte
+                (c >= 0xe0 ?
+                    (c < 0xf0 ?  // U+0800..U+FFFF except surrogates
+                        U8_LEAD3_T1_BITS[c &= 0xf] & (1 << ((t = *p) >> 5)) &&
+                        (t &= 0x3f, 1)
+                    :  // U+10000..U+10FFFF
+                        (c -= 0xf0) <= 4 &&
+                        U8_LEAD4_T1_BITS[(t = *p) >> 4] & (1 << c) &&
+                        (c = (c << 6) | (t & 0x3f), ++length, ++p != limit) &&
+                        (t = *p - 0x80) <= 0x3f) &&
+                    // valid second-to-last trail byte
+                    (c = (c << 6) | t, ++length, ++p != limit)
+                :  // U+0080..U+07FF
+                    c >= 0xc2 && (c &= 0x1f, 1)) &&
+                // last trail byte
+                (t = *p - 0x80) <= 0x3f) {
+            c = (c << 6) | t;
+            ++length;
+            ++p;
+            if constexpr (isMultiPass) {
+                return {c, length, true, p0, p};
+            } else {
+                return {c, length, true};
+            }
+        }
+        // Ill-formed: report the substitute value and the code units consumed before the failure.
+        if constexpr (isMultiPass) {
+            return {sub(), length, false, p0, p};
+        } else {
+            return {sub(), length, false};
+        }
+    }
+
+    U_FORCE_INLINE static CodeUnits<CP32, UnitIter> decAndRead(UnitIter start, UnitIter &p) {
+        // Very similar to U8_PREV_OR_FFFD().
+        UnitIter p0 = p;  // exclusive end of the sequence being read
+        CP32 c = uint8_t(*--p);
+        if (U8_IS_SINGLE(c)) {
+            return {c, 1, true, p, p0};
+        }
+        if (U8_IS_TRAIL(c) && p != start) {
+            UnitIter p1 = p;  // probe further back; p is only updated once a sequence start is accepted
+            uint8_t b1 = *--p1;
+            if (U8_IS_LEAD(b1)) {
+                if (b1 < 0xe0) {
+                    p = p1;
+                    c = ((b1 - 0xc0) << 6) | (c & 0x3f);
+                    return {c, 2, true, p, p0};
+                } else if (b1 < 0xf0 ?
+                            U8_IS_VALID_LEAD3_AND_T1(b1, c) :
+                            U8_IS_VALID_LEAD4_AND_T1(b1, c)) {
+                    // Truncated 3- or 4-byte sequence.
+                    p = p1;
+                    return {sub(), 2, false, p, p0};
+                }
+            } else if (U8_IS_TRAIL(b1) && p1 != start) {
+                // Extract the value bits from the last trail byte.
+                c &= 0x3f;
+                uint8_t b2 = *--p1;
+                if (0xe0 <= b2 && b2 <= 0xf4) {
+                    if (b2 < 0xf0) {
+                        b2 &= 0xf;  // keep the value bits of the 3-byte lead
+                        if (U8_IS_VALID_LEAD3_AND_T1(b2, b1)) {
+                            p = p1;
+                            c = (b2 << 12) | ((b1 & 0x3f) << 6) | c;
+                            return {c, 3, true, p, p0};
+                        }
+                    } else if (U8_IS_VALID_LEAD4_AND_T1(b2, b1)) {
+                        // Truncated 4-byte sequence.
+                        p = p1;
+                        return {sub(), 3, false, p, p0};
+                    }
+                } else if (U8_IS_TRAIL(b2) && p1 != start) {
+                    uint8_t b3 = *--p1;
+                    if (0xf0 <= b3 && b3 <= 0xf4) {
+                        b3 &= 7;  // keep the value bits of the 4-byte lead
+                        if (U8_IS_VALID_LEAD4_AND_T1(b3, b2)) {
+                            p = p1;
+                            c = (b3 << 18) | ((b2 & 0x3f) << 12) | ((b1 & 0x3f) << 6) | c;
+                            return {c, 4, true, p, p0};
+                        }
+                    }
+                }
+            }
+        }
+        return {sub(), 1, false, p, p0};  // ill-formed: p has moved back by exactly one code unit
+    }
+};
+
+// UTF-16: validating implementation, selected when the code unit type of UnitIter is two bytes wide.
+template<typename CP32, UTFIllFormedBehavior behavior, typename UnitIter, typename LimitIter>
+class UTFImpl<
+        CP32, behavior,
+        UnitIter, LimitIter,
+        std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 2>> {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+public:
+    // Handle ill-formed UTF-16: One unpaired surrogate.
+    U_FORCE_INLINE static CP32 sub(CP32 surrogate) {
+        switch (behavior) {
+            case UTF_BEHAVIOR_NEGATIVE: return U_SENTINEL;
+            case UTF_BEHAVIOR_FFFD: return 0xfffd;
+            case UTF_BEHAVIOR_SURROGATE: return surrogate;  // pass the unpaired surrogate through
+        }
+    }
+
+    U_FORCE_INLINE static void inc(UnitIter &p, const LimitIter &limit) {
+        // Very similar to U16_FWD_1().
+        auto c = *p;
+        ++p;  // always consume the first code unit
+        if (U16_IS_LEAD(c) && p != limit && U16_IS_TRAIL(*p)) {
+            ++p;  // also consume the trail surrogate of a pair
+        }
+    }
+
+    U_FORCE_INLINE static void dec(UnitIter start, UnitIter &p) {
+        // Very similar to U16_BACK_1().
+        UnitIter p1;
+        if (U16_IS_TRAIL(*--p) && p != start && (p1 = p, U16_IS_LEAD(*--p1))) {
+            p = p1;  // also step back over the lead surrogate of a pair
+        }
+    }
+
+    U_FORCE_INLINE static CodeUnits<CP32, UnitIter> readAndInc(
+            UnitIter &p0, UnitIter &p, const LimitIter &limit) {
+        constexpr bool isMultiPass = prv::forward_iterator<UnitIter>;  // selects which CodeUnits constructor is used
+        // Very similar to U16_NEXT_OR_FFFD().
+        CP32 c = static_cast<CP32>(*p);
+        ++p;
+        if (!U16_IS_SURROGATE(c)) {
+            if constexpr (isMultiPass) {
+                return {c, 1, true, p0, p};
+            } else {
+                return {c, 1, true};
+            }
+        } else {
+            uint16_t c2;
+            if (U16_IS_SURROGATE_LEAD(c) && p != limit && U16_IS_TRAIL(c2 = *p)) {
+                ++p;
+                c = U16_GET_SUPPLEMENTARY(c, c2);
+                if constexpr (isMultiPass) {
+                    return {c, 2, true, p0, p};
+                } else {
+                    return {c, 2, true};
+                }
+            } else {
+                // Unpaired surrogate: ill-formed, one code unit consumed.
+                if constexpr (isMultiPass) {
+                    return {sub(c), 1, false, p0, p};
+                } else {
+                    return {sub(c), 1, false};
+                }
+            }
+        }
+    }
+
+    U_FORCE_INLINE static CodeUnits<CP32, UnitIter> decAndRead(UnitIter start, UnitIter &p) {
+        // Very similar to U16_PREV_OR_FFFD().
+        UnitIter p0 = p;  // exclusive end of the sequence being read
+        CP32 c = static_cast<CP32>(*--p);
+        if (!U16_IS_SURROGATE(c)) {
+            return {c, 1, true, p, p0};
+        } else {
+            UnitIter p1;
+            uint16_t c2;
+            if (U16_IS_SURROGATE_TRAIL(c) && p != start && (p1 = p, U16_IS_LEAD(c2 = *--p1))) {
+                p = p1;
+                c = U16_GET_SUPPLEMENTARY(c2, c);  // c2 is the lead, c is the trail
+                return {c, 2, true, p, p0};
+            } else {
+                return {sub(c), 1, false, p, p0};  // unpaired surrogate
+            }
+        }
+    }
+};
+
+// UTF-32: trivial, but still validating; selected when the code unit type of UnitIter is four bytes wide.
+template<typename CP32, UTFIllFormedBehavior behavior, typename UnitIter, typename LimitIter>
+class UTFImpl<
+        CP32, behavior,
+        UnitIter, LimitIter,
+        std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 4>> {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+public:
+    // Handle ill-formed UTF-32: a surrogate code point (forSurrogate) or an out-of-range value.
+    U_FORCE_INLINE static CP32 sub(bool forSurrogate, CP32 surrogate) {
+        switch (behavior) {
+            case UTF_BEHAVIOR_NEGATIVE: return U_SENTINEL;
+            case UTF_BEHAVIOR_FFFD: return 0xfffd;
+            case UTF_BEHAVIOR_SURROGATE: return forSurrogate ? surrogate : 0xfffd;  // U+FFFD if out of range
+        }
+    }
+
+    U_FORCE_INLINE static void inc(UnitIter &p, const LimitIter &/*limit*/) {
+        ++p;  // every sequence is exactly one code unit
+    }
+
+    U_FORCE_INLINE static void dec(UnitIter /*start*/, UnitIter &p) {
+        --p;  // every sequence is exactly one code unit
+    }
+
+    U_FORCE_INLINE static CodeUnits<CP32, UnitIter> readAndInc(
+            UnitIter &p0, UnitIter &p, const LimitIter &/*limit*/) {
+        constexpr bool isMultiPass = prv::forward_iterator<UnitIter>;  // selects which CodeUnits constructor is used
+        uint32_t uc = *p;  // unsigned copy for the range checks
+        CP32 c = uc;
+        ++p;
+        if (uc < 0xd800 || (0xe000 <= uc && uc <= 0x10ffff)) {
+            if constexpr (isMultiPass) {
+                return {c, 1, true, p0, p};
+            } else {
+                return {c, 1, true};
+            }
+        } else {
+            // Here uc >= 0xd800, so uc < 0xe000 means a surrogate; otherwise uc > 0x10ffff.
+            if constexpr (isMultiPass) {
+                return {sub(uc < 0xe000, c), 1, false, p0, p};
+            } else {
+                return {sub(uc < 0xe000, c), 1, false};
+            }
+        }
+    }
+
+    U_FORCE_INLINE static CodeUnits<CP32, UnitIter> decAndRead(UnitIter /*start*/, UnitIter &p) {
+        UnitIter p0 = p;  // exclusive end of the sequence being read
+        uint32_t uc = *--p;
+        CP32 c = uc;
+        if (uc < 0xd800 || (0xe000 <= uc && uc <= 0x10ffff)) {
+            return {c, 1, true, p, p0};
+        } else {
+            return {sub(uc < 0xe000, c), 1, false, p, p0};  // surrogate if uc < 0xe000, else out of range
+        }
+    }
+};
+
+// Non-validating implementations ------------------------------------------ ***
+
+template<typename CP32, typename UnitIter, typename = void>
+class UnsafeUTFImpl;  // primary template, declared only; the last parameter is the enable_if slot used by the specializations
+
+// UTF-8
+template<typename CP32, typename UnitIter>
+class UnsafeUTFImpl<
+        CP32,
+        UnitIter,
+        std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 1>> {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+public:
+    U_FORCE_INLINE static void inc(UnitIter &p) {
+        // Very similar to U8_FWD_1_UNSAFE(). Skips one sequence; the trail bytes are not inspected.
+        uint8_t b = *p;
+        std::advance(p, 1 + U8_COUNT_TRAIL_BYTES_UNSAFE(b));  // the lead byte plus the trail bytes it implies
+    }
+
+    U_FORCE_INLINE static void dec(UnitIter &p) {
+        // Very similar to U8_BACK_1_UNSAFE(). There is no start bound to stop at.
+        while (U8_IS_TRAIL(*--p)) {}  // step back until the byte at p is not a trail byte
+    }
+
+    U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
+        constexpr bool isMultiPass = prv::forward_iterator<UnitIter>;  // selects which UnsafeCodeUnits constructor is used
+        // Very similar to U8_NEXT_UNSAFE(). The sequence length is chosen from the first byte alone.
+        CP32 c = uint8_t(*p);
+        ++p;
+        if (U8_IS_SINGLE(c)) {
+            if constexpr (isMultiPass) {
+                return {c, 1, p0, p};
+            } else {
+                return {c, 1};
+            }
+        } else if (c < 0xe0) {  // 2-byte sequence
+            c = ((c & 0x1f) << 6) | (*p & 0x3f);
+            ++p;
+            if constexpr (isMultiPass) {
+                return {c, 2, p0, p};
+            } else {
+                return {c, 2};
+            }
+        } else if (c < 0xf0) {  // 3-byte sequence
+            // No need for (c&0xf) because the upper bits are truncated
+            // after <<12 in the cast to uint16_t.
+            c = uint16_t(c << 12) | ((*p & 0x3f) << 6);
+            ++p;
+            c |= *p & 0x3f;
+            ++p;
+            if constexpr (isMultiPass) {
+                return {c, 3, p0, p};
+            } else {
+                return {c, 3};
+            }
+        } else {  // 4-byte sequence
+            c = ((c & 7) << 18) | ((*p & 0x3f) << 12);
+            ++p;
+            c |= (*p & 0x3f) << 6;
+            ++p;
+            c |= *p & 0x3f;
+            ++p;
+            if constexpr (isMultiPass) {
+                return {c, 4, p0, p};
+            } else {
+                return {c, 4};
+            }
+        }
+    }
+
+    U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
+        // Very similar to U8_PREV_UNSAFE().
+        UnitIter p0 = p;
+        CP32 c = uint8_t(*--p);
+        if (U8_IS_SINGLE(c)) {
+            return {c, 1, p, p0};
+        }
+        // U8_IS_TRAIL(c) if well-formed
+        c &= 0x3f;
+        uint8_t count = 1;
+        for (uint8_t shift = 6;;) {
+            uint8_t b = *--p;
+            if (b >= 0xc0) {
+                U8_MASK_LEAD_BYTE(b, count);
+                c |= uint32_t{b} << shift;
+                break;
+            } else {
+                c |= (uint32_t{b} & 0x3f) << shift;
+                ++count;
+                shift += 6;
+            }
+        }
+        ++count;
+        return {c, count, p, p0};
+    }
+};
+
+// UTF-16
+template<typename CP32, typename UnitIter>
+class UnsafeUTFImpl<
+        CP32,
+        UnitIter,
+        std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 2>> {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+public:
+    // Moves p forward over one code point; compare U16_FWD_1_UNSAFE().
+    U_FORCE_INLINE static void inc(UnitIter &p) {
+        const bool startsPair = U16_IS_LEAD(*p);
+        ++p;
+        if (startsPair) {
+            // Also step over the second unit of the pair.
+            ++p;
+        }
+    }
+
+    // Moves p backward over one code point; compare U16_BACK_1_UNSAFE().
+    U_FORCE_INLINE static void dec(UnitIter &p) {
+        --p;
+        if (U16_IS_TRAIL(*p)) {
+            --p;
+        }
+    }
+
+    // Decodes the code point starting at p without validation and leaves p just after it;
+    // compare U16_NEXT_UNSAFE(). p0 is only used to report where the sequence started.
+    U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
+        CP32 c = static_cast<CP32>(*p);
+        ++p;
+        if (U16_IS_LEAD(c)) {
+            uint16_t trail = *p;
+            ++p;
+            c = U16_GET_SUPPLEMENTARY(c, trail);
+            return makeUnits(c, 2, p0, p);
+        }
+        return makeUnits(c, 1, p0, p);
+    }
+
+    // Decodes the code point ending just before p without validation and leaves p
+    // at its first unit; compare U16_PREV_UNSAFE().
+    U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
+        UnitIter limit = p;
+        --p;
+        CP32 c = static_cast<CP32>(*p);
+        if (U16_IS_TRAIL(c)) {
+            --p;
+            uint16_t lead = *p;
+            c = U16_GET_SUPPLEMENTARY(lead, c);
+            return {c, 2, p, limit};
+        }
+        return {c, 1, p, limit};
+    }
+
+private:
+    // Builds the result: with the [p0, p) iterators for a multi-pass UnitIter,
+    // without them for a single-pass one.
+    U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> makeUnits(
+            CP32 c, uint8_t length, UnitIter &p0, UnitIter &p) {
+        if constexpr (prv::forward_iterator<UnitIter>) {
+            return {c, length, p0, p};
+        } else {
+            return {c, length};
+        }
+    }
+};
+
+// UTF-32: trivial
+template<typename CP32, typename UnitIter>
+class UnsafeUTFImpl<
+        CP32,
+        UnitIter,
+        std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 4>> {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+public:
+    // One code unit per code point: stepping is a plain increment.
+    U_FORCE_INLINE static void inc(UnitIter &p) {
+        ++p;
+    }
+
+    // One code unit per code point: stepping back is a plain decrement.
+    U_FORCE_INLINE static void dec(UnitIter &p) {
+        --p;
+    }
+
+    // Reads the unit at p as the code point and leaves p just after it.
+    // The [p0, p) iterators are only included for a multi-pass UnitIter.
+    U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
+        CP32 c = *p;
+        ++p;
+        if constexpr (!prv::forward_iterator<UnitIter>) {
+            return {c, 1};
+        } else {
+            return {c, 1, p0, p};
+        }
+    }
+
+    // Steps p back by one unit and reads that unit as the code point.
+    U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
+        UnitIter limit = p;
+        --p;
+        CP32 c = *p;
+        return {c, 1, p, limit};
+    }
+};
+
+#endif
+
+// Validating iterators ---------------------------------------------------- ***
+
+/**
+ * Validating iterator over the code points in a Unicode string.
+ *
+ * The UnitIter can be
+ * an input_iterator, a forward_iterator, or a bidirectional_iterator (including a pointer).
+ * The UTFIterator will have the corresponding iterator_category.
+ *
+ * Call utfIterator() to have the compiler deduce the UnitIter and LimitIter types.
+ *
+ * For reverse iteration, either use this iterator directly as in <code>*--iter</code>
+ * or wrap it using std::make_reverse_iterator(iter).
+ *
+ * Decoding is lazy and cached: operator*() and operator->() decode the sequence
+ * at the current position once, store the result, and leave the underlying unit
+ * iterator after that sequence. A following operator++() then only resets the
+ * cached state instead of decoding again.
+ *
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if UTF_BEHAVIOR_NEGATIVE
+ * @tparam behavior How to handle ill-formed Unicode strings
+ * @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
+ *     UTF-8: char or char8_t or uint8_t;
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t;
+ *     UTF-32: char32_t or UChar32=int32_t or (on Linux) wchar_t
+ * @tparam LimitIter Either the same as UnitIter, or an iterator sentinel type.
+ * @draft ICU 78
+ * @see utfIterator
+ */
+template<typename CP32, UTFIllFormedBehavior behavior,
+         typename UnitIter, typename LimitIter = UnitIter, typename = void>
+class UTFIterator {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+    using Impl = UTFImpl<CP32, behavior, UnitIter, LimitIter>;
+
+    // Proxy type for operator->() (required by LegacyInputIterator)
+    // so that we don't promise always returning CodeUnits.
+    class Proxy {
+    public:
+        explicit Proxy(CodeUnits<CP32, UnitIter> &units) : units_(units) {}
+        CodeUnits<CP32, UnitIter> &operator*() { return units_; }
+        CodeUnits<CP32, UnitIter> *operator->() { return &units_; }
+    private:
+        CodeUnits<CP32, UnitIter> units_;  // held by value: a copy of the units passed to the constructor
+    };
+
+public:
+    /** C++ iterator boilerplate @internal */
+    using value_type = CodeUnits<CP32, UnitIter>;
+    /** C++ iterator boilerplate @internal */
+    using reference = value_type;
+    /** C++ iterator boilerplate @internal */
+    using pointer = Proxy;
+    /** C++ iterator boilerplate @internal */
+    using difference_type = prv::iter_difference_t<UnitIter>;
+    /** C++ iterator boilerplate @internal */
+    using iterator_category = std::conditional_t<
+        prv::bidirectional_iterator<UnitIter>,
+        std::bidirectional_iterator_tag,
+        std::forward_iterator_tag>;
+
+    /**
+     * Constructor with start <= p < limit.
+     * All of these iterators/pointers should be at code point boundaries.
+     * Only enabled if UnitIter is a (multi-pass) forward_iterator or better.
+     *
+     * When using a code unit sentinel (UnitIter≠LimitIter),
+     * then that sentinel also works as a sentinel for this code point iterator.
+     *
+     * @param start Start of the range
+     * @param p Initial position inside the range
+     * @param limit Limit (exclusive end) of the range
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE UTFIterator(UnitIter start, UnitIter p, LimitIter limit) :
+            p_(p), start_(start), limit_(limit), units_(0, 0, false, p, p) {}  // units_ starts as a placeholder; it is overwritten by the first decode
+    /**
+     * Constructor with start == p < limit.
+     * All of these iterators/pointers should be at code point boundaries.
+     *
+     * When using a code unit sentinel (UnitIter≠LimitIter),
+     * then that sentinel also works as a sentinel for this code point iterator.
+     *
+     * @param p Start of the range, and the initial position
+     * @param limit Limit (exclusive end) of the range
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE UTFIterator(UnitIter p, LimitIter limit) :
+            p_(p), start_(p), limit_(limit), units_(0, 0, false, p, p) {}
+    /**
+     * Constructs an iterator start or limit sentinel.
+     * The iterator/pointer should be at a code point boundary.
+     * Requires UnitIter to be copyable.
+     *
+     * When using a code unit sentinel (UnitIter≠LimitIter),
+     * then that sentinel also works as a sentinel for this code point iterator.
+     *
+     * @param p Range start or limit
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE explicit UTFIterator(UnitIter p) : p_(p), start_(p), limit_(p), units_(0, 0, false, p, p) {}  // start_ == limit_ == p: an empty range
+    /**
+     * Default constructor. Makes a non-functional iterator.
+     *
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE UTFIterator() : p_{}, start_{}, limit_{}, units_(0, 0, false, p_, p_) {}  // reads p_ here; p_ is declared before units_
+
+    /** Move constructor. @draft ICU 78 */
+    U_FORCE_INLINE UTFIterator(UTFIterator &&src) noexcept = default;
+    /** Move assignment operator. @draft ICU 78 */
+    U_FORCE_INLINE UTFIterator &operator=(UTFIterator &&src) noexcept = default;
+
+    /** Copy constructor. @draft ICU 78 */
+    U_FORCE_INLINE UTFIterator(const UTFIterator &other) = default;
+    /** Copy assignment operator. @draft ICU 78 */
+    U_FORCE_INLINE UTFIterator &operator=(const UTFIterator &other) = default;
+
+    /**
+     * Compares logical positions, so an iterator that has already decoded
+     * (and therefore advanced p_) still equals one at the same position that has not.
+     *
+     * @param other Another iterator
+     * @return true if this iterator is at the same position as the other one
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE bool operator==(const UTFIterator &other) const {
+        return getLogicalPosition() == other.getLogicalPosition();
+    }
+    /**
+     * @param other Another iterator
+     * @return true if this iterator is not at the same position as the other one
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE bool operator!=(const UTFIterator &other) const { return !operator==(other); }
+
+    // Asymmetric equality & nonequality with a sentinel type.
+    // These overloads are disabled when Sentinel is UTFIterator or UnitIter.
+
+    /**
+     * @param iter A UTFIterator
+     * @param s A unit iterator sentinel
+     * @return true if the iterator’s position is equal to the sentinel
+     * @draft ICU 78
+     */
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator==(const UTFIterator &iter, const Sentinel &s) {
+        return iter.getLogicalPosition() == s;
+    }
+
+#if U_CPLUSPLUS_VERSION < 20
+    // C++17: Need to define all four combinations of == / != vs. parameter order.
+    // Once we require C++20, we could remove all but the first == because
+    // the compiler would generate the rest.
+
+    /**
+     * @param s A unit iterator sentinel
+     * @param iter A UTFIterator
+     * @return true if the iterator’s position is equal to the sentinel
+     * @internal
+     */
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator==(const Sentinel &s, const UTFIterator &iter) {
+        return iter.getLogicalPosition() == s;
+    }
+    /**
+     * @param iter A UTFIterator
+     * @param s A unit iterator sentinel
+     * @return true if the iterator’s position is not equal to the sentinel
+     * @internal
+     */
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator!=(const UTFIterator &iter, const Sentinel &s) { return !(iter == s); }
+    /**
+     * @param s A unit iterator sentinel
+     * @param iter A UTFIterator
+     * @return true if the iterator’s position is not equal to the sentinel
+     * @internal
+     */
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator!=(const Sentinel &s, const UTFIterator &iter) { return !(iter == s); }
+#endif  // C++17
+
+    /**
+     * Decodes the code unit sequence at the current position.
+     * The result is cached, so repeated calls at the same position decode only once.
+     *
+     * @return CodeUnits with the decoded code point etc.
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE CodeUnits<CP32, UnitIter> operator*() const {
+        if (state_ == 0) {  // nothing decoded yet at this position
+            UnitIter p0 = p_;
+            units_ = Impl::readAndInc(p0, p_, limit_);
+            state_ = 1;  // p_ is now after the sequence; units_ holds the result
+        }
+        return units_;
+    }
+
+    /**
+     * Decodes the code unit sequence at the current position.
+     * Used like <code>iter->codePoint()</code> or <code>iter->stringView()</code> etc.
+     *
+     * @return CodeUnits with the decoded code point etc., wrapped into
+     *     an opaque proxy object so that <code>iter->codePoint()</code> etc. works.
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE Proxy operator->() const {
+        if (state_ == 0) {  // same lazy decode as operator*()
+            UnitIter p0 = p_;
+            units_ = Impl::readAndInc(p0, p_, limit_);
+            state_ = 1;
+        }
+        return Proxy(units_);
+    }
+
+    /**
+     * Pre-increment operator.
+     *
+     * @return this iterator
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE UTFIterator &operator++() {  // pre-increment
+        if (state_ > 0) {
+            // operator*() called readAndInc() so p_ is already ahead.
+            state_ = 0;
+        } else if (state_ == 0) {
+            Impl::inc(p_, limit_);  // skip the sequence without producing a CodeUnits value
+        } else /* state_ < 0 */ {
+            // operator--() called decAndRead() so we know how far to skip.
+            p_ = units_.end();
+            state_ = 0;
+        }
+        return *this;
+    }
+
+    /**
+     * Post-increment operator.
+     *
+     * @return a copy of this iterator from before the increment.
+     *     If UnitIter is a single-pass input_iterator, then this function
+     *     returns an opaque proxy object so that <code>*iter++</code> still works.
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE UTFIterator operator++(int) {  // post-increment
+        if (state_ > 0) {
+            // operator*() called readAndInc() so p_ is already ahead.
+            UTFIterator result(*this);
+            state_ = 0;
+            return result;
+        } else if (state_ == 0) {
+            UnitIter p0 = p_;
+            units_ = Impl::readAndInc(p0, p_, limit_);
+            UTFIterator result(*this);  // copied after the decode, so result carries the decoded units_
+            result.state_ = 1;  // result's logical position is units_.begin(), i.e. the old position
+            // keep this->state_ == 0
+            return result;
+        } else /* state_ < 0 */ {
+            UTFIterator result(*this);
+            // operator--() called decAndRead() so we know how far to skip.
+            p_ = units_.end();
+            state_ = 0;
+            return result;
+        }
+    }
+
+    /**
+     * Pre-decrement operator.
+     * Only enabled if UnitIter is a bidirectional_iterator (including a pointer).
+     * The preceding sequence is decoded right away and cached in this iterator.
+     *
+     * @return this iterator
+     * @draft ICU 78
+     */
+    template<typename Iter = UnitIter>
+    U_FORCE_INLINE
+    std::enable_if_t<prv::bidirectional_iterator<Iter>, UTFIterator &>
+    operator--() {  // pre-decrement
+        if (state_ > 0) {
+            // operator*() called readAndInc() so p_ is ahead of the logical position.
+            p_ = units_.begin();
+        }
+        units_ = Impl::decAndRead(start_, p_);
+        state_ = -1;  // p_ is at the start of the decoded sequence
+        return *this;
+    }
+
+    /**
+     * Post-decrement operator.
+     * Only enabled if UnitIter is a bidirectional_iterator (including a pointer).
+     *
+     * @return a copy of this iterator from before the decrement.
+     * @draft ICU 78
+     */
+    template<typename Iter = UnitIter>
+    U_FORCE_INLINE
+    std::enable_if_t<prv::bidirectional_iterator<Iter>, UTFIterator>
+    operator--(int) {  // post-decrement
+        UTFIterator result(*this);
+        operator--();
+        return result;
+    }
+
+private:
+    friend class std::reverse_iterator<UTFIterator<CP32, behavior, UnitIter>>;  // grants access to the private members
+
+    U_FORCE_INLINE UnitIter getLogicalPosition() const {
+        return state_ <= 0 ? p_ : units_.begin();  // when state_ > 0, p_ is past the current sequence, so use its start
+    }
+
+    // operator*() etc. are logically const.
+    mutable UnitIter p_;
+    // In a validating iterator, we need start_ & limit_ so that when we read a code point
+    // (forward or backward) we can test if there are enough code units.
+    UnitIter start_;
+    LimitIter limit_;
+    // Keep state so that we call readAndInc() only once for both operator*() and ++
+    // to make it easy for the compiler to optimize.
+    mutable CodeUnits<CP32, UnitIter> units_;
+    // >0: units_ = readAndInc(), p_ = units limit
+    //     which means that p_ is ahead of its logical position
+    //  0: initial state
+    // <0: units_ = decAndRead(), p_ = units start
+    mutable int8_t state_ = 0;
+};
+
+#ifndef U_IN_DOXYGEN
+// Partial template specialization for single-pass input iterator (selected when UnitIter is not a forward_iterator); it has no start_ member and no decrement operators.
+template<typename CP32, UTFIllFormedBehavior behavior, typename UnitIter, typename LimitIter>
+class UTFIterator<
+        CP32, behavior,
+        UnitIter, LimitIter,
+        std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+    using Impl = UTFImpl<CP32, behavior, UnitIter, LimitIter>;
+
+    // Proxy type for post-increment return value, to make *iter++ work.
+    // Also for operator->() (required by LegacyInputIterator)
+    // so that we don't promise always returning CodeUnits.
+    class Proxy {
+    public:
+        explicit Proxy(CodeUnits<CP32, UnitIter> &units) : units_(units) {}
+        CodeUnits<CP32, UnitIter> &operator*() { return units_; }
+        CodeUnits<CP32, UnitIter> *operator->() { return &units_; }
+    private:
+        CodeUnits<CP32, UnitIter> units_;  // held by value: a copy of the units passed to the constructor
+    };
+
+public:
+    using value_type = CodeUnits<CP32, UnitIter>;
+    using reference = value_type;
+    using pointer = Proxy;
+    using difference_type = prv::iter_difference_t<UnitIter>;
+    using iterator_category = std::input_iterator_tag;
+
+    U_FORCE_INLINE UTFIterator(UnitIter p, LimitIter limit) : p_(std::move(p)), limit_(std::move(limit)) {}
+
+    // Constructs an iterator start or limit sentinel.
+    // Requires p to be copyable.
+    U_FORCE_INLINE explicit UTFIterator(UnitIter p) : p_(std::move(p)), limit_(p_) {}  // limit_ copies from p_ (declared before limit_), not from the moved-from p
+
+    U_FORCE_INLINE UTFIterator(UTFIterator &&src) noexcept = default;
+    U_FORCE_INLINE UTFIterator &operator=(UTFIterator &&src) noexcept = default;
+
+    U_FORCE_INLINE UTFIterator(const UTFIterator &other) = default;
+    U_FORCE_INLINE UTFIterator &operator=(const UTFIterator &other) = default;
+
+    U_FORCE_INLINE bool operator==(const UTFIterator &other) const {
+        return p_ == other.p_ && ahead_ == other.ahead_;  // equal only if both the unit iterators and the read-ahead flags match
+        // Strictly speaking, we should check if the logical position is the same.
+        // However, we cannot advance, or do arithmetic with, a single-pass UnitIter.
+    }
+    U_FORCE_INLINE bool operator!=(const UTFIterator &other) const { return !operator==(other); }
+
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator==(const UTFIterator &iter, const Sentinel &s) {
+        return !iter.ahead_ && iter.p_ == s;  // while ahead_, p_ is past a code point that has not been consumed yet, so never equal
+    }
+
+#if U_CPLUSPLUS_VERSION < 20
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator==(const Sentinel &s, const UTFIterator &iter) {
+        return !iter.ahead_ && iter.p_ == s;
+    }
+
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator!=(const UTFIterator &iter, const Sentinel &s) { return !(iter == s); }
+
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator!=(const Sentinel &s, const UTFIterator &iter) { return !(iter == s); }
+#endif  // C++17
+
+    U_FORCE_INLINE CodeUnits<CP32, UnitIter> operator*() const {
+        if (!ahead_) {  // decode once; later calls return the cached units_
+            units_ = Impl::readAndInc(p_, p_, limit_);  // p_ is passed as both the start and the moving iterator
+            ahead_ = true;
+        }
+        return units_;
+    }
+
+    U_FORCE_INLINE Proxy operator->() const {
+        if (!ahead_) {  // same lazy decode as operator*()
+            units_ = Impl::readAndInc(p_, p_, limit_);
+            ahead_ = true;
+        }
+        return Proxy(units_);
+    }
+
+    U_FORCE_INLINE UTFIterator &operator++() {  // pre-increment
+        if (ahead_) {
+            // operator*() called readAndInc() so p_ is already ahead.
+            ahead_ = false;
+        } else {
+            Impl::inc(p_, limit_);  // skip the sequence without producing a CodeUnits value
+        }
+        return *this;
+    }
+
+    U_FORCE_INLINE Proxy operator++(int) {  // post-increment
+        if (ahead_) {
+            // operator*() called readAndInc() so p_ is already ahead.
+            ahead_ = false;
+        } else {
+            units_ = Impl::readAndInc(p_, p_, limit_);
+            // keep this->ahead_ == false
+        }
+        return Proxy(units_);  // returns a Proxy holding a copy of units_ rather than a copy of the iterator
+    }
+
+private:
+    // operator*() etc. are logically const.
+    mutable UnitIter p_;
+    // In a validating iterator, we need limit_ so that when we read a code point
+    // we can test if there are enough code units.
+    LimitIter limit_;
+    // Keep state so that we call readAndInc() only once for both operator*() and ++
+    // so that we can use a single-pass input iterator for UnitIter.
+    mutable CodeUnits<CP32, UnitIter> units_ = {0, 0, false};
+    // true: units_ = readAndInc(), p_ = units limit
+    //     which means that p_ is ahead of its logical position
+    // false: initial state
+    mutable bool ahead_ = false;
+};
+#endif  // U_IN_DOXYGEN
+
+}  // namespace U_HEADER_ONLY_NAMESPACE
+
+#ifndef U_IN_DOXYGEN
+// Bespoke specialization of reverse_iterator for a UTFIterator whose LimitIter is the default (same as UnitIter).
+// The default implementation implements reverse operator*() and ++ in a way
+// that does most of the same work twice for reading variable-length sequences.
+template<typename CP32, UTFIllFormedBehavior behavior, typename UnitIter>
+class std::reverse_iterator<U_HEADER_ONLY_NAMESPACE::UTFIterator<CP32, behavior, UnitIter>> {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+    using Impl = U_HEADER_ONLY_NAMESPACE::UTFImpl<CP32, behavior, UnitIter>;
+    using CodeUnits_ = U_HEADER_ONLY_NAMESPACE::CodeUnits<CP32, UnitIter>;
+
+    // Proxy type for operator->() (required by LegacyInputIterator)
+    // so that we don't promise always returning CodeUnits.
+    class Proxy {
+    public:
+        explicit Proxy(CodeUnits_ units) : units_(units) {}
+        CodeUnits_ &operator*() { return units_; }
+        CodeUnits_ *operator->() { return &units_; }
+    private:
+        CodeUnits_ units_;  // held by value
+    };
+
+public:
+    using value_type = CodeUnits_;
+    using reference = value_type;
+    using pointer = Proxy;
+    using difference_type = U_HEADER_ONLY_NAMESPACE::prv::iter_difference_t<UnitIter>;
+    using iterator_category = std::bidirectional_iterator_tag;
+
+    U_FORCE_INLINE explicit reverse_iterator(U_HEADER_ONLY_NAMESPACE::UTFIterator<CP32, behavior, UnitIter> iter) :
+            p_(iter.getLogicalPosition()), start_(iter.start_), limit_(iter.limit_),  // starts from the forward iterator's logical position and range bounds
+            units_(0, 0, false, p_, p_) {}  // placeholder; overwritten by the first decode
+    U_FORCE_INLINE reverse_iterator() : p_{}, start_{}, limit_{}, units_(0, 0, false, p_, p_) {}  // reads p_ here; p_ is declared before units_
+
+    U_FORCE_INLINE reverse_iterator(reverse_iterator &&src) noexcept = default;
+    U_FORCE_INLINE reverse_iterator &operator=(reverse_iterator &&src) noexcept = default;
+
+    U_FORCE_INLINE reverse_iterator(const reverse_iterator &other) = default;
+    U_FORCE_INLINE reverse_iterator &operator=(const reverse_iterator &other) = default;
+
+    U_FORCE_INLINE bool operator==(const reverse_iterator &other) const {
+        return getLogicalPosition() == other.getLogicalPosition();  // compares logical positions, not the raw p_ members
+    }
+    U_FORCE_INLINE bool operator!=(const reverse_iterator &other) const { return !operator==(other); }
+
+    U_FORCE_INLINE CodeUnits_ operator*() const {
+        if (state_ == 0) {  // nothing decoded yet at this position
+            units_ = Impl::decAndRead(start_, p_);  // decodes the sequence that ends at p_ and moves p_ to its start
+            state_ = -1;
+        }
+        return units_;
+    }
+
+    U_FORCE_INLINE Proxy operator->() const {
+        if (state_ == 0) {  // same lazy decode as operator*()
+            units_ = Impl::decAndRead(start_, p_);
+            state_ = -1;
+        }
+        return Proxy(units_);
+    }
+
+    U_FORCE_INLINE reverse_iterator &operator++() {  // pre-increment
+        if (state_ < 0) {
+            // operator*() called decAndRead() so p_ is already behind.
+            state_ = 0;
+        } else if (state_ == 0) {
+            Impl::dec(start_, p_);  // step back over the sequence without producing a CodeUnits value
+        } else /* state_ > 0 */ {
+            // operator--() called readAndInc() so we know how far to skip.
+            p_ = units_.begin();
+            state_ = 0;
+        }
+        return *this;
+    }
+
+    U_FORCE_INLINE reverse_iterator operator++(int) {  // post-increment
+        if (state_ < 0) {
+            // operator*() called decAndRead() so p_ is already behind.
+            reverse_iterator result(*this);
+            state_ = 0;
+            return result;
+        } else if (state_ == 0) {
+            units_ = Impl::decAndRead(start_, p_);
+            reverse_iterator result(*this);  // copied after the decode, so result carries the decoded units_
+            result.state_ = -1;  // result's logical position is units_.end(), i.e. the old position
+            // keep this->state_ == 0
+            return result;
+        } else /* state_ > 0 */ {
+            reverse_iterator result(*this);
+            // operator--() called readAndInc() so we know how far to skip.
+            p_ = units_.begin();
+            state_ = 0;
+            return result;
+        }
+    }
+
+    U_FORCE_INLINE reverse_iterator &operator--() {  // pre-decrement
+        if (state_ < 0) {
+            // operator*() called decAndRead() so p_ is behind the logical position.
+            p_ = units_.end();
+        }
+        UnitIter p0 = p_;
+        units_ = Impl::readAndInc(p0, p_, limit_);  // moving backward in reverse order means reading forward in the underlying range
+        state_ = 1;
+        return *this;
+    }
+
+    U_FORCE_INLINE reverse_iterator operator--(int) {  // post-decrement
+        reverse_iterator result(*this);
+        operator--();
+        return result;
+    }
+
+private:
+    U_FORCE_INLINE UnitIter getLogicalPosition() const {
+        return state_ >= 0 ? p_ : units_.end();  // when state_ < 0, p_ is at the start of the decoded sequence, so use its end
+    }
+
+    // operator*() etc. are logically const.
+    mutable UnitIter p_;
+    // In a validating iterator, we need start_ & limit_ so that when we read a code point
+    // (forward or backward) we can test if there are enough code units.
+    UnitIter start_;
+    UnitIter limit_;
+    // Keep state so that we call decAndRead() only once for both operator*() and ++
+    // to make it easy for the compiler to optimize.
+    mutable CodeUnits_ units_;
+    // >0: units_ = readAndInc(), p_ = units limit
+    //  0: initial state
+    // <0: units_ = decAndRead(), p_ = units start
+    //     which means that p_ is behind its logical position
+    mutable int8_t state_ = 0;
+};
+#endif  // U_IN_DOXYGEN
+
+namespace U_HEADER_ONLY_NAMESPACE {
+
+/**
+ * UTFIterator factory function for start <= p < limit.
+ * Deduces the UnitIter and LimitIter template parameters from the inputs.
+ * Only enabled if UnitIter is a (multi-pass) forward_iterator or better.
+ *
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
+ * @tparam behavior How to handle ill-formed Unicode strings
+ * @tparam UnitIter Can usually be omitted/deduced:
+ *     An iterator (often a pointer) that returns a code unit type:
+ *     UTF-8: char or char8_t or uint8_t;
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t;
+ *     UTF-32: char32_t or UChar32=int32_t or (on Linux) wchar_t
+ * @tparam LimitIter Either the same as UnitIter, or an iterator sentinel type.
+ * @param start start code unit iterator
+ * @param p current-position code unit iterator
+ * @param limit limit (exclusive-end) code unit iterator.
+ *     When using a code unit sentinel (UnitIter≠LimitIter),
+ *     then that sentinel also works as a sentinel for the code point iterator.
+ * @return a UTFIterator&lt;CP32, behavior, UnitIter&gt;
+ *     for the given code unit iterators or character pointers
+ * @draft ICU 78
+ */
+template<typename CP32, UTFIllFormedBehavior behavior,
+         typename UnitIter, typename LimitIter = UnitIter>
+auto utfIterator(UnitIter start, UnitIter p, LimitIter limit) {
+    // Name the deduced iterator type once, then forward all three arguments by move.
+    using Iter = UTFIterator<CP32, behavior, UnitIter, LimitIter>;
+    return Iter(std::move(start), std::move(p), std::move(limit));
+}
+
+/**
+ * UTFIterator factory function for start = p < limit.
+ * Deduces the UnitIter and LimitIter template parameters from the inputs.
+ *
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
+ * @tparam behavior How to handle ill-formed Unicode strings
+ * @tparam UnitIter Can usually be omitted/deduced:
+ *     An iterator (often a pointer) that returns a code unit type:
+ *     UTF-8: char or char8_t or uint8_t;
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t;
+ *     UTF-32: char32_t or UChar32=int32_t or (on Linux) wchar_t
+ * @tparam LimitIter Either the same as UnitIter, or an iterator sentinel type.
+ * @param p start and current-position code unit iterator
+ * @param limit limit (exclusive-end) code unit iterator.
+ *     When using a code unit sentinel (UnitIter≠LimitIter),
+ *     then that sentinel also works as a sentinel for the code point iterator.
+ * @return a UTFIterator&lt;CP32, behavior, UnitIter&gt;
+ *     for the given code unit iterators or character pointers
+ * @draft ICU 78
+ */
+template<typename CP32, UTFIllFormedBehavior behavior,
+         typename UnitIter, typename LimitIter = UnitIter>
+auto utfIterator(UnitIter p, LimitIter limit) {
+    // Name the deduced iterator type once, then forward both arguments by move.
+    using Iter = UTFIterator<CP32, behavior, UnitIter, LimitIter>;
+    return Iter(std::move(p), std::move(limit));
+}
+
+// Note: We should only enable the following factory function for a copyable UnitIter.
+// In C++17, we would have to partially specialize with enable_if_t testing for forward_iterator,
+// but a function template partial specialization is not allowed.
+// In C++20, we might be able to require the std::copyable concept.
+
+/**
+ * UTFIterator factory function for a start or limit sentinel.
+ * Deduces the UnitIter template parameter from the input.
+ * Requires UnitIter to be copyable.
+ *
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
+ * @tparam behavior How to handle ill-formed Unicode strings
+ * @tparam UnitIter Can usually be omitted/deduced:
+ *     An iterator (often a pointer) that returns a code unit type:
+ *     UTF-8: char or char8_t or uint8_t;
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t;
+ *     UTF-32: char32_t or UChar32=int32_t or (on Linux) wchar_t
+ * @param p code unit iterator.
+ *     When using a code unit sentinel,
+ *     then that sentinel also works as a sentinel for the code point iterator.
+ * @return a UTFIterator&lt;CP32, behavior, UnitIter&gt;
+ *     for the given code unit iterator or character pointer
+ * @draft ICU 78
+ */
+template<typename CP32, UTFIllFormedBehavior behavior, typename UnitIter>
+auto utfIterator(UnitIter p) {
+    // Single-argument form: LimitIter keeps its default.
+    using Iter = UTFIterator<CP32, behavior, UnitIter>;
+    return Iter(std::move(p));
+}
+
+/**
+ * A C++ "range" for validating iteration over all of the code points of a code unit range.
+ *
+ * Call utfStringCodePoints() to have the compiler deduce the Range type.
+ *
+ * UTFStringCodePoints is conditionally borrowed; that is, if Range is a borrowed range
+ * so is UTFStringCodePoints<CP32, behavior, Range>.
+ * Note that when given a range r that is an lvalue and is not a view,  utfStringCodePoints(r) uses a
+ * ref_view of r as the Range type, which is a borrowed range.
+ * In practice, this means that given a container variable r, the iterators of utfStringCodePoints(r) can
+ * be used as long as iterators on r are valid, without having to keep utfStringCodePoints(r) around.
+ * For instance:
+ * \code
+ *     std::u8string s = u8"𒇧𒇧";
+ *     // it outlives utfStringCodePoints<char32_t>(s).
+ *     auto it = utfStringCodePoints<char32_t>(s).begin();
+ *     ++it;
+ *     char32_t second_code_point = it->codePoint();  // OK.
+ * \endcode
+ * 
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if UTF_BEHAVIOR_NEGATIVE
+ * @tparam behavior How to handle ill-formed Unicode strings
+ * @tparam Range A C++ "range" of Unicode UTF-8/16/32 code units
+ * @draft ICU 78
+ * @see utfStringCodePoints
+ */
+template<typename CP32, UTFIllFormedBehavior behavior, typename Range>
+class UTFStringCodePoints {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+public:
+    /**
+     * Constructs an empty C++ "range" object.
+     * @draft ICU 78
+     */
+    UTFStringCodePoints() = default;
+
+    /**
+     * Constructs a C++ "range" object over the code points in the string.
+     * Enabled only when Range is not a reference type;
+     * the code unit range is taken by value and moved into this object.
+     * @param unitRange input range
+     * @draft ICU 78
+     */
+    template<typename R = Range, typename = std::enable_if_t<!std::is_reference_v<R>>>
+    explicit UTFStringCodePoints(Range unitRange) : unitRange(std::move(unitRange)) {}
+    /**
+     * Constructs a C++ "range" object over the code points in the string,
+     * keeping a reference to the code unit range.  This overload is used by
+     * utfStringCodePoints in C++17; in C++20, a ref_view is used instead (via
+     * views::all).
+     * Enabled only when Range is a reference type.
+     * The extra defaulted template parameter keeps this constructor template
+     * distinct from the by-value overload above.
+     * @param unitRange input range
+     * @draft ICU 78
+     */
+    template<typename R = Range, typename = std::enable_if_t<std::is_reference_v<R>>, typename = void>
+    explicit UTFStringCodePoints(Range unitRange) : unitRange(unitRange) {}
+
+    /** Copy constructor. @draft ICU 78 */
+    UTFStringCodePoints(const UTFStringCodePoints &other) = default;
+
+    /** Copy assignment operator. @draft ICU 78 */
+    UTFStringCodePoints &operator=(const UTFStringCodePoints &other) = default;
+
+    /**
+     * @return the range start iterator,
+     *     built from the begin() and end() of the code unit range
+     * @draft ICU 78
+     */
+    auto begin() {
+        return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end());
+    }
+
+    /**
+     * Const overload; only enabled if const Range is itself a range.
+     * @return the range start iterator,
+     *     built from the begin() and end() of the code unit range
+     * @draft ICU 78
+     */
+    template<typename R = Range, typename = std::enable_if_t<prv::range<const R>>>
+    auto begin() const {
+        return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end());
+    }
+
+    /**
+     * @return the range limit (exclusive end) iterator.
+     *     If the code unit range uses a sentinel type that differs from its iterator type,
+     *     then that code unit sentinel is returned as is.
+     * @draft ICU 78
+     */
+    auto end() {
+        using UnitIter = decltype(unitRange.begin());
+        using LimitIter = decltype(unitRange.end());
+        if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
+            // Return the code unit sentinel.
+            return unitRange.end();
+        } else if constexpr (prv::bidirectional_iterator<UnitIter>) {
+            return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end(), unitRange.end());
+        } else {
+            // The input iterator specialization has no three-argument constructor.
+            return utfIterator<CP32, behavior>(unitRange.end(), unitRange.end());
+        }
+    }
+
+    /**
+     * Const overload; only enabled if const Range is itself a range.
+     * @return the range limit (exclusive end) iterator.
+     *     If the code unit range uses a sentinel type that differs from its iterator type,
+     *     then that code unit sentinel is returned as is.
+     * @draft ICU 78
+     */
+    template<typename R = Range, typename = std::enable_if_t<prv::range<const R>>>
+    auto end() const {
+        using UnitIter = decltype(unitRange.begin());
+        using LimitIter = decltype(unitRange.end());
+        if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
+            // Return the code unit sentinel.
+            return unitRange.end();
+        } else if constexpr (prv::bidirectional_iterator<UnitIter>) {
+            return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end(), unitRange.end());
+        } else {
+            // The input iterator specialization has no three-argument constructor.
+            return utfIterator<CP32, behavior>(unitRange.end(), unitRange.end());
+        }
+    }
+
+    /**
+     * Wraps the result of the const end() overload.
+     * @return std::reverse_iterator(end())
+     * @draft ICU 78
+     */
+    auto rbegin() const {
+        return std::make_reverse_iterator(end());
+    }
+
+    /**
+     * Wraps the result of the const begin() overload.
+     * @return std::reverse_iterator(begin())
+     * @draft ICU 78
+     */
+    auto rend() const {
+        return std::make_reverse_iterator(begin());
+    }
+
+private:
+    Range unitRange;  // the code unit range: an owned object, a view, or a reference
+};
+
+/**
+ * Type of the utfStringCodePoints function object.
+ * Calling it with a code unit range returns a UTFStringCodePoints over that range.
+ * When the C++23 library support tested below is available, it also derives from
+ * std::ranges::range_adaptor_closure.
+ * @internal
+ */
+template<typename CP32, UTFIllFormedBehavior behavior>
+struct UTFStringCodePointsAdaptor
+#if U_CPLUSPLUS_VERSION >= 23 && __cpp_lib_ranges >= 2022'02 &&                                         \
+    __cpp_lib_bind_back >= 2022'02 // http://wg21.link/P2387R3.
+    : std::ranges::range_adaptor_closure<UTFStringCodePointsAdaptor<CP32, behavior>>
+#endif
+{
+    /**
+     * Wraps unitRange in a UTFStringCodePoints.
+     * With a sufficiently recent ranges library, the stored Range type is views::all_t of
+     * the argument type. Otherwise a basic_string_view is stored by copy, and any other
+     * argument is stored as the deduced Range type (a reference for lvalues).
+     * @internal
+     */
+    template<typename Range>
+    auto operator()(Range &&unitRange) const {
+#if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10  // We need https://wg21.link/P2415R2.
+        return UTFStringCodePoints<CP32, behavior, std::ranges::views::all_t<Range>>(
+            std::forward<Range>(unitRange));
+#else
+        if constexpr (prv::is_basic_string_view_v<std::decay_t<Range>>) {
+            // Take basic_string_view by copy, not by reference.  In C++20 this is handled by
+            // all_t<Range>, which is Range if Range is a view.
+            return UTFStringCodePoints<CP32, behavior, std::decay_t<Range>>(
+                std::forward<Range>(unitRange));
+        } else {
+            return UTFStringCodePoints<CP32, behavior, Range>(std::forward<Range>(unitRange));
+        }
+#endif
+    }
+};
+
+/**
+ * Range adaptor function object returning a UTFStringCodePoints object that represents a "range" of code
+ * points in a code unit range, which validates while decoding.
+ * Deduces the Range template parameter from the input, taking into account the value category: the
+ * code units will be referenced if possible, and moved if necessary.
+ *
+ * Both CP32 and behavior must be specified explicitly; call it like
+ * <code>utfStringCodePoints&lt;CP32, behavior&gt;(unitRange)</code>.
+ *
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t;
+ *              should be signed if UTF_BEHAVIOR_NEGATIVE
+ * @tparam behavior How to handle ill-formed Unicode strings
+ * @tparam Range A C++ "range" of Unicode UTF-8/16/32 code units
+ * @param unitRange input range
+ * @return a UTFStringCodePoints&lt;CP32, behavior, Range&gt; for the given unitRange
+ * @draft ICU 78
+ */
+template<typename CP32, UTFIllFormedBehavior behavior>
+constexpr UTFStringCodePointsAdaptor<CP32, behavior> utfStringCodePoints;
+
+// Non-validating iterators ------------------------------------------------ ***
+
+/**
+ * Non-validating iterator over the code points in a Unicode string.
+ * The string must be well-formed.
+ *
+ * The UnitIter can be
+ * an input_iterator, a forward_iterator, or a bidirectional_iterator (including a pointer).
+ * The UnsafeUTFIterator will have the corresponding iterator_category.
+ *
+ * Call unsafeUTFIterator() to have the compiler deduce the UnitIter type.
+ *
+ * For reverse iteration, either use this iterator directly as in <code>*--iter</code>
+ * or wrap it using std::make_reverse_iterator(iter).
+ *
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
+ * @tparam UnitIter An iterator (often a pointer) that returns a code unit type:
+ *     UTF-8: char or char8_t or uint8_t;
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t;
+ *     UTF-32: char32_t or UChar32=int32_t or (on Linux) wchar_t
+ * @draft ICU 78
+ * @see unsafeUTFIterator
+ */
+template<typename CP32, typename UnitIter, typename = void>
+class UnsafeUTFIterator {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+    using Impl = UnsafeUTFImpl<CP32, UnitIter>;
+
+    // Proxy type for operator->() (required by LegacyInputIterator)
+    // so that we don't promise always returning UnsafeCodeUnits.
+    // The proxy holds its own copy of the decoded units.
+    class Proxy {
+    public:
+        explicit Proxy(UnsafeCodeUnits<CP32, UnitIter> &units) : units_(units) {}
+        UnsafeCodeUnits<CP32, UnitIter> &operator*() { return units_; }
+        UnsafeCodeUnits<CP32, UnitIter> *operator->() { return &units_; }
+    private:
+        UnsafeCodeUnits<CP32, UnitIter> units_;
+    };
+
+public:
+    /** C++ iterator boilerplate @internal */
+    using value_type = UnsafeCodeUnits<CP32, UnitIter>;
+    /** C++ iterator boilerplate: dereferencing returns by value. @internal */
+    using reference = value_type;
+    /** C++ iterator boilerplate @internal */
+    using pointer = Proxy;
+    /** C++ iterator boilerplate @internal */
+    using difference_type = prv::iter_difference_t<UnitIter>;
+    /** C++ iterator boilerplate: bidirectional if UnitIter is, otherwise forward. @internal */
+    using iterator_category = std::conditional_t<
+        prv::bidirectional_iterator<UnitIter>,
+        std::bidirectional_iterator_tag,
+        std::forward_iterator_tag>;
+
+    /**
+     * Constructor; the iterator/pointer should be at a code point boundary.
+     *
+     * When using a code unit sentinel,
+     * then that sentinel also works as a sentinel for this code point iterator.
+     *
+     * @param p Initial position inside the range, or a range sentinel
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE explicit UnsafeUTFIterator(UnitIter p) : p_(p), units_(0, 0, p, p) {}
+    /**
+     * Default constructor. Makes a non-functional iterator.
+     *
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE UnsafeUTFIterator() : p_{}, units_(0, 0, p_, p_) {}
+
+    /** Move constructor. @draft ICU 78 */
+    U_FORCE_INLINE UnsafeUTFIterator(UnsafeUTFIterator &&src) noexcept = default;
+    /** Move assignment operator. @draft ICU 78 */
+    U_FORCE_INLINE UnsafeUTFIterator &operator=(UnsafeUTFIterator &&src) noexcept = default;
+
+    /** Copy constructor. @draft ICU 78 */
+    U_FORCE_INLINE UnsafeUTFIterator(const UnsafeUTFIterator &other) = default;
+    /** Copy assignment operator. @draft ICU 78 */
+    U_FORCE_INLINE UnsafeUTFIterator &operator=(const UnsafeUTFIterator &other) = default;
+
+    /**
+     * Compares logical positions, so it does not matter whether either iterator
+     * has already read ahead in operator*().
+     *
+     * @param other Another iterator
+     * @return true if this iterator is at the same position as the other one
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE bool operator==(const UnsafeUTFIterator &other) const {
+        return getLogicalPosition() == other.getLogicalPosition();
+    }
+    /**
+     * @param other Another iterator
+     * @return true if this iterator is not at the same position as the other one
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE bool operator!=(const UnsafeUTFIterator &other) const { return !operator==(other); }
+
+    /**
+     * Only enabled for a Sentinel type that is neither this iterator type nor UnitIter.
+     *
+     * @param iter An UnsafeUTFIterator
+     * @param s A unit iterator sentinel
+     * @return true if the iterator’s position is equal to the sentinel
+     * @draft ICU 78
+     */
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator==(const UnsafeUTFIterator &iter, const Sentinel &s) {
+        return iter.getLogicalPosition() == s;
+    }
+
+#if U_CPLUSPLUS_VERSION < 20
+    /**
+     * @param s A unit iterator sentinel
+     * @param iter An UnsafeUTFIterator
+     * @return true if the iterator’s position is equal to the sentinel
+     * @internal
+     */
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator==(const Sentinel &s, const UnsafeUTFIterator &iter) {
+        return iter.getLogicalPosition() == s;
+    }
+    /**
+     * @param iter An UnsafeUTFIterator
+     * @param s A unit iterator sentinel
+     * @return true if the iterator’s position is not equal to the sentinel
+     * @internal
+     */
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator!=(const UnsafeUTFIterator &iter, const Sentinel &s) { return !(iter == s); }
+    /**
+     * @param s A unit iterator sentinel
+     * @param iter An UnsafeUTFIterator
+     * @return true if the iterator’s position is not equal to the sentinel
+     * @internal
+     */
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator!=(const Sentinel &s, const UnsafeUTFIterator &iter) { return !(iter == s); }
+#endif  // C++17
+
+    /**
+     * Decodes the code unit sequence at the current position.
+     * The result is cached: a second call at the same position does not decode again.
+     *
+     * @return UnsafeCodeUnits with the decoded code point etc.
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE UnsafeCodeUnits<CP32, UnitIter> operator*() const {
+        if (state_ == 0) {
+            UnitIter p0 = p_;
+            units_ = Impl::readAndInc(p0, p_);
+            state_ = 1;
+        }
+        return units_;
+    }
+
+    /**
+     * Decodes the code unit sequence at the current position.
+     * Used like <code>iter->codePoint()</code> or <code>iter->stringView()</code> etc.
+     * The result is cached: a second call at the same position does not decode again.
+     *
+     * @return UnsafeCodeUnits with the decoded code point etc., wrapped into
+     *     an opaque proxy object so that <code>iter->codePoint()</code> etc. works.
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE Proxy operator->() const {
+        if (state_ == 0) {
+            UnitIter p0 = p_;
+            units_ = Impl::readAndInc(p0, p_);
+            state_ = 1;
+        }
+        return Proxy(units_);
+    }
+
+    /**
+     * Pre-increment operator.
+     * Reuses the work of a preceding operator*() or operator--() where possible.
+     *
+     * @return this iterator
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE UnsafeUTFIterator &operator++() {  // pre-increment
+        if (state_ > 0) {
+            // operator*() called readAndInc() so p_ is already ahead.
+            state_ = 0;
+        } else if (state_ == 0) {
+            Impl::inc(p_);
+        } else /* state_ < 0 */ {
+            // operator--() called decAndRead() so we know how far to skip.
+            p_ = units_.end();
+            state_ = 0;
+        }
+        return *this;
+    }
+
+    /**
+     * Post-increment operator.
+     *
+     * @return a copy of this iterator from before the increment.
+     *     If UnitIter is a single-pass input_iterator, then this function
+     *     returns an opaque proxy object so that <code>*iter++</code> still works.
+     * @draft ICU 78
+     */
+    U_FORCE_INLINE UnsafeUTFIterator operator++(int) {  // post-increment
+        if (state_ > 0) {
+            // operator*() called readAndInc() so p_ is already ahead.
+            UnsafeUTFIterator result(*this);
+            state_ = 0;
+            return result;
+        } else if (state_ == 0) {
+            UnitIter p0 = p_;
+            units_ = Impl::readAndInc(p0, p_);
+            UnsafeUTFIterator result(*this);
+            result.state_ = 1;
+            // keep this->state_ == 0
+            return result;
+        } else /* state_ < 0 */ {
+            UnsafeUTFIterator result(*this);
+            // operator--() called decAndRead() so we know how far to skip.
+            p_ = units_.end();
+            state_ = 0;
+            return result;
+        }
+    }
+
+    /**
+     * Pre-decrement operator.
+     * Only enabled if UnitIter is a bidirectional_iterator (including a pointer).
+     * Reads the preceding code unit sequence, so a following operator*() does not decode again.
+     *
+     * @return this iterator
+     * @draft ICU 78
+     */
+    template<typename Iter = UnitIter>
+    U_FORCE_INLINE
+    std::enable_if_t<prv::bidirectional_iterator<Iter>, UnsafeUTFIterator &>
+    operator--() {  // pre-decrement
+        if (state_ > 0) {
+            // operator*() called readAndInc() so p_ is ahead of the logical position.
+            p_ = units_.begin();
+        }
+        units_ = Impl::decAndRead(p_);
+        state_ = -1;
+        return *this;
+    }
+
+    /**
+     * Post-decrement operator.
+     * Only enabled if UnitIter is a bidirectional_iterator (including a pointer).
+     *
+     * @return a copy of this iterator from before the decrement.
+     * @draft ICU 78
+     */
+    template<typename Iter = UnitIter>
+    U_FORCE_INLINE
+    std::enable_if_t<prv::bidirectional_iterator<Iter>, UnsafeUTFIterator>
+    operator--(int) {  // post-decrement
+        UnsafeUTFIterator result(*this);
+        operator--();
+        return result;
+    }
+
+private:
+    friend class std::reverse_iterator<UnsafeUTFIterator<CP32, UnitIter>>;
+
+    U_FORCE_INLINE UnitIter getLogicalPosition() const {
+        return state_ <= 0 ? p_ : units_.begin();  // after a read-ahead, p_ is past the position
+    }
+
+    // operator*() etc. are logically const.
+    mutable UnitIter p_;
+    // Keep state so that we call readAndInc() only once for both operator*() and ++
+    // to make it easy for the compiler to optimize.
+    mutable UnsafeCodeUnits<CP32, UnitIter> units_;
+    // >0: units_ = readAndInc(), p_ = units limit
+    //     which means that p_ is ahead of its logical position
+    //  0: initial state
+    // <0: units_ = decAndRead(), p_ = units start
+    mutable int8_t state_ = 0;
+};
+
+#ifndef U_IN_DOXYGEN
+// Partial template specialization for single-pass input iterator.
+template<typename CP32, typename UnitIter>
+class UnsafeUTFIterator<
+        CP32,
+        UnitIter,
+        std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+    using Impl = UnsafeUTFImpl<CP32, UnitIter>;
+
+    // Proxy type for post-increment return value, to make *iter++ work.
+    // Also for operator->() (required by LegacyInputIterator)
+    // so that we don't promise always returning UnsafeCodeUnits.
+    // The proxy holds its own copy of the decoded units.
+    class Proxy {
+    public:
+        explicit Proxy(UnsafeCodeUnits<CP32, UnitIter> &units) : units_(units) {}
+        UnsafeCodeUnits<CP32, UnitIter> &operator*() { return units_; }
+        UnsafeCodeUnits<CP32, UnitIter> *operator->() { return &units_; }
+    private:
+        UnsafeCodeUnits<CP32, UnitIter> units_;
+    };
+
+public:
+    using value_type = UnsafeCodeUnits<CP32, UnitIter>;
+    using reference = value_type;  // dereferencing returns by value
+    using pointer = Proxy;
+    using difference_type = prv::iter_difference_t<UnitIter>;
+    using iterator_category = std::input_iterator_tag;  // always input-only in this specialization
+
+    U_FORCE_INLINE explicit UnsafeUTFIterator(UnitIter p) : p_(std::move(p)) {}
+
+    U_FORCE_INLINE UnsafeUTFIterator(UnsafeUTFIterator &&src) noexcept = default;
+    U_FORCE_INLINE UnsafeUTFIterator &operator=(UnsafeUTFIterator &&src) noexcept = default;
+
+    U_FORCE_INLINE UnsafeUTFIterator(const UnsafeUTFIterator &other) = default;
+    U_FORCE_INLINE UnsafeUTFIterator &operator=(const UnsafeUTFIterator &other) = default;
+
+    U_FORCE_INLINE bool operator==(const UnsafeUTFIterator &other) const {
+        return p_ == other.p_ && ahead_ == other.ahead_;
+        // Strictly speaking, we should check if the logical position is the same.
+        // However, we cannot advance, or do arithmetic with, a single-pass UnitIter.
+    }
+    U_FORCE_INLINE bool operator!=(const UnsafeUTFIterator &other) const { return !operator==(other); }
+
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator==(const UnsafeUTFIterator &iter, const Sentinel &s) {
+        return !iter.ahead_ && iter.p_ == s;  // never equal while a decoded sequence is pending
+    }
+
+#if U_CPLUSPLUS_VERSION < 20
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator==(const Sentinel &s, const UnsafeUTFIterator &iter) {
+        return !iter.ahead_ && iter.p_ == s;  // never equal while a decoded sequence is pending
+    }
+
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator!=(const UnsafeUTFIterator &iter, const Sentinel &s) { return !(iter == s); }
+
+    template<typename Sentinel> U_FORCE_INLINE friend
+    std::enable_if_t<
+        !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
+        bool>
+    operator!=(const Sentinel &s, const UnsafeUTFIterator &iter) { return !(iter == s); }
+#endif  // C++17
+
+    U_FORCE_INLINE UnsafeCodeUnits<CP32, UnitIter> operator*() const {
+        if (!ahead_) {  // decode at most once per position
+            units_ = Impl::readAndInc(p_, p_);
+            ahead_ = true;
+        }
+        return units_;
+    }
+
+    U_FORCE_INLINE Proxy operator->() const {
+        if (!ahead_) {  // decode at most once per position
+            units_ = Impl::readAndInc(p_, p_);
+            ahead_ = true;
+        }
+        return Proxy(units_);
+    }
+
+    U_FORCE_INLINE UnsafeUTFIterator &operator++() {  // pre-increment
+        if (ahead_) {
+            // operator*() called readAndInc() so p_ is already ahead.
+            ahead_ = false;
+        } else {
+            Impl::inc(p_);
+        }
+        return *this;
+    }
+
+    U_FORCE_INLINE Proxy operator++(int) {  // post-increment
+        if (ahead_) {
+            // operator*() called readAndInc() so p_ is already ahead.
+            ahead_ = false;
+        } else {
+            units_ = Impl::readAndInc(p_, p_);
+            // keep this->ahead_ == false
+        }
+        return Proxy(units_);  // carries the units from before the increment
+    }
+
+private:
+    // operator*() etc. are logically const.
+    mutable UnitIter p_;
+    // Keep state so that we call readAndInc() only once for both operator*() and ++
+    // so that we can use a single-pass input iterator for UnitIter.
+    mutable UnsafeCodeUnits<CP32, UnitIter> units_ = {0, 0};
+    // true: units_ = readAndInc(), p_ = units limit
+    //     which means that p_ is ahead of its logical position
+    // false: initial state
+    mutable bool ahead_ = false;
+};
+#endif  // U_IN_DOXYGEN
+
+}  // namespace U_HEADER_ONLY_NAMESPACE
+
+#ifndef U_IN_DOXYGEN
+// Bespoke specialization of reverse_iterator.
+// The default implementation implements reverse operator*() and ++ in a way
+// that does most of the same work twice for reading variable-length sequences.
+template<typename CP32, typename UnitIter>
+class std::reverse_iterator<U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator<CP32, UnitIter>> {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+    using Impl = U_HEADER_ONLY_NAMESPACE::UnsafeUTFImpl<CP32, UnitIter>;
+    using UnsafeCodeUnits_ = U_HEADER_ONLY_NAMESPACE::UnsafeCodeUnits<CP32, UnitIter>;
+
+    // Proxy type for operator->() (required by LegacyInputIterator)
+    // so that we don't promise always returning UnsafeCodeUnits.
+    // The proxy holds its own copy of the decoded units.
+    class Proxy {
+    public:
+        explicit Proxy(UnsafeCodeUnits_ units) : units_(units) {}
+        UnsafeCodeUnits_ &operator*() { return units_; }
+        UnsafeCodeUnits_ *operator->() { return &units_; }
+    private:
+        UnsafeCodeUnits_ units_;
+    };
+
+public:
+    using value_type = UnsafeCodeUnits_;
+    using reference = value_type;  // dereferencing returns by value
+    using pointer = Proxy;
+    using difference_type = U_HEADER_ONLY_NAMESPACE::prv::iter_difference_t<UnitIter>;
+    using iterator_category = std::bidirectional_iterator_tag;
+
+    U_FORCE_INLINE explicit reverse_iterator(U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator<CP32, UnitIter> iter) :
+            p_(iter.getLogicalPosition()), units_(0, 0, p_, p_) {}  // start at iter's logical position
+    U_FORCE_INLINE reverse_iterator() : p_{}, units_(0, 0, p_, p_) {}
+
+    U_FORCE_INLINE reverse_iterator(reverse_iterator &&src) noexcept = default;
+    U_FORCE_INLINE reverse_iterator &operator=(reverse_iterator &&src) noexcept = default;
+
+    U_FORCE_INLINE reverse_iterator(const reverse_iterator &other) = default;
+    U_FORCE_INLINE reverse_iterator &operator=(const reverse_iterator &other) = default;
+
+    U_FORCE_INLINE bool operator==(const reverse_iterator &other) const {
+        return getLogicalPosition() == other.getLogicalPosition();
+    }
+    U_FORCE_INLINE bool operator!=(const reverse_iterator &other) const { return !operator==(other); }
+
+    U_FORCE_INLINE UnsafeCodeUnits_ operator*() const {
+        if (state_ == 0) {  // decode at most once per position
+            units_ = Impl::decAndRead(p_);
+            state_ = -1;
+        }
+        return units_;
+    }
+
+    U_FORCE_INLINE Proxy operator->() const {
+        if (state_ == 0) {  // decode at most once per position
+            units_ = Impl::decAndRead(p_);
+            state_ = -1;
+        }
+        return Proxy(units_);
+    }
+
+    U_FORCE_INLINE reverse_iterator &operator++() {  // pre-increment
+        if (state_ < 0) {
+            // operator*() called decAndRead() so p_ is already behind.
+            state_ = 0;
+        } else if (state_ == 0) {
+            Impl::dec(p_);
+        } else /* state_ > 0 */ {
+            // operator--() called readAndInc() so we know how far to skip.
+            p_ = units_.begin();
+            state_ = 0;
+        }
+        return *this;
+    }
+
+    U_FORCE_INLINE reverse_iterator operator++(int) {  // post-increment
+        if (state_ < 0) {
+            // operator*() called decAndRead() so p_ is already behind.
+            reverse_iterator result(*this);
+            state_ = 0;
+            return result;
+        } else if (state_ == 0) {
+            units_ = Impl::decAndRead(p_);
+            reverse_iterator result(*this);
+            result.state_ = -1;
+            // keep this->state_ == 0
+            return result;
+        } else /* state_ > 0 */ {
+            reverse_iterator result(*this);
+            // operator--() called readAndInc() so we know how far to skip.
+            p_ = units_.begin();
+            state_ = 0;
+            return result;
+        }
+    }
+
+    U_FORCE_INLINE reverse_iterator &operator--() {  // pre-decrement
+        if (state_ < 0) {
+            // operator*() called decAndRead() so p_ is behind the logical position.
+            p_ = units_.end();
+        }
+        UnitIter p0 = p_;
+        units_ = Impl::readAndInc(p0, p_);
+        state_ = 1;
+        return *this;
+    }
+
+    U_FORCE_INLINE reverse_iterator operator--(int) {  // post-decrement
+        reverse_iterator result(*this);
+        operator--();
+        return result;
+    }
+
+private:
+    U_FORCE_INLINE UnitIter getLogicalPosition() const {
+        return state_ >= 0 ? p_ : units_.end();  // after decAndRead(), p_ is before the position
+    }
+
+    // operator*() etc. are logically const.
+    mutable UnitIter p_;
+    // Keep state so that we call decAndRead() only once for both operator*() and ++
+    // to make it easy for the compiler to optimize.
+    mutable UnsafeCodeUnits_ units_;
+    // >0: units_ = readAndInc(), p_ = units limit
+    //  0: initial state
+    // <0: units_ = decAndRead(), p_ = units start
+    //     which means that p_ is behind its logical position
+    mutable int8_t state_ = 0;
+};
+#endif  // U_IN_DOXYGEN
+
+namespace U_HEADER_ONLY_NAMESPACE {
+
+/**
+ * UnsafeUTFIterator factory function.
+ * Deduces the UnitIter template parameter from the input.
+ *
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
+ * @tparam UnitIter Can usually be omitted/deduced:
+ *     An iterator (often a pointer) that returns a code unit type:
+ *     UTF-8: char or char8_t or uint8_t;
+ *     UTF-16: char16_t or uint16_t or (on Windows) wchar_t;
+ *     UTF-32: char32_t or UChar32=int32_t or (on Linux) wchar_t
+ * @param iter code unit iterator
+ * @return an UnsafeUTFIterator&lt;CP32, UnitIter&gt;
+ *     for the given code unit iterator or character pointer
+ * @draft ICU 78
+ */
+template<typename CP32, typename UnitIter>
+auto unsafeUTFIterator(UnitIter iter) {
+    // Spell the deduced iterator type once, then hand over the argument by move.
+    using Result = UnsafeUTFIterator<CP32, UnitIter>;
+    return Result(std::move(iter));
+}
+
+/**
+ * A C++ "range" for non-validating iteration over all of the code points of a code unit range.
+ * The string must be well-formed.
+ *
+ * Call unsafeUTFStringCodePoints() to have the compiler deduce the Range type.
+ *
+ * UnsafeUTFStringCodePoints is conditionally borrowed; that is, if Range is a borrowed range
+ * so is UnsafeUTFStringCodePoints<CP32, Range>.
+ * Note that when given a range r that is an lvalue and is not a view,  unsafeUTFStringCodePoints(r) uses
+ * a ref_view of r as the Range type, which is a borrowed range.
+ * In practice, this means that given a container variable r, the iterators of
+ * unsafeUTFStringCodePoints(r) can be used as long as iterators on r are valid, without having to keep
+ * unsafeUTFStringCodePoints(r) around.
+ * For instance:
+ * \code
+ *     std::u8string s = u8"𒇧𒇧";
+ *     // it outlives unsafeUTFStringCodePoints<char32_t>(s).
+ *     auto it = unsafeUTFStringCodePoints<char32_t>(s).begin();
+ *     ++it;
+ *     char32_t second_code_point = it->codePoint();  // OK.
+ * \endcode
+ *
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
+ * @tparam Range A C++ "range" of Unicode UTF-8/16/32 code units
+ * @draft ICU 78
+ * @see unsafeUTFStringCodePoints
+ */
+template<typename CP32, typename Range>
+class UnsafeUTFStringCodePoints {
+    static_assert(sizeof(CP32) == 4, "CP32 must be a 32-bit type to hold a code point");
+public:
+    /**
+     * Constructs an empty C++ "range" object.
+     * @draft ICU 78
+     */
+    UnsafeUTFStringCodePoints() = default;
+
+    /**
+     * Constructs a C++ "range" object over the code points in the string.
+     * Enabled only when Range is not a reference type;
+     * the code unit range is taken by value and moved into this object.
+     * @param unitRange input range
+     * @draft ICU 78
+     */
+    template<typename R = Range, typename = std::enable_if_t<!std::is_reference_v<R>>>
+    explicit UnsafeUTFStringCodePoints(Range unitRange) : unitRange(std::move(unitRange)) {}
+    /**
+     * Constructs a C++ "range" object over the code points in the string,
+     * keeping a reference to the code unit range.  This overload is used by
+     * unsafeUTFStringCodePoints in C++17; in C++20, a ref_view is used instead (via
+     * views::all).
+     * Enabled only when Range is a reference type.
+     * The extra defaulted template parameter keeps this constructor template
+     * distinct from the by-value overload above.
+     * @param unitRange input range
+     * @draft ICU 78
+     */
+    template<typename R = Range, typename = std::enable_if_t<std::is_reference_v<R>>, typename = void>
+    explicit UnsafeUTFStringCodePoints(Range unitRange) : unitRange(unitRange) {}
+
+    /** Copy constructor. @draft ICU 78 */
+    UnsafeUTFStringCodePoints(const UnsafeUTFStringCodePoints &other) = default;
+
+    /** Copy assignment operator. @draft ICU 78 */
+    UnsafeUTFStringCodePoints &operator=(const UnsafeUTFStringCodePoints &other) = default;
+
+    /**
+     * @return the range start iterator, built from the begin() of the code unit range
+     * @draft ICU 78
+     */
+    auto begin() {
+        return unsafeUTFIterator<CP32>(unitRange.begin());
+    }
+
+    /**
+     * Const overload; only enabled if const Range is itself a range.
+     * @return the range start iterator, built from the begin() of the code unit range
+     * @draft ICU 78
+     */
+    template<typename R = Range, typename = std::enable_if_t<prv::range<const R>>>
+    auto begin() const {
+        return unsafeUTFIterator<CP32>(unitRange.begin());
+    }
+
+    /**
+     * @return the range limit (exclusive end) iterator.
+     *     If the code unit range uses a sentinel type that differs from its iterator type,
+     *     then that code unit sentinel is returned as is.
+     * @draft ICU 78
+     */
+    auto end() {
+        using UnitIter = decltype(unitRange.begin());
+        using LimitIter = decltype(unitRange.end());
+        if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
+            // Return the code unit sentinel.
+            return unitRange.end();
+        } else {
+            return unsafeUTFIterator<CP32>(unitRange.end());
+        }
+    }
+
+    /**
+     * Const overload; only enabled if const Range is itself a range.
+     * @return the range limit (exclusive end) iterator.
+     *     If the code unit range uses a sentinel type that differs from its iterator type,
+     *     then that code unit sentinel is returned as is.
+     * @draft ICU 78
+     */
+    template<typename R = Range, typename = std::enable_if_t<prv::range<const R>>>
+    auto end() const {
+        using UnitIter = decltype(unitRange.begin());
+        using LimitIter = decltype(unitRange.end());
+        if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
+            // Return the code unit sentinel.
+            return unitRange.end();
+        } else {
+            return unsafeUTFIterator<CP32>(unitRange.end());
+        }
+    }
+
+    /**
+     * Wraps the result of the const end() overload.
+     * @return std::reverse_iterator(end())
+     * @draft ICU 78
+     */
+    auto rbegin() const {
+        return std::make_reverse_iterator(end());
+    }
+
+    /**
+     * Wraps the result of the const begin() overload.
+     * @return std::reverse_iterator(begin())
+     * @draft ICU 78
+     */
+    auto rend() const {
+        return std::make_reverse_iterator(begin());
+    }
+
+private:
+    Range unitRange;  // the code unit range: an owned object, a view, or a reference
+};
+
+/**
+ * Type of the unsafeUTFStringCodePoints function object.
+ * Calling it with a code unit range returns an UnsafeUTFStringCodePoints over that range.
+ * When the C++23 library support tested below is available, it also derives from
+ * std::ranges::range_adaptor_closure.
+ * @internal
+ */
+template<typename CP32>
+struct UnsafeUTFStringCodePointsAdaptor
+#if U_CPLUSPLUS_VERSION >= 23 && __cpp_lib_ranges >= 2022'02 &&                                         \
+    __cpp_lib_bind_back >= 2022'02 // http://wg21.link/P2387R3.
+    : std::ranges::range_adaptor_closure<UnsafeUTFStringCodePointsAdaptor<CP32>>
+#endif
+{
+    /**
+     * Wraps unitRange in an UnsafeUTFStringCodePoints.
+     * With a sufficiently recent ranges library, the stored Range type is views::all_t of
+     * the argument type. Otherwise a basic_string_view is stored by copy, and any other
+     * argument is stored as the deduced Range type (a reference for lvalues).
+     * @internal
+     */
+    template<typename Range>
+    auto operator()(Range &&unitRange) const {
+#if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10  // We need https://wg21.link/P2415R2.
+        return UnsafeUTFStringCodePoints<CP32, std::ranges::views::all_t<Range>>(std::forward<Range>(unitRange));
+#else
+        if constexpr (prv::is_basic_string_view_v<std::decay_t<Range>>) {
+            // Take basic_string_view by copy, not by reference.  In C++20 this is handled by
+            // all_t<Range>, which is Range if Range is a view.
+            return UnsafeUTFStringCodePoints<CP32, std::decay_t<Range>>(std::forward<Range>(unitRange));
+        } else {
+            return UnsafeUTFStringCodePoints<CP32, Range>(std::forward<Range>(unitRange));
+        }
+#endif
+    }
+};
+
+
+/**
+ * Range adaptor function object returning an UnsafeUTFStringCodePoints object that represents a
+ * "range" of code points in a code unit range. The string must be well-formed.
+ * Deduces the Range template parameter from the input, taking into account the value category: the
+ * code units will be referenced if possible, and moved if necessary.
+ *
+ * CP32 must be specified explicitly; call it like
+ * <code>unsafeUTFStringCodePoints&lt;CP32&gt;(unitRange)</code>.
+ *
+ * @tparam CP32 Code point type: UChar32 (=int32_t) or char32_t or uint32_t
+ * @tparam Range A C++ "range" of Unicode UTF-8/16/32 code units
+ * @param unitRange input range
+ * @return an UnsafeUTFStringCodePoints&lt;CP32, Range&gt; for the given unitRange
+ * @draft ICU 78
+ */
+template<typename CP32>
+constexpr UnsafeUTFStringCodePointsAdaptor<CP32> unsafeUTFStringCodePoints;
+
+}  // namespace U_HEADER_ONLY_NAMESPACE
+
+
+#if defined(__cpp_lib_ranges)  // only when the standard library advertises ranges support
+template <typename CP32, UTFIllFormedBehavior behavior, typename Range>
+constexpr bool std::ranges::enable_borrowed_range<
+    U_HEADER_ONLY_NAMESPACE::UTFStringCodePoints<CP32, behavior, Range>> =
+    std::ranges::enable_borrowed_range<Range>;  // borrowed exactly when Range is
+
+template <typename CP32, typename Range>
+constexpr bool std::ranges::enable_borrowed_range<
+    U_HEADER_ONLY_NAMESPACE::UnsafeUTFStringCodePoints<CP32, Range>> =
+    std::ranges::enable_borrowed_range<Range>;  // borrowed exactly when Range is
+#endif  // __cpp_lib_ranges
+
+#endif  // U_HIDE_DRAFT_API
+#endif  // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
+#endif  // __UTFITERATOR_H__

+ 161 - 0
thirdparty/icu4c/common/unicode/utfstring.h

@@ -0,0 +1,161 @@
+// © 2025 and later: Unicode, Inc. and others.
+// License & terms of use: https://www.unicode.org/copyright.html
+
+// utfstring.h
+// created: 2025jul18 Markus W. Scherer
+
+#ifndef __UTFSTRING_H__
+#define __UTFSTRING_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API || !defined(UTYPES_H)
+
+#include "unicode/utf16.h"
+
+/**
+ * \file
+ * \brief C++ header-only API: C++ string helper functions.
+ */
+
+#ifndef U_HIDE_DRAFT_API
+
+namespace U_HEADER_ONLY_NAMESPACE {
+namespace utfstring {
+
+// Write code points to strings -------------------------------------------- ***
+
+#ifndef U_IN_DOXYGEN
+namespace prv {
+
+// This function, and the public wrappers,
+// want to be U_FORCE_INLINE but the gcc-debug-build-and-test CI check failed with
+// error: ‘always_inline’ function might not be inlinable [-Werror=attributes]
+template<typename StringClass, bool validate>  // validate=true: non-scalar values are replaced with U+FFFD
+inline StringClass &appendCodePoint(StringClass &s, uint32_t c) {  // appends the code units of c to s; returns s
+    using Unit = typename StringClass::value_type;  // code unit type; its size selects the encoding below
+    if constexpr (sizeof(Unit) == 1) {
+        // UTF-8: Similar to U8_APPEND().
+        if (c <= 0x7f) {
+            s.push_back(static_cast<Unit>(c));
+        } else {
+            Unit buf[4];  // filled from the back: the sequence occupies the last len units
+            uint8_t len;
+            if (c <= 0x7ff) {
+                len = 2;
+                buf[2] = (c >> 6) | 0xc0;  // lead byte of a 2-byte sequence
+            } else {
+                if (validate ?  // condition is true when c (possibly replaced) needs 3 bytes
+                        c < 0xd800 ||
+                            (c < 0xe000 || c > 0x10ffff ? (c = 0xfffd, true) : c <= 0xffff) :  // surrogate or above U+10FFFF: c becomes U+FFFD
+                        c <= 0xffff) {
+                    len = 3;
+                    buf[1] = (c >> 12) | 0xe0;  // lead byte of a 3-byte sequence
+                } else {
+                    len = 4;
+                    buf[0] = (c >> 18) | 0xf0;  // lead byte of a 4-byte sequence
+                    buf[1] = ((c >> 12) & 0x3f) | 0x80;
+                }
+                buf[2] = ((c >> 6) & 0x3f) | 0x80;  // trail byte shared by the 3- and 4-byte forms
+            }
+            buf[3] = (c & 0x3f) | 0x80;  // last trail byte, shared by all multi-byte forms
+            s.append(buf + 4 - len, len);  // append only the len units that were written
+        }
+    } else if constexpr (sizeof(Unit) == 2) {
+        // UTF-16: Similar to U16_APPEND().
+        if (validate ?
+                c < 0xd800 || (c < 0xe000 || c > 0x10ffff ? (c = 0xfffd, true) : c <= 0xffff) :  // same test as in the UTF-8 branch
+                c <= 0xffff) {
+            s.push_back(static_cast<Unit>(c));  // a single code unit (c itself or U+FFFD)
+        } else {
+            Unit buf[2] = { U16_LEAD(c), U16_TRAIL(c) };  // two code units: lead and trail surrogate
+            s.append(buf, 2);
+        }
+    } else {
+        // UTF-32
+        s.push_back(!validate || U_IS_SCALAR_VALUE(c) ? c : 0xfffd);  // always exactly one code unit
+    }
+    return s;
+}
+
+}  // namespace prv
+#endif  // U_IN_DOXYGEN
+
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Appends the code point to the string.
+ * Appends the U+FFFD replacement character instead if c is not a scalar value.
+ * See https://www.unicode.org/glossary/#unicode_scalar_value
+ *
+ * The encoding follows the size of StringClass::value_type: 1-byte units are written as UTF-8,
+ * 2-byte units as UTF-16, and units of any other size as UTF-32.
+ *
+ * @tparam StringClass A version of std::basic_string (or a compatible type)
+ * @param s The string to append to
+ * @param c The code point to append
+ * @return s
+ * @draft ICU 78
+ * @see U_IS_SCALAR_VALUE
+ */
+template<typename StringClass>
+inline StringClass &appendOrFFFD(StringClass &s, UChar32 c) {
+    return prv::appendCodePoint<StringClass, true>(s, c);
+}
+
+/**
+ * Appends the code point to the string.
+ * The code point must be a scalar value; otherwise the behavior is undefined.
+ * See https://www.unicode.org/glossary/#unicode_scalar_value
+ *
+ * The encoding follows the size of StringClass::value_type: 1-byte units are written as UTF-8,
+ * 2-byte units as UTF-16, and units of any other size as UTF-32.
+ * Unlike appendOrFFFD(), c is not checked and nothing is replaced with U+FFFD.
+ *
+ * @tparam StringClass A version of std::basic_string (or a compatible type)
+ * @param s The string to append to
+ * @param c The code point to append (must be a scalar value)
+ * @return s
+ * @draft ICU 78
+ * @see U_IS_SCALAR_VALUE
+ */
+template<typename StringClass>
+inline StringClass &appendUnsafe(StringClass &s, UChar32 c) {
+    return prv::appendCodePoint<StringClass, false>(s, c);
+}
+
+/**
+ * Returns the code point as a string of code units.
+ * Returns the U+FFFD replacement character instead if c is not a scalar value.
+ * See https://www.unicode.org/glossary/#unicode_scalar_value
+ *
+ * The result is a default-constructed StringClass to which the code units of c have been
+ * appended. The encoding follows the size of StringClass::value_type: 1-byte units are written
+ * as UTF-8, 2-byte units as UTF-16, and units of any other size as UTF-32.
+ *
+ * @tparam StringClass A version of std::basic_string (or a compatible type)
+ * @param c The code point
+ * @return the string of c's code units
+ * @draft ICU 78
+ * @see U_IS_SCALAR_VALUE
+ */
+template<typename StringClass>
+inline StringClass encodeOrFFFD(UChar32 c) {
+    StringClass s;
+    prv::appendCodePoint<StringClass, true>(s, c);
+    return s;
+}
+
+/**
+ * Returns the code point as a string of code units.
+ * The code point must be a scalar value; otherwise the behavior is undefined.
+ * See https://www.unicode.org/glossary/#unicode_scalar_value
+ *
+ * The result is a default-constructed StringClass to which the code units of c have been
+ * appended without checking c. The encoding follows the size of StringClass::value_type:
+ * 1-byte units are written as UTF-8, 2-byte units as UTF-16, and units of any other size as UTF-32.
+ *
+ * @tparam StringClass A version of std::basic_string (or a compatible type)
+ * @param c The code point
+ * @return the string of c's code units
+ * @draft ICU 78
+ * @see U_IS_SCALAR_VALUE
+ */
+template<typename StringClass>
+inline StringClass encodeUnsafe(UChar32 c) {
+    StringClass s;
+    prv::appendCodePoint<StringClass, false>(s, c);
+    return s;
+}
+#endif  // U_HIDE_DRAFT_API
+
+}  // namespace utfstring
+}  // namespace U_HEADER_ONLY_NAMESPACE
+
+#endif  // U_HIDE_DRAFT_API
+#endif  // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
+#endif  // __UTFSTRING_H__

+ 79 - 0
thirdparty/icu4c/common/unicode/utypes.h

@@ -384,6 +384,85 @@ typedef double UDate;
 #define U_TOOLUTIL_API U_IMPORT
 #endif
 
+#ifndef U_FORCE_HIDE_DRAFT_API
+
+/**
+ * \def U_DATA_API_CLASS
+ * Set to export library symbols from inside the stubdata library,
+ * and to import them from outside, to be used on a class.
+ * @draft ICU 78
+ */
+
+/**
+ * \def U_COMMON_API_CLASS
+ * Set to export library symbols from inside the common library,
+ * and to import them from outside, to be used on a class.
+ * @draft ICU 78
+ */
+
+/**
+ * \def U_I18N_API_CLASS
+ * Set to export library symbols from inside the i18n library,
+ * and to import them from outside, to be used on a class.
+ * @draft ICU 78
+ */
+
+/**
+ * \def U_LAYOUT_API_CLASS
+ * Set to export library symbols from inside the layout engine library,
+ * and to import them from outside, to be used on a class.
+ * @draft ICU 78
+ */
+
+/**
+ * \def U_LAYOUTEX_API_CLASS
+ * Set to export library symbols from inside the layout extensions library,
+ * and to import them from outside, to be used on a class.
+ * @draft ICU 78
+ */
+
+/**
+ * \def U_IO_API_CLASS
+ * Set to export library symbols from inside the ustdio library,
+ * and to import them from outside, to be used on a class.
+ * @draft ICU 78
+ */
+
+/**
+ * \def U_TOOLUTIL_API_CLASS
+ * Set to export library symbols from inside the toolutil library,
+ * and to import them from outside, to be used on a class.
+ * @draft ICU 78
+ */
+
+// When used on Windows, the U_..._API macros expand to __declspec(dllexport)
+// and __declspec(dllimport), which when used on a class results in all members
+// of the class being exported, including private members, which is problematic
+// for classes that have private members that can't be exported (such as
+// templates from the standard library):
+//
+// https://learn.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-1-c4251
+//
+#if U_PLATFORM_HAS_WIN32_API  // Windows: the ..._API_CLASS macros expand to nothing
+#define U_DATA_API_CLASS
+#define U_COMMON_API_CLASS
+#define U_I18N_API_CLASS
+#define U_LAYOUT_API_CLASS
+#define U_LAYOUTEX_API_CLASS
+#define U_IO_API_CLASS
+#define U_TOOLUTIL_API_CLASS
+#else  // elsewhere: each ..._API_CLASS macro is the same as the matching U_..._API macro
+#define U_DATA_API_CLASS     U_DATA_API
+#define U_COMMON_API_CLASS   U_COMMON_API
+#define U_I18N_API_CLASS     U_I18N_API
+#define U_LAYOUT_API_CLASS   U_LAYOUT_API
+#define U_LAYOUTEX_API_CLASS U_LAYOUTEX_API
+#define U_IO_API_CLASS       U_IO_API
+#define U_TOOLUTIL_API_CLASS U_TOOLUTIL_API
+#endif
+
+#endif  // U_FORCE_HIDE_DRAFT_API
+
 /**
  * \def U_STANDARD_CPP_NAMESPACE
  * Control of C++ Namespace

+ 5 - 5
thirdparty/icu4c/common/unicode/uvernum.h

@@ -53,7 +53,7 @@
  *  This value will change in the subsequent releases of ICU
  *  @stable ICU 2.4
  */
-#define U_ICU_VERSION_MAJOR_NUM 77
+#define U_ICU_VERSION_MAJOR_NUM 78
 
 /** The current ICU minor version as an integer.
  *  This value will change in the subsequent releases of ICU
@@ -79,7 +79,7 @@
  *  This value will change in the subsequent releases of ICU
  *  @stable ICU 2.6
  */
-#define U_ICU_VERSION_SUFFIX _77
+#define U_ICU_VERSION_SUFFIX _78
 
 /**
  * \def U_DEF2_ICU_ENTRY_POINT_RENAME
@@ -132,7 +132,7 @@
  *  This value will change in the subsequent releases of ICU
  *  @stable ICU 2.4
  */
-#define U_ICU_VERSION "77.1"
+#define U_ICU_VERSION "78.1"
 
 /**
  * The current ICU library major version number as a string, for library name suffixes.
@@ -145,13 +145,13 @@
  *
  * @stable ICU 2.6
  */
-#define U_ICU_VERSION_SHORT "77"
+#define U_ICU_VERSION_SHORT "78"
 
 #ifndef U_HIDE_INTERNAL_API
 /** Data version in ICU4C.
  * @internal ICU 4.4 Internal Use Only
  **/
-#define U_ICU_DATA_VERSION "77.1"
+#define U_ICU_DATA_VERSION "78.1"
 #endif  /* U_HIDE_INTERNAL_API */
 
 /*===========================================================================

+ 4 - 4
thirdparty/icu4c/common/unicode/uversion.h

@@ -125,7 +125,6 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
         U_NAMESPACE_USE
 #   endif
 
-#ifndef U_FORCE_HIDE_DRAFT_API
 /**
  * \def U_HEADER_NESTED_NAMESPACE
  * Nested namespace used inside U_ICU_NAMESPACE for header-only APIs.
@@ -136,7 +135,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
  * this is always "header". Header-only types are not marked for export,
  * which on Windows already avoids callers linking with library instantiations.
  *
- * @draft ICU 76
+ * @stable ICU 76
  * @see U_HEADER_ONLY_NAMESPACE
  */
 
@@ -147,9 +146,10 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
  * "U_ICU_NAMESPACE::header" or "U_ICU_NAMESPACE::internal",
  * see U_HEADER_NESTED_NAMESPACE for details.
  *
- * @draft ICU 76
+ * @stable ICU 76
  */
 
+#ifndef U_FORCE_HIDE_DRAFT_API
 /**
  * \def U_ICU_NAMESPACE_OR_INTERNAL
  * Namespace used for header-only APIs that used to be regular C++ APIs.
@@ -159,6 +159,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
  *
  * @draft ICU 77
  */
+#endif  // U_FORCE_HIDE_DRAFT_API
 
 // The first test is the same as for defining U_EXPORT for Windows.
 #if defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \
@@ -180,7 +181,6 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
 #define U_HEADER_ONLY_NAMESPACE U_ICU_NAMESPACE::U_HEADER_NESTED_NAMESPACE
 
 namespace U_HEADER_ONLY_NAMESPACE {}
-#endif  // U_FORCE_HIDE_DRAFT_API
 
 #endif /* __cplusplus */
 

+ 3 - 3
thirdparty/icu4c/common/uniset.cpp

@@ -974,12 +974,12 @@ void UnicodeSet::_add(const UnicodeString& s) {
         setToBogus();
         return;
     }
-    UnicodeString* t = new UnicodeString(s);
-    if (t == nullptr) { // Check for memory allocation error.
+    LocalPointer<UnicodeString> t(new UnicodeString(s));
+    if (t.isNull()) { // Check for memory allocation error.
         setToBogus();
         return;
     }
-    strings_->sortedInsert(t, compareUnicodeString, ec);
+    strings_->sortedInsert(t.orphan(), compareUnicodeString, ec);
     if (U_FAILURE(ec)) {
         setToBogus();
     }

+ 15 - 10
thirdparty/icu4c/common/unistr_cnv.cpp

@@ -275,20 +275,24 @@ UnicodeString::doExtract(int32_t start, int32_t length,
     }
 
     // perform the conversion
-    ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &errorCode);
+    UErrorCode bufferStatus = U_ZERO_ERROR;
+    ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
     length = static_cast<int32_t>(dest - originalDest);
 
     // if an overflow occurs, then get the preflighting length
-    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+    if(bufferStatus==U_BUFFER_OVERFLOW_ERROR) {
         char buffer[1024];
 
         destLimit=buffer+sizeof(buffer);
         do {
             dest=buffer;
-            errorCode=U_ZERO_ERROR;
-            ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &errorCode);
+            bufferStatus=U_ZERO_ERROR;
+            ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &bufferStatus);
             length += static_cast<int32_t>(dest - buffer);
-        } while(errorCode==U_BUFFER_OVERFLOW_ERROR);
+        } while(bufferStatus==U_BUFFER_OVERFLOW_ERROR);
+    }
+    if (U_FAILURE(bufferStatus)) {
+        errorCode = bufferStatus;
     }
 
     return u_terminateChars(originalDest, destCapacity, length, &errorCode);
@@ -389,17 +393,15 @@ UnicodeString::doCodepageCreate(const char *codepageData,
         // perform the conversion
         array = getArrayStart();
         myTarget = array + length();
+        UErrorCode bufferStatus = U_ZERO_ERROR;
         ucnv_toUnicode(converter, &myTarget,  array + getCapacity(),
-            &mySource, mySourceEnd, nullptr, true, &status);
+            &mySource, mySourceEnd, nullptr, true, &bufferStatus);
 
         // update the conversion parameters
         setLength(static_cast<int32_t>(myTarget - array));
 
         // allocate more space and copy data, if needed
-        if(status == U_BUFFER_OVERFLOW_ERROR) {
-            // reset the error code
-            status = U_ZERO_ERROR;
-
+        if(bufferStatus == U_BUFFER_OVERFLOW_ERROR) {
             // keep the previous conversion results
             doCopyArray = true;
 
@@ -407,6 +409,9 @@ UnicodeString::doCodepageCreate(const char *codepageData,
             // try 2 char16_t's per remaining source byte
             arraySize = static_cast<int32_t>(length() + 2 * (mySourceEnd - mySource));
         } else {
+            if (U_FAILURE(bufferStatus)) {
+                status = bufferStatus;
+            }
             break;
         }
     }

+ 5 - 0
thirdparty/icu4c/common/uposixdefs.h

@@ -74,4 +74,9 @@
 #define _POSIX_C_SOURCE 200809L
 #endif
 
+/* Prevent _XOPEN_SOURCE from breaking build on macOS when aligned_alloc exists. */
+#if defined(__APPLE__) && !defined(_DARWIN_C_SOURCE)
+#   define _DARWIN_C_SOURCE
+#endif
+
 #endif  /* __UPOSIXDEFS_H__ */

+ 5 - 1
thirdparty/icu4c/common/uprops.h

@@ -336,7 +336,7 @@ U_CFUNC uint32_t
 u_getUnicodeProperties(UChar32 c, int32_t column);
 
 /**
- * Get the the maximum values for some enum/int properties.
+ * Get the maximum values for some enum/int properties.
  * Use the same column numbers as for u_getUnicodeProperties().
  * The returned value will contain maximum values stored in the same bit fields
  * as where the enum values are stored in the u_getUnicodeProperties()
@@ -500,6 +500,10 @@ ublock_addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode);
 uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode);
 */
 
+/** @internal for icuexportdata */
+U_CAPI void U_EXPORT2
+uprv_addScriptExtensionsCodePoints(const USetAdder *sa, UErrorCode *pErrorCode);
+
 // TODO: Move this into a different header file (udataswp.h? new unames.h?) so that uprops.h
 // need not be C-compatible any more.
 /**

+ 0 - 2
thirdparty/icu4c/common/uresimp.h

@@ -120,9 +120,7 @@ public:
     // No heap allocation. Use only on the stack.
     static void* U_EXPORT2 operator new(size_t) noexcept = delete;
     static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
-#if U_HAVE_PLACEMENT_NEW
     static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete;
-#endif
 
     StackUResourceBundle();
     ~StackUResourceBundle();

+ 7 - 3
thirdparty/icu4c/common/uscript_props.cpp

@@ -45,11 +45,11 @@ const int32_t SCRIPT_PROPS[] = {
     // Begin copy-paste output from
     // tools/trunk/unicode/py/parsescriptmetadata.py
     0x0040 | RECOMMENDED,  // Zyyy
-    0x0308 | RECOMMENDED,  // Zinh
+    0x030F | RECOMMENDED,  // Zinh
     0x0628 | RECOMMENDED | RTL,  // Arab
     0x0531 | RECOMMENDED | CASED,  // Armn
     0x0995 | RECOMMENDED,  // Beng
-    0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
+    0x3105 | LIMITED_USE | LB_LETTERS,  // Bopo
     0x13C4 | LIMITED_USE | CASED,  // Cher
     0x03E2 | EXCLUSION | CASED,  // Copt
     0x042F | RECOMMENDED | CASED,  // Cyrl
@@ -223,7 +223,7 @@ const int32_t SCRIPT_PROPS[] = {
     0x11A5C | EXCLUSION,  // Soyo
     0x11A0B | EXCLUSION,  // Zanb
     0x1180B | EXCLUSION,  // Dogr
-    0x11D71 | LIMITED_USE,  // Gong
+    0x11D71 | EXCLUSION,  // Gong
     0x11EE5 | EXCLUSION,  // Maka
     0x16E40 | EXCLUSION | CASED,  // Medf
     0x10D12 | LIMITED_USE | RTL,  // Rohg
@@ -252,6 +252,10 @@ const int32_t SCRIPT_PROPS[] = {
     0x11BC4 | EXCLUSION,  // Sunu
     0x105C2 | EXCLUSION,  // Todr
     0x11392 | EXCLUSION,  // Tutg
+    0x16EA1 | EXCLUSION | CASED,  // Berf
+    0x10950 | EXCLUSION | RTL,  // Sidt
+    0x1E6D5 | EXCLUSION | LB_LETTERS,  // Tayo
+    0x11DC6 | EXCLUSION,  // Tols
     // End copy-paste from parsescriptmetadata.py
 };
 

+ 9 - 7
thirdparty/icu4c/common/usprep.cpp

@@ -666,11 +666,12 @@ usprep_prepare(   const UStringPrepProfile* profile,
         *status = U_MEMORY_ALLOCATION_ERROR;
         return 0;
     }
+    UErrorCode bufferStatus = U_ZERO_ERROR;
     int32_t b1Len = usprep_map(profile, src, srcLength,
-                               b1, s1.getCapacity(), options, parseError, status);
-    s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
+                               b1, s1.getCapacity(), options, parseError, &bufferStatus);
+    s1.releaseBuffer(U_SUCCESS(bufferStatus) ? b1Len : 0);
 
-    if(*status == U_BUFFER_OVERFLOW_ERROR){
+    if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){
         // redo processing of string
         /* we do not have enough room so grow the buffer*/
         b1 = s1.getBuffer(b1Len);
@@ -679,12 +680,13 @@ usprep_prepare(   const UStringPrepProfile* profile,
             return 0;
         }
 
-        *status = U_ZERO_ERROR; // reset error
+        bufferStatus = U_ZERO_ERROR; // reset error
         b1Len = usprep_map(profile, src, srcLength,
-                           b1, s1.getCapacity(), options, parseError, status);
-        s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
+                           b1, s1.getCapacity(), options, parseError, &bufferStatus);
+        s1.releaseBuffer(U_SUCCESS(bufferStatus) ? b1Len : 0);
     }
-    if(U_FAILURE(*status)){
+    if(U_FAILURE(bufferStatus)){
+        *status = bufferStatus;
         return 0;
     }
 

+ 16 - 16
thirdparty/icu4c/common/ustr_wcs.cpp

@@ -95,15 +95,14 @@ _strToWCS(wchar_t *dest,
     pSrcLimit = pSrc + srcLength;
 
     for(;;) {
-        /* reset the error state */
-        *pErrorCode = U_ZERO_ERROR;
+        UErrorCode bufferStatus = U_ZERO_ERROR;
 
         /* convert to chars using default converter */
-        ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,nullptr,(UBool)(pSrc==pSrcLimit),pErrorCode);
+        ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,nullptr,(UBool)(pSrc==pSrcLimit),&bufferStatus);
         count =(tempBuf - saveBuf);
         
         /* This should rarely occur */
-        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
+        if(bufferStatus==U_BUFFER_OVERFLOW_ERROR){
             tempBuf = saveBuf;
             
             /* we don't have enough room on the stack grow the buffer */
@@ -119,16 +118,15 @@ _strToWCS(wchar_t *dest,
            saveBuf = tempBuf;
            tempBufLimit = tempBuf + tempBufCapacity;
            tempBuf = tempBuf + count;
-
         } else {
+            if (U_FAILURE(bufferStatus)) {
+                *pErrorCode = bufferStatus;
+                goto cleanup;
+            }
             break;
         }
     }
 
-    if(U_FAILURE(*pErrorCode)){
-        goto cleanup;
-    }
-
     /* done with conversion null terminate the char buffer */
     if(count>=tempBufCapacity){
         tempBuf = saveBuf;
@@ -441,20 +439,22 @@ _strFromWCS( char16_t   *dest,
     }
     
     for(;;) {
-        
-        *pErrorCode = U_ZERO_ERROR;
-        
+        UErrorCode bufferStatus = U_ZERO_ERROR;
+
         /* convert to stack buffer*/
-        ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,nullptr,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
-        
+        ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,nullptr,(UBool)(pCSrc==pCSrcLimit),&bufferStatus);
+
         /* increment count to number written to stack */
         count+= pTarget - target;
-        
-        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
+
+        if(bufferStatus==U_BUFFER_OVERFLOW_ERROR){
             target = uStack;
             pTarget = uStack;
             pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
         } else {
+            if (U_FAILURE(bufferStatus)) {
+                *pErrorCode = bufferStatus;
+            }
             break;
         }
         

+ 7 - 5
thirdparty/icu4c/common/uts46.cpp

@@ -872,11 +872,12 @@ UTS46::processLabel(UnicodeString &dest,
                 buffer[1]=0x6e;
                 buffer[2]=0x2d;
                 buffer[3]=0x2d;
+                UErrorCode punycodeErrorCode=U_ZERO_ERROR;
                 int32_t punycodeLength=u_strToPunycode(label, labelLength,
                                                       buffer+4, punycode.getCapacity()-4,
-                                                      nullptr, &errorCode);
-                if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
-                    errorCode=U_ZERO_ERROR;
+                                                      nullptr, &punycodeErrorCode);
+                if(punycodeErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+                    punycodeErrorCode=U_ZERO_ERROR;
                     punycode.releaseBuffer(4);
                     buffer=punycode.getBuffer(4+punycodeLength);
                     if(buffer==nullptr) {
@@ -885,11 +886,12 @@ UTS46::processLabel(UnicodeString &dest,
                     }
                     punycodeLength=u_strToPunycode(label, labelLength,
                                                   buffer+4, punycode.getCapacity()-4,
-                                                  nullptr, &errorCode);
+                                                  nullptr, &punycodeErrorCode);
                 }
                 punycodeLength+=4;
                 punycode.releaseBuffer(punycodeLength);
-                if(U_FAILURE(errorCode)) {
+                if(U_FAILURE(punycodeErrorCode)) {
+                    errorCode = punycodeErrorCode;
                     return destLabelLength;
                 }
                 if(punycodeLength>63) {

+ 3 - 1
thirdparty/icu4c/i18n/scriptset.cpp

@@ -40,7 +40,9 @@ ScriptSet::ScriptSet(const ScriptSet &other) {
 }
 
 ScriptSet & ScriptSet::operator =(const ScriptSet &other) {
-    uprv_memcpy(bits, other.bits, sizeof(bits));
+    if (this != &other) {
+        uprv_memcpy(bits, other.bits, sizeof(bits));
+    }
     return *this;
 }
 

+ 1 - 0
thirdparty/icu4c/i18n/ucln_in.h

@@ -64,6 +64,7 @@ typedef enum ECleanupI18NType {
     UCLN_I18N_LIST_FORMATTER,
     UCLN_I18N_NUMSYS,
     UCLN_I18N_MF2_UNISETS,
+    UCLN_I18N_MF2_DATE_PARSERS,
     UCLN_I18N_COUNT /* This must be last */
 } ECleanupI18NType;
 

BIN
thirdparty/icu4c/icudt_godot.dat


Some files were not shown because too many files changed in this diff