Browse Source

Update ICU to 75.1

bruvzg 1 year ago
parent
commit
e74fea2864
100 changed files with 6528 additions and 6341 deletions
  1. 1 1
      COPYRIGHT.txt
  2. 1 1
      modules/text_server_adv/SCsub
  3. 1 1
      modules/text_server_adv/gdextension_build/SConstruct
  4. 3 3
      thirdparty/README.md
  5. 3 1
      thirdparty/icu4c/LICENSE
  6. 2 4
      thirdparty/icu4c/common/brkeng.cpp
  7. 3 6
      thirdparty/icu4c/common/brkiter.cpp
  8. 90 22
      thirdparty/icu4c/common/bytesinkutil.h
  9. 29 21
      thirdparty/icu4c/common/caniter.cpp
  10. 7 0
      thirdparty/icu4c/common/charstr.h
  11. 1 1
      thirdparty/icu4c/common/filteredbrk.cpp
  12. 5 5
      thirdparty/icu4c/common/hash.h
  13. 33 21
      thirdparty/icu4c/common/localebuilder.cpp
  14. 1013 1019
      thirdparty/icu4c/common/localefallback_data.h
  15. 31 23
      thirdparty/icu4c/common/localematcher.cpp
  16. 3 3
      thirdparty/icu4c/common/localeprioritylist.cpp
  17. 13 18
      thirdparty/icu4c/common/locavailable.cpp
  18. 3 3
      thirdparty/icu4c/common/locbased.cpp
  19. 67 72
      thirdparty/icu4c/common/locdispnames.cpp
  20. 4 4
      thirdparty/icu4c/common/locdistance.cpp
  21. 5 5
      thirdparty/icu4c/common/locdistance.h
  22. 64 92
      thirdparty/icu4c/common/locdspnm.cpp
  23. 80 101
      thirdparty/icu4c/common/locid.cpp
  24. 240 646
      thirdparty/icu4c/common/loclikely.cpp
  25. 128 62
      thirdparty/icu4c/common/loclikelysubtags.cpp
  26. 8 9
      thirdparty/icu4c/common/loclikelysubtags.h
  27. 41 45
      thirdparty/icu4c/common/locmap.cpp
  28. 51 49
      thirdparty/icu4c/common/locresdata.cpp
  29. 13 12
      thirdparty/icu4c/common/locutil.cpp
  30. 1 1
      thirdparty/icu4c/common/locutil.h
  31. 0 1
      thirdparty/icu4c/common/messagepattern.cpp
  32. 8 2
      thirdparty/icu4c/common/normalizer2impl.cpp
  33. 4 4
      thirdparty/icu4c/common/normalizer2impl.h
  34. 844 840
      thirdparty/icu4c/common/propname_data.h
  35. 5 6
      thirdparty/icu4c/common/putil.cpp
  36. 2 2
      thirdparty/icu4c/common/rbbi.cpp
  37. 0 4
      thirdparty/icu4c/common/rbbi_cache.cpp
  38. 64 7
      thirdparty/icu4c/common/rbbinode.cpp
  39. 2 1
      thirdparty/icu4c/common/rbbinode.h
  40. 3 2
      thirdparty/icu4c/common/rbbirb.cpp
  41. 32 15
      thirdparty/icu4c/common/rbbiscan.cpp
  42. 1 1
      thirdparty/icu4c/common/rbbistbl.cpp
  43. 4 1
      thirdparty/icu4c/common/rbbitblb.cpp
  44. 8 10
      thirdparty/icu4c/common/resbund.cpp
  45. 10 10
      thirdparty/icu4c/common/ruleiter.cpp
  46. 1 1
      thirdparty/icu4c/common/ruleiter.h
  47. 0 1
      thirdparty/icu4c/common/ubidiln.cpp
  48. 8 8
      thirdparty/icu4c/common/ubrk.cpp
  49. 1 1
      thirdparty/icu4c/common/ucase.h
  50. 10 12
      thirdparty/icu4c/common/ucasemap.cpp
  51. 1939 1938
      thirdparty/icu4c/common/uchar_props_data.h
  52. 6 5
      thirdparty/icu4c/common/uchriter.cpp
  53. 4 6
      thirdparty/icu4c/common/ucnv.cpp
  54. 0 4
      thirdparty/icu4c/common/ucnv_err.cpp
  55. 0 4
      thirdparty/icu4c/common/ucnv_u7.cpp
  56. 0 2
      thirdparty/icu4c/common/ucnvbocu.cpp
  57. 2 2
      thirdparty/icu4c/common/ucnvisci.cpp
  58. 2 4
      thirdparty/icu4c/common/ucnvscsu.cpp
  59. 43 72
      thirdparty/icu4c/common/ucurr.cpp
  60. 6 6
      thirdparty/icu4c/common/udata.cpp
  61. 2 2
      thirdparty/icu4c/common/udataswp.cpp
  62. 14 14
      thirdparty/icu4c/common/uiter.cpp
  63. 665 373
      thirdparty/icu4c/common/uloc.cpp
  64. 17 15
      thirdparty/icu4c/common/uloc_keytype.cpp
  65. 205 208
      thirdparty/icu4c/common/uloc_tag.cpp
  66. 14 17
      thirdparty/icu4c/common/ulocale.cpp
  67. 10 15
      thirdparty/icu4c/common/ulocbuilder.cpp
  68. 172 75
      thirdparty/icu4c/common/ulocimp.h
  69. 3 3
      thirdparty/icu4c/common/umapfile.cpp
  70. 0 2
      thirdparty/icu4c/common/umutex.cpp
  71. 9 10
      thirdparty/icu4c/common/unicode/brkiter.h
  72. 4 3
      thirdparty/icu4c/common/unicode/caniter.h
  73. 34 34
      thirdparty/icu4c/common/unicode/chariter.h
  74. 5 0
      thirdparty/icu4c/common/unicode/docmain.h
  75. 2 3
      thirdparty/icu4c/common/unicode/dtintrv.h
  76. 5 3
      thirdparty/icu4c/common/unicode/localematcher.h
  77. 57 43
      thirdparty/icu4c/common/unicode/localpointer.h
  78. 43 39
      thirdparty/icu4c/common/unicode/locid.h
  79. 12 12
      thirdparty/icu4c/common/unicode/normlzr.h
  80. 2 2
      thirdparty/icu4c/common/unicode/parsepos.h
  81. 15 47
      thirdparty/icu4c/common/unicode/platform.h
  82. 11 75
      thirdparty/icu4c/common/unicode/ptypes.h
  83. 14 16
      thirdparty/icu4c/common/unicode/rbbi.h
  84. 8 17
      thirdparty/icu4c/common/unicode/resbund.h
  85. 2 2
      thirdparty/icu4c/common/unicode/schriter.h
  86. 1 3
      thirdparty/icu4c/common/unicode/simpleformatter.h
  87. 4 4
      thirdparty/icu4c/common/unicode/stringpiece.h
  88. 143 2
      thirdparty/icu4c/common/unicode/uchar.h
  89. 17 17
      thirdparty/icu4c/common/unicode/uchriter.h
  90. 0 3
      thirdparty/icu4c/common/unicode/ucnv.h
  91. 11 0
      thirdparty/icu4c/common/unicode/uconfig.h
  92. 21 4
      thirdparty/icu4c/common/unicode/uloc.h
  93. 1 1
      thirdparty/icu4c/common/unicode/ulocbuilder.h
  94. 3 11
      thirdparty/icu4c/common/unicode/umachine.h
  95. 2 2
      thirdparty/icu4c/common/unicode/unifunct.h
  96. 10 10
      thirdparty/icu4c/common/unicode/uniset.h
  97. 24 26
      thirdparty/icu4c/common/unicode/unistr.h
  98. 8 1
      thirdparty/icu4c/common/unicode/urename.h
  99. 4 1
      thirdparty/icu4c/common/unicode/uscript.h
  100. 1 3
      thirdparty/icu4c/common/unicode/uset.h

+ 1 - 1
COPYRIGHT.txt

@@ -285,7 +285,7 @@ License: HarfBuzz
 
 Files: ./thirdparty/icu4c/
 Comment: International Components for Unicode
-Copyright: 1991-2021, Unicode
+Copyright: 2016-2024, Unicode, Inc.
 License: Unicode
 
 Files: ./thirdparty/jpeg-compressor/

+ 1 - 1
modules/text_server_adv/SCsub

@@ -468,7 +468,7 @@ if env["builtin_icu4c"]:
     ]
     thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
 
-    icu_data_name = "icudt74l.dat"
+    icu_data_name = "icudt75l.dat"
 
     if env.editor_build:
         env_icu.Depends("#thirdparty/icu4c/icudata.gen.h", "#thirdparty/icu4c/" + icu_data_name)

+ 1 - 1
modules/text_server_adv/gdextension_build/SConstruct

@@ -703,7 +703,7 @@ thirdparty_icu_sources = [
 ]
 thirdparty_icu_sources = [thirdparty_icu_dir + file for file in thirdparty_icu_sources]
 
-icu_data_name = "icudt74l.dat"
+icu_data_name = "icudt75l.dat"
 
 if env["static_icu_data"]:
     env_icu.Depends("../../../thirdparty/icu4c/icudata.gen.h", "../../../thirdparty/icu4c/" + icu_data_name)

+ 3 - 3
thirdparty/README.md

@@ -391,7 +391,7 @@ Files extracted from upstream source:
 ## icu4c
 
 - Upstream: https://github.com/unicode-org/icu
-- Version: 74.2 (2d029329c82c7792b985024b2bdab5fc7278fbc8, 2023)
+- Version: 75.1 (7750081bda4b3bc1768ae03849ec70f67ea10625, 2024)
 - License: Unicode
 
 Files extracted from upstream source:
@@ -403,7 +403,7 @@ Files extracted from upstream source:
 
 Files generated from upstream source:
 
-- The `icudt74l.dat` built with the provided `godot_data.json` config file (see
+- The `icudt75l.dat` built with the provided `godot_data.json` config file (see
   https://github.com/unicode-org/icu/blob/master/docs/userguide/icu_data/buildtool.md
   for instructions).
 
@@ -413,7 +413,7 @@ Files generated from upstream source:
 3. Reconfigure ICU with custom data config:
    `ICU_DATA_FILTER_FILE={GODOT_SOURCE}/thirdparty/icu4c/godot_data.json ./runConfigureICU {PLATFORM} --with-data-packaging=common`
 4. Delete `data/out` folder and rebuild data: `cd data && rm -rf ./out && make`
-5. Copy `source/data/out/icudt74l.dat` to the `{GODOT_SOURCE}/thirdparty/icu4c/icudt74l.dat`
+5. Copy `source/data/out/icudt75l.dat` to the `{GODOT_SOURCE}/thirdparty/icu4c/icudt75l.dat`
 
 
 ## jpeg-compressor

+ 3 - 1
thirdparty/icu4c/LICENSE

@@ -2,7 +2,7 @@ UNICODE LICENSE V3
 
 COPYRIGHT AND PERMISSION NOTICE
 
-Copyright © 2016-2023 Unicode, Inc.
+Copyright © 2016-2024 Unicode, Inc.
 
 NOTICE TO USER: Carefully read the following legal agreement. BY
 DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
@@ -38,6 +38,8 @@ not be used in advertising or otherwise to promote the sale, use or other
 dealings in these Data Files or Software without prior written
 authorization of the copyright holder.
 
+SPDX-License-Identifier: Unicode-3.0
+
 ----------------------------------------------------------------------
 
 Third-Party Software Licenses

+ 2 - 4
thirdparty/icu4c/common/brkeng.cpp

@@ -114,13 +114,11 @@ UnhandledEngine::handleCharacter(UChar32 c) {
  */
 
 ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
-    fEngines = 0;
+    fEngines = nullptr;
 }
 
 ICULanguageBreakFactory::~ICULanguageBreakFactory() {
-    if (fEngines != 0) {
-        delete fEngines;
-    }
+    delete fEngines;
 }
 
 void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) {

+ 3 - 6
thirdparty/icu4c/common/brkiter.cpp

@@ -438,17 +438,14 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
             UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
             uprv_strcpy(lb_lw, "line");
             UErrorCode kvStatus = U_ZERO_ERROR;
-            CharString value;
-            CharStringByteSink valueSink(&value);
-            loc.getKeywordValue("lb", valueSink, kvStatus);
+            auto value = loc.getKeywordValue<CharString>("lb", kvStatus);
             if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
                 uprv_strcat(lb_lw, "_");
                 uprv_strcat(lb_lw, value.data());
             }
             // lw=phrase is only supported in Japanese and Korean
             if (uprv_strcmp(loc.getLanguage(), "ja") == 0 || uprv_strcmp(loc.getLanguage(), "ko") == 0) {
-                value.clear();
-                loc.getKeywordValue("lw", valueSink, kvStatus);
+                value = loc.getKeywordValue<CharString>("lw", kvStatus);
                 if (U_SUCCESS(kvStatus) && value == "phrase") {
                     uprv_strcat(lb_lw, "_");
                     uprv_strcat(lb_lw, value.data());
@@ -500,7 +497,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
 Locale
 BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
     if (type == ULOC_REQUESTED_LOCALE) {
-        return Locale(requestLocale);
+        return {requestLocale};
     }
     U_LOCALE_BASED(locBased, *this);
     return locBased.getLocale(type, status);

+ 90 - 22
thirdparty/icu4c/common/bytesinkutil.h

@@ -7,18 +7,52 @@
 #ifndef BYTESINKUTIL_H
 #define BYTESINKUTIL_H
 
+#include <type_traits>
+
 #include "unicode/utypes.h"
 #include "unicode/bytestream.h"
 #include "unicode/edits.h"
+#include "charstr.h"
 #include "cmemory.h"
 #include "uassert.h"
+#include "ustr_imp.h"
 
 U_NAMESPACE_BEGIN
 
 class ByteSink;
-class CharString;
 class Edits;
 
+class U_COMMON_API CharStringByteSink : public ByteSink {
+public:
+    CharStringByteSink(CharString* dest);
+    ~CharStringByteSink() override;
+
+    CharStringByteSink() = delete;
+    CharStringByteSink(const CharStringByteSink&) = delete;
+    CharStringByteSink& operator=(const CharStringByteSink&) = delete;
+
+    void Append(const char* bytes, int32_t n) override;
+
+    char* GetAppendBuffer(int32_t min_capacity,
+                          int32_t desired_capacity_hint,
+                          char* scratch,
+                          int32_t scratch_capacity,
+                          int32_t* result_capacity) override;
+
+private:
+    CharString& dest_;
+};
+
+// CharString doesn't provide the public API that StringByteSink requires a
+// string class to have so this template specialization replaces the default
+// implementation of StringByteSink<CharString> with CharStringByteSink.
+template<>
+class StringByteSink<CharString> : public CharStringByteSink {
+ public:
+  StringByteSink(CharString* dest) : CharStringByteSink(dest) { }
+  StringByteSink(CharString* dest, int32_t /*initialAppendCapacity*/) : CharStringByteSink(dest) { }
+};
+
 class U_COMMON_API ByteSinkUtil {
 public:
     ByteSinkUtil() = delete;  // all static
@@ -57,30 +91,64 @@ public:
                                  ByteSink &sink, uint32_t options, Edits *edits,
                                  UErrorCode &errorCode);
 
-private:
-    static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
-                                        ByteSink &sink, uint32_t options, Edits *edits);
-};
-
-class U_COMMON_API CharStringByteSink : public ByteSink {
-public:
-    CharStringByteSink(CharString* dest);
-    ~CharStringByteSink() override;
-
-    CharStringByteSink() = delete;
-    CharStringByteSink(const CharStringByteSink&) = delete;
-    CharStringByteSink& operator=(const CharStringByteSink&) = delete;
-
-    void Append(const char* bytes, int32_t n) override;
+    /**
+     * Calls a lambda that writes to a ByteSink with a CheckedArrayByteSink
+     * and then returns through u_terminateChars(), in order to implement
+     * the classic ICU4C C API writing to a fixed-size buffer on top of a
+     * contemporary C++ API.
+     *
+     * @param buffer receiving buffer
+     * @param capacity capacity of receiving buffer
+     * @param lambda that gets called with the sink as an argument
+     * @param status set to U_BUFFER_OVERFLOW_ERROR on overflow
+     * @return number of bytes written, or needed (in case of overflow)
+     * @internal
+     */
+    template <typename F,
+              typename = std::enable_if_t<
+                  std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
+    static int32_t viaByteSinkToTerminatedChars(char* buffer, int32_t capacity,
+                                                F&& lambda,
+                                                UErrorCode& status) {
+        if (U_FAILURE(status)) { return 0; }
+        CheckedArrayByteSink sink(buffer, capacity);
+        lambda(sink, status);
+        if (U_FAILURE(status)) { return 0; }
+
+        int32_t reslen = sink.NumberOfBytesAppended();
+
+        if (sink.Overflowed()) {
+            status = U_BUFFER_OVERFLOW_ERROR;
+            return reslen;
+        }
+
+        return u_terminateChars(buffer, capacity, reslen, &status);
+    }
 
-    char* GetAppendBuffer(int32_t min_capacity,
-                          int32_t desired_capacity_hint,
-                          char* scratch,
-                          int32_t scratch_capacity,
-                          int32_t* result_capacity) override;
+    /**
+     * Calls a lambda that writes to a ByteSink with a CharStringByteSink and
+     * then returns a CharString, in order to implement a contemporary C++ API
+     * on top of a C/C++ compatibility ByteSink API.
+     *
+     * @param lambda that gets called with the sink as an argument
+     * @param status to check and report
+     * @return the resulting string, or an empty string (in case of error)
+     * @internal
+     */
+    template <typename F,
+              typename = std::enable_if_t<
+                  std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
+    static CharString viaByteSinkToCharString(F&& lambda, UErrorCode& status) {
+        if (U_FAILURE(status)) { return {}; }
+        CharString result;
+        CharStringByteSink sink(&result);
+        lambda(sink, status);
+        return result;
+    }
 
 private:
-    CharString& dest_;
+    static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
+                                        ByteSink &sink, uint32_t options, Edits *edits);
 };
 
 U_NAMESPACE_END

+ 29 - 21
thirdparty/icu4c/common/caniter.cpp

@@ -64,6 +64,7 @@ U_NAMESPACE_BEGIN
 
 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
 
+
 /**
  *@param source string to get results for
  */
@@ -73,10 +74,10 @@ CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode
     pieces_lengths(nullptr),
     current(nullptr),
     current_length(0),
-    nfd(*Normalizer2::getNFDInstance(status)),
-    nfcImpl(*Normalizer2Factory::getNFCImpl(status))
+    nfd(Normalizer2::getNFDInstance(status)),
+    nfcImpl(Normalizer2Factory::getNFCImpl(status))
 {
-    if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {
+    if(U_SUCCESS(status) && nfcImpl->ensureCanonIterData(status)) {
       setSource(sourceStr, status);
     }
 }
@@ -172,7 +173,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
     int32_t i = 0;
     UnicodeString *list = nullptr;
 
-    nfd.normalize(newSource, source, status);
+    nfd->normalize(newSource, source, status);
     if(U_FAILURE(status)) {
       return;
     }
@@ -194,7 +195,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
         current[0] = 0;
         pieces[0] = new UnicodeString[1];
         pieces_lengths[0] = 1;
-        if (pieces[0] == 0) {
+        if (pieces[0] == nullptr) {
             status = U_MEMORY_ALLOCATION_ERROR;
             goto CleanPartialInitialization;
         }
@@ -203,7 +204,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
 
 
     list = new UnicodeString[source.length()];
-    if (list == 0) {
+    if (list == nullptr) {
         status = U_MEMORY_ALLOCATION_ERROR;
         goto CleanPartialInitialization;
     }
@@ -219,7 +220,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
     // on the NFD form - see above).
     for (; i < source.length(); i += U16_LENGTH(cp)) {
         cp = source.char32At(i);
-        if (nfcImpl.isCanonSegmentStarter(cp)) {
+        if (nfcImpl->isCanonSegmentStarter(cp)) {
             source.extract(start, i-start, list[list_length++]); // add up to i
             start = i;
         }
@@ -252,9 +253,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
     return;
 // Common section to cleanup all local variables and reset object variables.
 CleanPartialInitialization:
-    if (list != nullptr) {
-        delete[] list;
-    }
+    delete[] list;
     cleanPieces();
 }
 
@@ -264,10 +263,19 @@ CleanPartialInitialization:
  * @param source the string to find permutations for
  * @return the results in a set.
  */
-void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
+void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth) {
     if(U_FAILURE(status)) {
         return;
     }
+    // To avoid an infinite loop caused by permute, we limit the depth of
+    // recursive calls to permute and return U_UNSUPPORTED_ERROR beyond it.
+    // We know that some unit tests need a depth of at least 4. Set to 8 just
+    // in case of some unforeseen use cases.
+    constexpr int32_t kPermuteDepthLimit = 8;
+    if (depth > kPermuteDepthLimit) {
+        status = U_UNSUPPORTED_ERROR;
+        return;
+    }
     //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
     int32_t i = 0;
 
@@ -277,7 +285,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
     if (source.length() <= 2 && source.countChar32() <= 1) {
         UnicodeString *toPut = new UnicodeString(source);
         /* test for nullptr */
-        if (toPut == 0) {
+        if (toPut == nullptr) {
             status = U_MEMORY_ALLOCATION_ERROR;
             return;
         }
@@ -311,7 +319,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
 
         // see what the permutations of the characters before and after this one are
         //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
-        permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
+        permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status, depth+1);
         /* Test for buffer overflows */
         if(U_FAILURE(status)) {
             return;
@@ -346,7 +354,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
     Hashtable permutations(status);
     Hashtable basic(status);
     if (U_FAILURE(status)) {
-        return 0;
+        return nullptr;
     }
     result.setValueDeleter(uprv_deleteUObject);
     permutations.setValueDeleter(uprv_deleteUObject);
@@ -381,7 +389,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
             //UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
             UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
             UnicodeString attempt;
-            nfd.normalize(possible, attempt, status);
+            nfd->normalize(possible, attempt, status);
 
             // TODO: check if operator == is semantically the same as attempt.equals(segment)
             if (attempt==segment) {
@@ -399,7 +407,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
 
     /* Test for buffer overflows */
     if(U_FAILURE(status)) {
-        return 0;
+        return nullptr;
     }
     // convert into a String[] to clean up storage
     //String[] finalResult = new String[result.size()];
@@ -407,7 +415,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
     int32_t resultCount;
     if((resultCount = result.count()) != 0) {
         finalResult = new UnicodeString[resultCount];
-        if (finalResult == 0) {
+        if (finalResult == nullptr) {
             status = U_MEMORY_ALLOCATION_ERROR;
             return nullptr;
         }
@@ -448,7 +456,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
     for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
         // see if any character is at the start of some decomposition
         U16_GET(segment, 0, i, segLen, cp);
-        if (!nfcImpl.getCanonStartSet(cp, starts)) {
+        if (!nfcImpl->getCanonStartSet(cp, starts)) {
             continue;
         }
         // if so, see which decompositions match
@@ -471,7 +479,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
                 UnicodeString item = *((UnicodeString *)(ne->value.pointer));
                 UnicodeString *toAdd = new UnicodeString(prefix);
                 /* test for nullptr */
-                if (toAdd == 0) {
+                if (toAdd == nullptr) {
                     status = U_MEMORY_ALLOCATION_ERROR;
                     return nullptr;
                 }
@@ -509,7 +517,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
     UnicodeString temp(comp);
     int32_t inputLen=temp.length();
     UnicodeString decompString;
-    nfd.normalize(temp, decompString, status);
+    nfd->normalize(temp, decompString, status);
     if (U_FAILURE(status)) {
         return nullptr;
     }
@@ -573,7 +581,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
     // brute force approach
     // check to make sure result is canonically equivalent
     UnicodeString trial;
-    nfd.normalize(temp, trial, status);
+    nfd->normalize(temp, trial, status);
     if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
         return nullptr;
     }

+ 7 - 0
thirdparty/icu4c/common/charstr.h

@@ -104,6 +104,13 @@ public:
      */
     int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const;
 
+    bool operator==(const CharString& other) const {
+        return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
+    }
+    bool operator!=(const CharString& other) const {
+        return !operator==(other);
+    }
+
     bool operator==(StringPiece other) const {
         return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
     }

+ 1 - 1
thirdparty/icu4c/common/filteredbrk.cpp

@@ -147,7 +147,7 @@ public:
         if(umtx_atomic_dec(&refcount) <= 0) {
             delete this;
         }
-        return 0;
+        return nullptr;
     }
     virtual ~SimpleFilteredSentenceBreakData();
 

+ 5 - 5
thirdparty/icu4c/common/hash.h

@@ -148,12 +148,12 @@ inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp,
 }
 
 inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp,
-                 UErrorCode& status) : hash(0) {
+                 UErrorCode& status) : hash(nullptr) {
     init( uhash_hashUnicodeString, keyComp, valueComp, status);
 }
 
 inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
- : hash(0)
+ : hash(nullptr)
 {
     init(ignoreKeyCase ? uhash_hashCaselessUnicodeString
                         : uhash_hashUnicodeString,
@@ -164,7 +164,7 @@ inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status)
 }
 
 inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status)
- : hash(0)
+ : hash(nullptr)
 {
     initSize(ignoreKeyCase ? uhash_hashCaselessUnicodeString
                         : uhash_hashUnicodeString,
@@ -175,13 +175,13 @@ inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& statu
 }
 
 inline Hashtable::Hashtable(UErrorCode& status)
- : hash(0)
+ : hash(nullptr)
 {
     init(uhash_hashUnicodeString, uhash_compareUnicodeString, nullptr, status);
 }
 
 inline Hashtable::Hashtable()
- : hash(0)
+ : hash(nullptr)
 {
     UErrorCode status = U_ZERO_ERROR;
     init(uhash_hashUnicodeString, uhash_compareUnicodeString, nullptr, status);

+ 33 - 21
thirdparty/icu4c/common/localebuilder.cpp

@@ -3,21 +3,21 @@
 
 #include <utility>
 
-#include "bytesinkutil.h"  // CharStringByteSink
+#include "bytesinkutil.h"  // StringByteSink<CharString>
 #include "charstr.h"
 #include "cstring.h"
 #include "ulocimp.h"
 #include "unicode/localebuilder.h"
 #include "unicode/locid.h"
 
-U_NAMESPACE_BEGIN
+namespace {
 
-#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
-#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
+inline bool UPRV_ISDIGIT(char c) { return c >= '0' && c <= '9'; }
+inline bool UPRV_ISALPHANUM(char c) { return uprv_isASCIILetter(c) || UPRV_ISDIGIT(c); }
 
 constexpr const char* kAttributeKey = "attribute";
 
-static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
+bool _isExtensionSubtags(char key, const char* s, int32_t len) {
     switch (uprv_tolower(key)) {
         case 'u':
             return ultag_isUnicodeExtensionSubtags(s, len);
@@ -30,6 +30,10 @@ static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
     }
 }
 
+}  // namespace
+
+U_NAMESPACE_BEGIN
+
 LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
     script_(), region_(), variant_(nullptr), extensions_(nullptr)
 {
@@ -68,8 +72,10 @@ LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
     return *this;
 }
 
-static void setField(StringPiece input, char* dest, UErrorCode& errorCode,
-                     UBool (*test)(const char*, int32_t)) {
+namespace {
+
+void setField(StringPiece input, char* dest, UErrorCode& errorCode,
+              bool (*test)(const char*, int32_t)) {
     if (U_FAILURE(errorCode)) { return; }
     if (input.empty()) {
         dest[0] = '\0';
@@ -81,6 +87,8 @@ static void setField(StringPiece input, char* dest, UErrorCode& errorCode,
     }
 }
 
+}  // namespace
+
 LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
 {
     setField(language, language_, status_, &ultag_isLanguageSubtag);
@@ -99,7 +107,9 @@ LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
     return *this;
 }
 
-static void transform(char* data, int32_t len) {
+namespace {
+
+void transform(char* data, int32_t len) {
     for (int32_t i = 0; i < len; i++, data++) {
         if (*data == '_') {
             *data = '-';
@@ -109,6 +119,8 @@ static void transform(char* data, int32_t len) {
     }
 }
 
+}  // namespace
+
 LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
 {
     if (U_FAILURE(status_)) { return *this; }
@@ -134,7 +146,9 @@ LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
     return *this;
 }
 
-static bool
+namespace {
+
+bool
 _isKeywordValue(const char* key, const char* value, int32_t value_len)
 {
     if (key[1] == '\0') {
@@ -156,7 +170,7 @@ _isKeywordValue(const char* key, const char* value, int32_t value_len)
            ultag_isUnicodeLocaleType(unicode_locale_type, -1);
 }
 
-static void
+void
 _copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
                 Locale& to, bool validate, UErrorCode& errorCode)
 {
@@ -169,9 +183,7 @@ _copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
     }
     const char* key;
     while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
-        CharString value;
-        CharStringByteSink sink(&value);
-        from.getKeywordValue(key, sink, errorCode);
+        auto value = from.getKeywordValue<CharString>(key, errorCode);
         if (U_FAILURE(errorCode)) { return; }
         if (uprv_strcmp(key, kAttributeKey) == 0) {
             transform(value.data(), value.length());
@@ -186,9 +198,10 @@ _copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
     }
 }
 
-void static
+void
 _clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
 {
+    if (U_FAILURE(errorCode)) { return; }
     // Clear Unicode attributes
     locale.setKeywordValue(kAttributeKey, "", errorCode);
 
@@ -201,9 +214,10 @@ _clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
     }
 }
 
-static void
+void
 _setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
 {
+    if (U_FAILURE(errorCode)) { return; }
     // Add the unicode extensions to extensions_
     CharString locale_str("und-u-", errorCode);
     locale_str.append(value, errorCode);
@@ -212,6 +226,8 @@ _setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& error
         locale, false, errorCode);
 }
 
+}  // namespace
+
 LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
 {
     if (U_FAILURE(status_)) { return *this; }
@@ -289,10 +305,8 @@ LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
         return *this;
     }
 
-    CharString attributes;
-    CharStringByteSink sink(&attributes);
     UErrorCode localErrorCode = U_ZERO_ERROR;
-    extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
+    auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
     if (U_FAILURE(localErrorCode)) {
         CharString new_attributes(value_str.data(), status_);
         // No attributes, set the attribute.
@@ -344,9 +358,7 @@ LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
     }
     if (extensions_ == nullptr) { return *this; }
     UErrorCode localErrorCode = U_ZERO_ERROR;
-    CharString attributes;
-    CharStringByteSink sink(&attributes);
-    extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
+    auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
     // get failure, just return
     if (U_FAILURE(localErrorCode)) { return *this; }
     // Do not have any attributes, just return.

+ 1013 - 1019
thirdparty/icu4c/common/localefallback_data.h

@@ -26,10 +26,10 @@ const char scriptCodeChars[] =
 const char dsLocaleIDChars[] =
     "aaf\0aao\0aat\0ab\0abh\0abl\0abv\0acm\0acq\0acw\0acx\0adf\0adx\0"
     "ady\0ae\0aeb\0aec\0aee\0aeq\0afb\0agi\0agj\0agx\0ahg\0aho\0ahr\0"
-    "aib\0aij\0ain\0aio\0aiq\0ajp\0akk\0akv\0alk\0all\0alr\0alt\0alw\0"
-    "am\0ams\0amw\0ani\0anp\0anr\0anu\0aot\0apc\0apd\0aph\0aqc\0ar\0"
-    "arc\0arq\0ars\0ary\0arz\0as\0ase\0ask\0atn\0atv\0auj\0auz\0av\0"
-    "avd\0avl\0awa\0awn\0axm\0ayh\0ayl\0ayn\0ayp\0az_IQ\0az_IR\0az_RU\0"
+    "aib\0aij\0ain\0aio\0aiq\0akk\0akv\0alk\0all\0alr\0alt\0alw\0am\0"
+    "ams\0amw\0ani\0anp\0anr\0anu\0aot\0apc\0apd\0aph\0aqc\0ar\0arc\0"
+    "arq\0ars\0ary\0arz\0as\0ase\0ask\0atn\0atv\0auj\0auz\0av\0avd\0"
+    "avl\0awa\0awn\0axm\0ayh\0ayl\0ayn\0ayp\0az_IQ\0az_IR\0az_RU\0"
     "azb\0ba\0bal\0bap\0bax\0bbl\0bcq\0bdv\0bdz\0be\0bee\0bej\0bfb\0"
     "bfq\0bft\0bfu\0bfw\0bfy\0bfz\0bg\0bgc\0bgd\0bgn\0bgp\0bgq\0bgw\0"
     "bgx\0bha\0bhb\0bhd\0bhe\0bhh\0bhi\0bhj\0bhm\0bhn\0bho\0bht\0bhu\0"
@@ -78,33 +78,33 @@ const char dsLocaleIDChars[] =
     "otb\0otk\0oty\0pa\0pa_PK\0pal\0paq\0pbt\0pcb\0pce\0pcf\0pcg\0"
     "pch\0pci\0pcj\0peg\0peo\0pgd\0pgg\0pgl\0pgn\0phd\0phk\0phl\0phn\0"
     "pho\0phr\0pht\0phv\0phw\0pi\0pka\0pkr\0plk\0pll\0pmh\0pnt\0pra\0"
-    "prc\0prd\0prp\0prt\0prx\0ps\0psh\0psi\0pst\0pum\0pwo\0pwr\0pww\0"
-    "pyx\0qxq\0raa\0rab\0raf\0rah\0raj\0rav\0rbb\0rdb\0rei\0rhg\0rji\0"
-    "rjs\0rka\0rki\0rkt\0rmi\0rmt\0rmz\0rom_BG\0rsk\0rtw\0ru\0rue\0"
-    "rut\0rwr\0ryu\0sa\0sah\0sam\0sat\0saz\0sbn\0sbu\0sck\0scl\0scl_IN\0"
+    "prc\0prd\0prt\0prx\0ps\0psh\0psi\0pst\0pum\0pwo\0pwr\0pww\0pyx\0"
+    "qxq\0raa\0rab\0raf\0rah\0raj\0rav\0rbb\0rdb\0rei\0rhg\0rji\0rjs\0"
+    "rka\0rki\0rkt\0rmi\0rmt\0rmz\0rom_BG\0rsk\0rtw\0ru\0rue\0rut\0"
+    "rwr\0ryu\0sa\0sah\0sam\0sat\0saz\0sbn\0sbu\0sck\0scl\0scl_IN\0"
     "scp\0sct\0scu\0scx\0sd\0sd_IN\0sdb\0sdf\0sdg\0sdh\0sds\0sel\0"
     "sfm\0sga\0sgh\0sgj\0sgr\0sgt\0sgw\0sgy\0shd\0shi\0shm\0shn\0shu\0"
-    "shv\0si\0sia\0sip\0siy\0siz\0sjd\0sjp\0sjt\0skb\0skj\0skr\0slq\0"
-    "smh\0smp\0smu\0smy\0soa\0sog\0soi\0sou\0spt\0spv\0sqo\0sqq\0sqt\0"
-    "sr\0srb\0srh\0srx\0srz\0ssh\0sss\0sts\0stv\0sty\0suz\0sva\0swb\0"
-    "swi\0swv\0syc\0syl\0syn\0syr\0syw\0ta\0tab\0taj\0tbk\0tcn\0tco\0"
-    "tcx\0tcy\0tda\0tdb\0tdd\0tdg\0tdh\0te\0tes\0tg\0tg_PK\0tge\0tgf\0"
-    "th\0the\0thf\0thi\0thl\0thm\0thq\0thr\0ths\0ti\0tig\0tij\0tin\0"
-    "tjl\0tjo\0tkb\0tks\0tkt\0tmk\0tmr\0tnv\0tov\0tpu\0tra\0trg\0trm\0"
-    "trw\0tsd\0tsj\0tt\0tth\0tto\0tts\0tvn\0twm\0txg\0txo\0tyr\0tyv\0"
-    "ude\0udg\0udi\0udm\0ug\0ug_KZ\0ug_MN\0uga\0ugh\0ugo\0uk\0uki\0"
-    "ulc\0unr\0unr_NP\0unx\0ur\0urk\0ush\0uum\0uz_AF\0uz_CN\0uzs\0"
-    "vaa\0vaf\0vah\0vai\0vas\0vav\0vay\0vgr\0vmd\0vmh\0wal\0wbk\0wbq\0"
-    "wbr\0wlo\0wme\0wne\0wni\0wsg\0wsv\0wtm\0wuu\0xal\0xan\0xas\0xco\0"
-    "xcr\0xdq\0xhe\0xhm\0xis\0xka\0xkc\0xkj\0xkp\0xlc\0xld\0xly\0xmf\0"
-    "xmn\0xmr\0xna\0xnr\0xpg\0xpi\0xpm\0xpr\0xrm\0xrn\0xsa\0xsr\0xss\0"
-    "xub\0xuj\0xve\0xvi\0xwo\0xzh\0yai\0ybh\0ybi\0ydg\0yea\0yej\0yeu\0"
-    "ygp\0yhd\0yi\0yig\0yih\0yiv\0ykg\0yna\0ynk\0yoi\0yoy\0yrk\0ysd\0"
-    "ysn\0ysp\0ysr\0ysy\0yud\0yue\0yue_CN\0yug\0yux\0ywq\0ywu\0zau\0"
-    "zba\0zch\0zdj\0zeh\0zen\0zgb\0zgh\0zgm\0zgn\0zh\0zh_AU\0zh_BN\0"
-    "zh_GB\0zh_GF\0zh_HK\0zh_ID\0zh_MO\0zh_PA\0zh_PF\0zh_PH\0zh_SR\0"
-    "zh_TH\0zh_TW\0zh_US\0zh_VN\0zhd\0zhx\0zkb\0zko\0zkt\0zkz\0zlj\0"
-    "zln\0zlq\0zqe\0zrp\0zum\0zyg\0zyn\0zzj\0";
+    "shv\0si\0sia\0sip\0siy\0siz\0sjd\0sjp\0sjt\0skb\0skj\0skr\0smh\0"
+    "smp\0smu\0smy\0soa\0sog\0soi\0sou\0spt\0spv\0sqo\0sqq\0sqt\0sr\0"
+    "srb\0srh\0srx\0srz\0ssh\0sss\0sts\0stv\0sty\0suz\0sva\0swb\0swi\0"
+    "swv\0syc\0syl\0syn\0syr\0syw\0ta\0tab\0taj\0tbk\0tcn\0tco\0tcx\0"
+    "tcy\0tda\0tdb\0tdd\0tdg\0tdh\0te\0tes\0tg\0tg_PK\0tge\0tgf\0th\0"
+    "the\0thf\0thi\0thl\0thm\0thq\0thr\0ths\0ti\0tig\0tij\0tin\0tjl\0"
+    "tjo\0tkb\0tks\0tkt\0tmr\0tnv\0tov\0tpu\0tra\0trg\0trm\0trw\0tsd\0"
+    "tsj\0tt\0tth\0tto\0tts\0tvn\0twm\0txg\0txo\0tyr\0tyv\0ude\0udg\0"
+    "udi\0udm\0ug\0ug_KZ\0ug_MN\0uga\0ugh\0ugo\0uk\0uki\0ulc\0unr\0"
+    "unr_NP\0unx\0ur\0urk\0ush\0uum\0uz_AF\0uz_CN\0uzs\0vaa\0vaf\0"
+    "vah\0vai\0vas\0vav\0vay\0vgr\0vmd\0vmh\0wal\0wbk\0wbq\0wbr\0wlo\0"
+    "wme\0wne\0wni\0wsg\0wsv\0wtm\0wuu\0xal\0xan\0xas\0xco\0xcr\0xdq\0"
+    "xhe\0xhm\0xis\0xka\0xkc\0xkj\0xkp\0xlc\0xld\0xly\0xmf\0xmn\0xmr\0"
+    "xna\0xnr\0xpg\0xpi\0xpm\0xpr\0xrm\0xrn\0xsa\0xsr\0xub\0xuj\0xve\0"
+    "xvi\0xwo\0xzh\0yai\0ybh\0ybi\0ydg\0yea\0yej\0yeu\0ygp\0yhd\0yi\0"
+    "yig\0yih\0yiv\0ykg\0yna\0ynk\0yoi\0yoy\0yrk\0ysd\0ysn\0ysp\0ysr\0"
+    "ysy\0yud\0yue\0yue_CN\0yug\0yux\0ywq\0ywu\0zau\0zba\0zch\0zdj\0"
+    "zeh\0zen\0zgb\0zgh\0zgm\0zgn\0zh\0zh_AU\0zh_BN\0zh_GB\0zh_GF\0"
+    "zh_HK\0zh_ID\0zh_MO\0zh_PA\0zh_PF\0zh_PH\0zh_SR\0zh_TH\0zh_TW\0"
+    "zh_US\0zh_VN\0zhd\0zhx\0zko\0zkt\0zkz\0zlj\0zln\0zlq\0zqe\0zrp\0"
+    "zum\0zyg\0zyn\0zzj\0";
 
 const int32_t defaultScriptTable[] = {
     0, 320,  // aaf -> Mlym
@@ -138,996 +138,990 @@ const int32_t defaultScriptTable[] = {
     110, 220,  // ain -> Kana
     114, 345,  // aio -> Mymr
     118, 10,  // aiq -> Arab
-    122, 10,  // ajp -> Arab
-    126, 570,  // akk -> Xsux
-    130, 100,  // akv -> Cyrl
-    134, 260,  // alk -> Laoo
-    138, 320,  // all -> Mlym
-    142, 100,  // alr -> Cyrl
-    146, 100,  // alt -> Cyrl
-    150, 120,  // alw -> Ethi
-    154, 120,  // am -> Ethi
-    157, 210,  // ams -> Jpan
-    161, 475,  // amw -> Syrc
-    165, 100,  // ani -> Cyrl
-    169, 105,  // anp -> Deva
-    173, 105,  // anr -> Deva
-    177, 120,  // anu -> Ethi
-    181, 45,  // aot -> Beng
-    185, 10,  // apc -> Arab
-    189, 10,  // apd -> Arab
-    193, 105,  // aph -> Deva
-    197, 100,  // aqc -> Cyrl
-    201, 10,  // ar -> Arab
-    204, 15,  // arc -> Armi
-    208, 10,  // arq -> Arab
-    212, 10,  // ars -> Arab
-    216, 10,  // ary -> Arab
-    220, 10,  // arz -> Arab
-    224, 45,  // as -> Beng
-    227, 450,  // ase -> Sgnw
-    231, 10,  // ask -> Arab
-    235, 10,  // atn -> Arab
-    239, 100,  // atv -> Cyrl
-    243, 10,  // auj -> Arab
-    247, 10,  // auz -> Arab
-    251, 100,  // av -> Cyrl
-    254, 10,  // avd -> Arab
-    258, 10,  // avl -> Arab
-    262, 105,  // awa -> Deva
-    266, 120,  // awn -> Ethi
-    270, 20,  // axm -> Armn
-    274, 10,  // ayh -> Arab
-    278, 10,  // ayl -> Arab
-    282, 10,  // ayn -> Arab
-    286, 10,  // ayp -> Arab
-    290, 10,  // az_IQ -> Arab
-    296, 10,  // az_IR -> Arab
-    302, 100,  // az_RU -> Cyrl
-    308, 10,  // azb -> Arab
-    312, 100,  // ba -> Cyrl
-    315, 10,  // bal -> Arab
-    319, 105,  // bap -> Deva
-    323, 30,  // bax -> Bamu
-    327, 125,  // bbl -> Geor
-    331, 120,  // bcq -> Ethi
-    335, 385,  // bdv -> Orya
-    339, 10,  // bdz -> Arab
-    343, 100,  // be -> Cyrl
-    346, 105,  // bee -> Deva
-    350, 10,  // bej -> Arab
-    354, 105,  // bfb -> Deva
-    358, 500,  // bfq -> Taml
-    362, 10,  // bft -> Arab
-    366, 535,  // bfu -> Tibt
-    370, 385,  // bfw -> Orya
-    374, 105,  // bfy -> Deva
-    378, 105,  // bfz -> Deva
-    382, 100,  // bg -> Cyrl
-    385, 105,  // bgc -> Deva
-    389, 105,  // bgd -> Deva
-    393, 10,  // bgn -> Arab
-    397, 10,  // bgp -> Arab
-    401, 105,  // bgq -> Deva
-    405, 105,  // bgw -> Deva
-    409, 150,  // bgx -> Grek
-    413, 105,  // bha -> Deva
-    417, 105,  // bhb -> Deva
-    421, 105,  // bhd -> Deva
-    425, 10,  // bhe -> Arab
-    429, 100,  // bhh -> Cyrl
-    433, 105,  // bhi -> Deva
-    437, 105,  // bhj -> Deva
-    441, 10,  // bhm -> Arab
-    445, 475,  // bhn -> Syrc
-    449, 105,  // bho -> Deva
-    453, 485,  // bht -> Takr
-    457, 105,  // bhu -> Deva
-    461, 105,  // biy -> Deva
-    465, 475,  // bjf -> Syrc
-    469, 105,  // bjj -> Deva
-    473, 10,  // bjm -> Arab
-    477, 345,  // blk -> Mymr
-    481, 510,  // blt -> Tavt
-    485, 105,  // bmj -> Deva
-    489, 45,  // bn -> Beng
-    492, 105,  // bns -> Deva
-    496, 535,  // bo -> Tibt
-    499, 100,  // bph -> Cyrl
-    503, 105,  // bpx -> Deva
-    507, 45,  // bpy -> Beng
-    511, 10,  // bqi -> Arab
-    515, 105,  // bra -> Deva
-    519, 235,  // brb -> Khmr
-    523, 105,  // brd -> Deva
-    527, 10,  // brh -> Arab
-    531, 10,  // brk -> Arab
-    535, 260,  // brv -> Laoo
-    539, 105,  // brx -> Deva
-    543, 10,  // bsh -> Arab
-    547, 10,  // bsk -> Arab
-    551, 35,  // bsq -> Bass
-    555, 120,  // bst -> Ethi
-    559, 40,  // btd -> Batk
-    563, 40,  // btm -> Batk
-    567, 105,  // btv -> Deva
-    571, 100,  // bua -> Cyrl
-    575, 345,  // bwe -> Mymr
-    579, 100,  // bxm -> Cyrl
-    583, 330,  // bxu -> Mong
-    587, 105,  // byh -> Deva
-    591, 120,  // byn -> Ethi
-    595, 105,  // byw -> Deva
-    599, 530,  // bzi -> Thai
-    603, 530,  // cbn -> Thai
-    607, 60,  // ccp -> Cakm
-    611, 515,  // cde -> Telu
-    615, 105,  // cdh -> Deva
-    619, 155,  // cdi -> Gujr
-    623, 105,  // cdj -> Deva
-    627, 105,  // cdm -> Deva
-    631, 175,  // cdo -> Hans
-    635, 45,  // cdz -> Beng
-    639, 100,  // ce -> Cyrl
-    642, 535,  // cgk -> Tibt
-    646, 10,  // chg -> Arab
-    650, 100,  // chm -> Cyrl
-    654, 80,  // chr -> Cher
-    658, 105,  // chx -> Deva
-    662, 105,  // cih -> Deva
-    666, 10,  // cja -> Arab
-    670, 100,  // cji -> Cyrl
-    674, 75,  // cjm -> Cham
-    678, 175,  // cjy -> Hans
-    682, 10,  // ckb -> Arab
-    686, 100,  // ckt -> Cyrl
-    690, 10,  // clh -> Arab
-    694, 100,  // clw -> Cyrl
-    698, 470,  // cmg -> Soyo
-    702, 535,  // cna -> Tibt
-    706, 175,  // cnp -> Hans
-    710, 530,  // cog -> Thai
-    714, 90,  // cop -> Copt
-    718, 150,  // cpg -> Grek
-    722, 65,  // cr -> Cans
-    725, 100,  // crh -> Cyrl
-    729, 65,  // crj -> Cans
-    733, 65,  // crk -> Cans
-    737, 65,  // crl -> Cans
-    741, 65,  // crm -> Cans
-    745, 345,  // csh -> Mymr
-    749, 175,  // csp -> Hans
-    753, 65,  // csw -> Cans
-    757, 395,  // ctd -> Pauc
-    761, 45,  // ctg -> Beng
-    765, 105,  // ctn -> Deva
-    769, 500,  // ctt -> Taml
-    773, 100,  // cu -> Cyrl
-    776, 255,  // cuu -> Lana
-    780, 100,  // cv -> Cyrl
-    783, 175,  // czh -> Hans
-    787, 185,  // czk -> Hebr
-    791, 105,  // daq -> Deva
-    795, 100,  // dar -> Cyrl
-    799, 10,  // dcc -> Arab
-    803, 100,  // ddo -> Cyrl
-    807, 10,  // def -> Arab
-    811, 10,  // deh -> Arab
-    815, 45,  // der -> Beng
-    819, 105,  // dhi -> Deva
-    823, 155,  // dhn -> Gujr
-    827, 105,  // dho -> Deva
-    831, 105,  // dhw -> Deva
-    835, 535,  // dka -> Tibt
-    839, 100,  // dlg -> Cyrl
-    843, 310,  // dmf -> Medf
-    847, 10,  // dmk -> Arab
-    851, 10,  // dml -> Arab
-    855, 100,  // dng -> Cyrl
-    859, 345,  // dnu -> Mymr
-    863, 345,  // dnv -> Mymr
-    867, 105,  // doi -> Deva
-    871, 120,  // dox -> Ethi
-    875, 535,  // dre -> Tibt
-    879, 105,  // drq -> Deva
-    883, 120,  // drs -> Ethi
-    887, 105,  // dry -> Deva
-    891, 385,  // dso -> Orya
-    895, 105,  // dty -> Deva
-    899, 155,  // dub -> Gujr
-    903, 105,  // duh -> Deva
-    907, 105,  // dus -> Deva
-    911, 525,  // dv -> Thaa
-    914, 385,  // dwk -> Orya
-    918, 105,  // dwz -> Deva
-    922, 535,  // dz -> Tibt
-    925, 535,  // dzl -> Tibt
-    929, 150,  // ecr -> Grek
-    933, 95,  // ecy -> Cprt
-    937, 110,  // egy -> Egyp
-    941, 215,  // eky -> Kali
-    945, 150,  // el -> Grek
-    948, 105,  // emg -> Deva
-    952, 105,  // emu -> Deva
-    956, 100,  // enf -> Cyrl
-    960, 100,  // enh -> Cyrl
-    964, 500,  // era -> Taml
-    968, 135,  // esg -> Gonm
-    972, 10,  // esh -> Arab
-    976, 200,  // ett -> Ital
-    980, 100,  // eve -> Cyrl
-    984, 100,  // evn -> Cyrl
-    988, 10,  // fa -> Arab
-    991, 10,  // fay -> Arab
-    995, 10,  // faz -> Arab
-    999, 10,  // fia -> Arab
-    1003, 105,  // fmu -> Deva
-    1007, 10,  // fub -> Arab
-    1011, 175,  // gan -> Hans
-    1015, 385,  // gaq -> Orya
-    1019, 155,  // gas -> Gujr
-    1023, 515,  // gau -> Telu
-    1027, 385,  // gbj -> Orya
-    1031, 105,  // gbk -> Deva
-    1035, 155,  // gbl -> Gujr
-    1039, 105,  // gbm -> Deva
-    1043, 10,  // gbz -> Arab
-    1047, 385,  // gdb -> Orya
-    1051, 100,  // gdo -> Cyrl
-    1055, 105,  // gdx -> Deva
-    1059, 120,  // gez -> Ethi
-    1063, 10,  // ggg -> Arab
-    1067, 10,  // gha -> Arab
-    1071, 105,  // ghe -> Deva
-    1075, 10,  // ghr -> Arab
-    1079, 535,  // ght -> Tibt
-    1083, 10,  // gig -> Arab
-    1087, 100,  // gin -> Cyrl
-    1091, 10,  // gjk -> Arab
-    1095, 10,  // gju -> Arab
-    1099, 100,  // gld -> Cyrl
-    1103, 10,  // glh -> Arab
-    1107, 10,  // glk -> Arab
-    1111, 120,  // gmv -> Ethi
-    1115, 275,  // gmy -> Linb
-    1119, 535,  // goe -> Tibt
-    1123, 120,  // gof -> Ethi
-    1127, 105,  // gok -> Deva
-    1131, 105,  // gom -> Deva
-    1135, 515,  // gon -> Telu
-    1139, 140,  // got -> Goth
-    1143, 105,  // gra -> Deva
-    1147, 95,  // grc -> Cprt
-    1151, 45,  // grt -> Beng
-    1155, 120,  // gru -> Ethi
-    1159, 155,  // gu -> Gujr
-    1162, 105,  // gvr -> Deva
-    1166, 10,  // gwc -> Arab
-    1170, 10,  // gwf -> Arab
-    1174, 10,  // gwt -> Arab
-    1178, 105,  // gyo -> Deva
-    1182, 10,  // gzi -> Arab
-    1186, 10,  // ha_CM -> Arab
-    1192, 10,  // ha_SD -> Arab
-    1198, 10,  // hac -> Arab
-    1202, 175,  // hak -> Hans
-    1206, 120,  // har -> Ethi
-    1210, 10,  // haz -> Arab
-    1214, 185,  // hbo -> Hebr
-    1218, 120,  // hdy -> Ethi
-    1222, 185,  // he -> Hebr
-    1225, 105,  // hi -> Deva
-    1228, 485,  // hii -> Takr
-    1232, 570,  // hit -> Xsux
-    1236, 10,  // hkh -> Arab
-    1240, 105,  // hlb -> Deva
-    1244, 190,  // hlu -> Hluw
-    1248, 410,  // hmd -> Plrd
-    1252, 50,  // hmj -> Bopo
-    1256, 50,  // hmq -> Bopo
-    1260, 10,  // hnd -> Arab
-    1264, 105,  // hne -> Deva
-    1268, 195,  // hnj -> Hmnp
-    1272, 260,  // hnj_AU -> Laoo
-    1279, 260,  // hnj_CN -> Laoo
-    1286, 260,  // hnj_FR -> Laoo
-    1293, 260,  // hnj_GF -> Laoo
-    1300, 260,  // hnj_LA -> Laoo
-    1307, 260,  // hnj_MM -> Laoo
-    1314, 260,  // hnj_SR -> Laoo
-    1321, 260,  // hnj_TH -> Laoo
-    1328, 260,  // hnj_VN -> Laoo
-    1335, 10,  // hno -> Arab
-    1339, 105,  // hoc -> Deva
-    1343, 10,  // hoh -> Arab
-    1347, 105,  // hoj -> Deva
-    1351, 170,  // how -> Hani
-    1355, 105,  // hoy -> Deva
-    1359, 345,  // hpo -> Mymr
-    1363, 475,  // hrt -> Syrc
-    1367, 10,  // hrz -> Arab
-    1371, 175,  // hsn -> Hans
-    1375, 10,  // hss -> Arab
-    1379, 570,  // htx -> Xsux
-    1383, 105,  // hut -> Deva
-    1387, 185,  // huy -> Hebr
-    1391, 100,  // huz -> Cyrl
-    1395, 20,  // hy -> Armn
-    1398, 20,  // hyw -> Armn
-    1402, 575,  // ii -> Yiii
-    1405, 285,  // imy -> Lyci
-    1409, 100,  // inh -> Cyrl
-    1413, 345,  // int -> Mymr
-    1417, 120,  // ior -> Ethi
-    1421, 500,  // iru -> Taml
-    1425, 10,  // isk -> Arab
-    1429, 185,  // itk -> Hebr
-    1433, 100,  // itl -> Cyrl
-    1437, 65,  // iu -> Cans
-    1440, 185,  // iw -> Hebr
-    1443, 210,  // ja -> Jpan
-    1446, 10,  // jad -> Arab
-    1450, 10,  // jat -> Arab
-    1454, 185,  // jbe -> Hebr
-    1458, 10,  // jbn -> Arab
-    1462, 100,  // jct -> Cyrl
-    1466, 535,  // jda -> Tibt
-    1470, 10,  // jdg -> Arab
-    1474, 100,  // jdt -> Cyrl
-    1478, 105,  // jee -> Deva
-    1482, 125,  // jge -> Geor
-    1486, 185,  // ji -> Hebr
-    1489, 165,  // jje -> Hang
-    1493, 345,  // jkm -> Mymr
-    1497, 105,  // jml -> Deva
-    1501, 485,  // jna -> Takr
-    1505, 10,  // jnd -> Arab
-    1509, 105,  // jnl -> Deva
-    1513, 105,  // jns -> Deva
-    1517, 10,  // jog -> Arab
-    1521, 185,  // jpa -> Hebr
-    1525, 185,  // jpr -> Hebr
-    1529, 185,  // jrb -> Hebr
-    1533, 10,  // jrb_MA -> Arab
-    1540, 105,  // jul -> Deva
-    1544, 385,  // jun -> Orya
-    1548, 385,  // juy -> Orya
-    1552, 535,  // jya -> Tibt
-    1556, 185,  // jye -> Hebr
-    1560, 125,  // ka -> Geor
-    1563, 100,  // kaa -> Cyrl
-    1567, 100,  // kap -> Cyrl
-    1571, 225,  // kaw -> Kawi
-    1575, 100,  // kbd -> Cyrl
-    1579, 10,  // kbu -> Arab
-    1583, 10,  // kby -> Arab
-    1587, 100,  // kca -> Cyrl
-    1591, 45,  // kdq -> Beng
-    1595, 530,  // kdt -> Thai
-    1599, 100,  // ket -> Cyrl
-    1603, 105,  // kex -> Deva
-    1607, 515,  // key -> Telu
-    1611, 245,  // kfa -> Knda
-    1615, 105,  // kfb -> Deva
-    1619, 515,  // kfc -> Telu
-    1623, 245,  // kfd -> Knda
-    1627, 500,  // kfe -> Taml
-    1631, 320,  // kfh -> Mlym
-    1635, 500,  // kfi -> Taml
-    1639, 105,  // kfk -> Deva
-    1643, 10,  // kfm -> Arab
-    1647, 105,  // kfp -> Deva
-    1651, 105,  // kfq -> Deva
-    1655, 105,  // kfr -> Deva
-    1659, 105,  // kfs -> Deva
-    1663, 105,  // kfx -> Deva
-    1667, 105,  // kfy -> Deva
-    1671, 105,  // kgj -> Deva
-    1675, 105,  // kgy -> Deva
-    1679, 495,  // khb -> Talu
-    1683, 530,  // khf -> Thai
-    1687, 535,  // khg -> Tibt
-    1691, 105,  // khn -> Deva
-    1695, 345,  // kht -> Mymr
-    1699, 100,  // khv -> Cyrl
-    1703, 10,  // khw -> Arab
-    1707, 105,  // kif -> Deva
-    1711, 100,  // kim -> Cyrl
-    1715, 105,  // kip -> Deva
-    1719, 260,  // kjg -> Laoo
-    1723, 100,  // kjh -> Cyrl
-    1727, 105,  // kjl -> Deva
-    1731, 105,  // kjo -> Deva
-    1735, 345,  // kjp -> Mymr
-    1739, 530,  // kjt -> Thai
-    1743, 100,  // kk -> Cyrl
-    1746, 10,  // kk_AF -> Arab
-    1752, 10,  // kk_CN -> Arab
-    1758, 10,  // kk_IR -> Arab
-    1764, 10,  // kk_MN -> Arab
-    1770, 535,  // kkf -> Tibt
-    1774, 255,  // kkh -> Lana
-    1778, 105,  // kkt -> Deva
-    1782, 105,  // kle -> Deva
-    1786, 10,  // klj -> Arab
-    1790, 105,  // klr -> Deva
-    1794, 235,  // km -> Khmr
-    1797, 105,  // kmj -> Deva
-    1801, 10,  // kmz -> Arab
-    1805, 245,  // kn -> Knda
-    1808, 250,  // ko -> Kore
-    1811, 100,  // koi -> Cyrl
-    1815, 105,  // kok -> Deva
-    1819, 100,  // kpt -> Cyrl
-    1823, 100,  // kpy -> Cyrl
-    1827, 475,  // kqd -> Syrc
-    1831, 120,  // kqy -> Ethi
-    1835, 105,  // kra -> Deva
-    1839, 100,  // krc -> Cyrl
-    1843, 100,  // krk -> Cyrl
-    1847, 235,  // krr -> Khmr
-    1851, 105,  // kru -> Deva
-    1855, 235,  // krv -> Khmr
-    1859, 10,  // ks -> Arab
-    1862, 345,  // ksu -> Mymr
-    1866, 345,  // ksw -> Mymr
-    1870, 105,  // ksz -> Deva
-    1874, 120,  // ktb -> Ethi
-    1878, 10,  // ktl -> Arab
-    1882, 410,  // ktp -> Plrd
-    1886, 10,  // ku_LB -> Arab
-    1892, 260,  // kuf -> Laoo
-    1896, 100,  // kum -> Cyrl
-    1900, 100,  // kv -> Cyrl
-    1903, 100,  // kva -> Cyrl
-    1907, 345,  // kvq -> Mymr
-    1911, 345,  // kvt -> Mymr
-    1915, 10,  // kvx -> Arab
-    1919, 215,  // kvy -> Kali
-    1923, 345,  // kxf -> Mymr
-    1927, 345,  // kxk -> Mymr
-    1931, 530,  // kxm -> Thai
-    1935, 10,  // kxp -> Arab
-    1939, 100,  // ky -> Cyrl
-    1942, 10,  // ky_CN -> Arab
-    1948, 215,  // kyu -> Kali
-    1952, 105,  // kyv -> Deva
-    1956, 105,  // kyw -> Deva
-    1960, 270,  // lab -> Lina
-    1964, 185,  // lad -> Hebr
-    1968, 105,  // lae -> Deva
-    1972, 10,  // lah -> Arab
-    1976, 280,  // lbc -> Lisu
-    1980, 100,  // lbe -> Cyrl
-    1984, 105,  // lbf -> Deva
-    1988, 535,  // lbj -> Tibt
-    1992, 105,  // lbm -> Deva
-    1996, 260,  // lbo -> Laoo
-    2000, 105,  // lbr -> Deva
-    2004, 530,  // lcp -> Thai
-    2008, 265,  // lep -> Lepc
-    2012, 100,  // lez -> Cyrl
-    2016, 105,  // lhm -> Deva
-    2020, 475,  // lhs -> Syrc
-    2024, 105,  // lif -> Deva
-    2028, 280,  // lis -> Lisu
-    2032, 535,  // lkh -> Tibt
-    2036, 10,  // lki -> Arab
-    2040, 105,  // lmh -> Deva
-    2044, 515,  // lmn -> Telu
-    2048, 260,  // lo -> Laoo
-    2051, 105,  // loy -> Deva
-    2055, 410,  // lpo -> Plrd
-    2059, 10,  // lrc -> Arab
-    2063, 10,  // lrk -> Arab
-    2067, 10,  // lrl -> Arab
-    2071, 10,  // lsa -> Arab
-    2075, 185,  // lsd -> Hebr
-    2079, 10,  // lss -> Arab
-    2083, 535,  // luk -> Tibt
-    2087, 105,  // luu -> Deva
-    2091, 10,  // luv -> Arab
-    2095, 10,  // luz -> Arab
-    2099, 530,  // lwl -> Thai
-    2103, 530,  // lwm -> Thai
-    2107, 535,  // lya -> Tibt
-    2111, 175,  // lzh -> Hans
-    2115, 105,  // mag -> Deva
-    2119, 105,  // mai -> Deva
-    2123, 360,  // man_GN -> Nkoo
-    2130, 10,  // mby -> Arab
-    2134, 10,  // mde -> Arab
-    2138, 100,  // mdf -> Cyrl
-    2142, 120,  // mdx -> Ethi
-    2146, 120,  // mdy -> Ethi
-    2150, 10,  // mfa -> Arab
-    2154, 10,  // mfi -> Arab
-    2158, 105,  // mgp -> Deva
-    2162, 10,  // mhj -> Arab
-    2166, 295,  // mid -> Mand
-    2170, 105,  // mjl -> Deva
-    2174, 320,  // mjq -> Mlym
-    2178, 320,  // mjr -> Mlym
-    2182, 105,  // mjt -> Deva
-    2186, 515,  // mju -> Telu
-    2190, 320,  // mjv -> Mlym
-    2194, 105,  // mjz -> Deva
-    2198, 100,  // mk -> Cyrl
-    2201, 105,  // mkb -> Deva
-    2205, 105,  // mke -> Deva
-    2209, 10,  // mki -> Arab
-    2213, 530,  // mkm -> Thai
-    2217, 320,  // ml -> Mlym
-    2220, 530,  // mlf -> Thai
-    2224, 100,  // mn -> Cyrl
-    2227, 330,  // mn_CN -> Mong
-    2233, 45,  // mni -> Beng
-    2237, 10,  // mnj -> Arab
-    2241, 100,  // mns -> Cyrl
-    2245, 345,  // mnw -> Mymr
-    2249, 530,  // mpz -> Thai
-    2253, 105,  // mr -> Deva
-    2256, 530,  // mra -> Thai
-    2260, 105,  // mrd -> Deva
-    2264, 100,  // mrj -> Cyrl
-    2268, 335,  // mro -> Mroo
-    2272, 105,  // mrr -> Deva
-    2276, 10,  // ms_CC -> Arab
-    2282, 100,  // mtm -> Cyrl
-    2286, 105,  // mtr -> Deva
-    2290, 100,  // mud -> Cyrl
-    2294, 535,  // muk -> Tibt
-    2298, 105,  // mut -> Deva
-    2302, 500,  // muv -> Taml
-    2306, 120,  // muz -> Ethi
-    2310, 330,  // mvf -> Mong
-    2314, 10,  // mvy -> Arab
-    2318, 120,  // mvz -> Ethi
-    2322, 105,  // mwr -> Deva
-    2326, 345,  // mwt -> Mymr
-    2330, 195,  // mww -> Hmnp
-    2334, 345,  // my -> Mymr
-    2337, 120,  // mym -> Ethi
-    2341, 100,  // myv -> Cyrl
-    2345, 295,  // myz -> Mand
-    2349, 10,  // mzn -> Arab
-    2353, 175,  // nan -> Hans
-    2357, 105,  // nao -> Deva
-    2361, 105,  // ncd -> Deva
-    2365, 260,  // ncq -> Laoo
-    2369, 100,  // ndf -> Cyrl
-    2373, 105,  // ne -> Deva
-    2376, 100,  // neg -> Cyrl
-    2380, 535,  // neh -> Tibt
-    2384, 570,  // nei -> Xsux
-    2388, 105,  // new -> Deva
-    2392, 260,  // ngt -> Laoo
-    2396, 100,  // nio -> Cyrl
-    2400, 515,  // nit -> Telu
-    2404, 100,  // niv -> Cyrl
-    2408, 10,  // nli -> Arab
-    2412, 10,  // nlm -> Arab
-    2416, 105,  // nlx -> Deva
-    2420, 105,  // nmm -> Deva
-    2424, 560,  // nnp -> Wcho
-    2428, 255,  // nod -> Lana
-    2432, 105,  // noe -> Deva
-    2436, 100,  // nog -> Cyrl
-    2440, 105,  // noi -> Deva
-    2444, 430,  // non -> Runr
-    2448, 575,  // nos -> Yiii
-    2452, 535,  // npb -> Tibt
-    2456, 360,  // nqo -> Nkoo
-    2460, 575,  // nsd -> Yiii
-    2464, 575,  // nsf -> Yiii
-    2468, 65,  // nsk -> Cans
-    2472, 540,  // nst -> Tnsa
-    2476, 575,  // nsv -> Yiii
-    2480, 575,  // nty -> Yiii
-    2484, 10,  // ntz -> Arab
-    2488, 355,  // nwc -> Newa
-    2492, 105,  // nwx -> Deva
-    2496, 530,  // nyl -> Thai
-    2500, 10,  // nyq -> Arab
-    2504, 100,  // oaa -> Cyrl
-    2508, 100,  // oac -> Cyrl
-    2512, 475,  // oar -> Syrc
-    2516, 125,  // oav -> Geor
-    2520, 405,  // obm -> Phnx
-    2524, 345,  // obr -> Mymr
-    2528, 10,  // odk -> Arab
-    2532, 570,  // oht -> Xsux
-    2536, 65,  // oj -> Cans
-    2539, 65,  // ojs -> Cans
-    2543, 165,  // okm -> Hang
-    2547, 170,  // oko -> Hani
-    2551, 235,  // okz -> Khmr
-    2555, 105,  // ola -> Deva
-    2559, 535,  // ole -> Tibt
-    2563, 100,  // omk -> Cyrl
-    2567, 340,  // omp -> Mtei
-    2571, 325,  // omr -> Modi
-    2575, 105,  // oon -> Deva
-    2579, 385,  // or -> Orya
-    2582, 515,  // ort -> Telu
-    2586, 10,  // oru -> Arab
-    2590, 100,  // orv -> Cyrl
-    2594, 100,  // os -> Cyrl
-    2597, 390,  // osa -> Osge
-    2601, 200,  // osc -> Ital
-    2605, 205,  // osi -> Java
-    2609, 10,  // ota -> Arab
-    2613, 535,  // otb -> Tibt
-    2617, 380,  // otk -> Orkh
-    2621, 145,  // oty -> Gran
-    2625, 160,  // pa -> Guru
-    2628, 10,  // pa_PK -> Arab
-    2634, 400,  // pal -> Phli
-    2638, 100,  // paq -> Cyrl
-    2642, 10,  // pbt -> Arab
-    2646, 235,  // pcb -> Khmr
-    2650, 345,  // pce -> Mymr
-    2654, 320,  // pcf -> Mlym
-    2658, 320,  // pcg -> Mlym
-    2662, 105,  // pch -> Deva
-    2666, 105,  // pci -> Deva
-    2670, 515,  // pcj -> Telu
-    2674, 385,  // peg -> Orya
-    2678, 565,  // peo -> Xpeo
-    2682, 230,  // pgd -> Khar
-    2686, 105,  // pgg -> Deva
-    2690, 370,  // pgl -> Ogam
-    2694, 200,  // pgn -> Ital
-    2698, 105,  // phd -> Deva
-    2702, 345,  // phk -> Mymr
-    2706, 10,  // phl -> Arab
-    2710, 405,  // phn -> Phnx
-    2714, 260,  // pho -> Laoo
-    2718, 10,  // phr -> Arab
-    2722, 530,  // pht -> Thai
-    2726, 10,  // phv -> Arab
-    2730, 105,  // phw -> Deva
-    2734, 455,  // pi -> Sinh
-    2737, 55,  // pka -> Brah
-    2741, 320,  // pkr -> Mlym
-    2745, 10,  // plk -> Arab
-    2749, 345,  // pll -> Mymr
-    2753, 55,  // pmh -> Brah
-    2757, 150,  // pnt -> Grek
-    2761, 230,  // pra -> Khar
-    2765, 10,  // prc -> Arab
-    2769, 10,  // prd -> Arab
-    2773, 155,  // prp -> Gujr
-    2777, 530,  // prt -> Thai
-    2781, 10,  // prx -> Arab
-    2785, 10,  // ps -> Arab
-    2788, 10,  // psh -> Arab
-    2792, 10,  // psi -> Arab
-    2796, 10,  // pst -> Arab
-    2800, 105,  // pum -> Deva
-    2804, 345,  // pwo -> Mymr
-    2808, 105,  // pwr -> Deva
-    2812, 530,  // pww -> Thai
-    2816, 345,  // pyx -> Mymr
-    2820, 10,  // qxq -> Arab
-    2824, 105,  // raa -> Deva
-    2828, 105,  // rab -> Deva
-    2832, 105,  // raf -> Deva
-    2836, 45,  // rah -> Beng
-    2840, 105,  // raj -> Deva
-    2844, 105,  // rav -> Deva
-    2848, 345,  // rbb -> Mymr
-    2852, 10,  // rdb -> Arab
-    2856, 385,  // rei -> Orya
-    2860, 425,  // rhg -> Rohg
-    2864, 105,  // rji -> Deva
-    2868, 105,  // rjs -> Deva
-    2872, 235,  // rka -> Khmr
-    2876, 345,  // rki -> Mymr
-    2880, 45,  // rkt -> Beng
-    2884, 20,  // rmi -> Armn
-    2888, 10,  // rmt -> Arab
-    2892, 345,  // rmz -> Mymr
-    2896, 100,  // rom_BG -> Cyrl
-    2903, 100,  // rsk -> Cyrl
-    2907, 105,  // rtw -> Deva
-    2911, 100,  // ru -> Cyrl
-    2914, 100,  // rue -> Cyrl
-    2918, 100,  // rut -> Cyrl
-    2922, 105,  // rwr -> Deva
-    2926, 220,  // ryu -> Kana
-    2930, 105,  // sa -> Deva
-    2933, 100,  // sah -> Cyrl
-    2937, 435,  // sam -> Samr
-    2941, 375,  // sat -> Olck
-    2945, 445,  // saz -> Saur
-    2949, 10,  // sbn -> Arab
-    2953, 535,  // sbu -> Tibt
-    2957, 105,  // sck -> Deva
-    2961, 10,  // scl -> Arab
-    2965, 10,  // scl_IN -> Arab
-    2972, 105,  // scp -> Deva
-    2976, 260,  // sct -> Laoo
-    2980, 485,  // scu -> Takr
-    2984, 150,  // scx -> Grek
-    2988, 10,  // sd -> Arab
-    2991, 105,  // sd_IN -> Deva
-    2997, 10,  // sdb -> Arab
-    3001, 10,  // sdf -> Arab
-    3005, 10,  // sdg -> Arab
-    3009, 10,  // sdh -> Arab
-    3013, 10,  // sds -> Arab
-    3017, 100,  // sel -> Cyrl
-    3021, 410,  // sfm -> Plrd
-    3025, 370,  // sga -> Ogam
-    3029, 100,  // sgh -> Cyrl
-    3033, 105,  // sgj -> Deva
-    3037, 10,  // sgr -> Arab
-    3041, 535,  // sgt -> Tibt
-    3045, 120,  // sgw -> Ethi
-    3049, 10,  // sgy -> Arab
-    3053, 10,  // shd -> Arab
-    3057, 520,  // shi -> Tfng
-    3061, 10,  // shm -> Arab
-    3065, 345,  // shn -> Mymr
-    3069, 10,  // shu -> Arab
-    3073, 10,  // shv -> Arab
-    3077, 455,  // si -> Sinh
-    3080, 100,  // sia -> Cyrl
-    3084, 535,  // sip -> Tibt
-    3088, 10,  // siy -> Arab
-    3092, 10,  // siz -> Arab
-    3096, 100,  // sjd -> Cyrl
-    3100, 105,  // sjp -> Deva
-    3104, 100,  // sjt -> Cyrl
-    3108, 530,  // skb -> Thai
-    3112, 105,  // skj -> Deva
-    3116, 10,  // skr -> Arab
-    3120, 10,  // slq -> Arab
-    3124, 575,  // smh -> Yiii
-    3128, 435,  // smp -> Samr
-    3132, 235,  // smu -> Khmr
-    3136, 10,  // smy -> Arab
-    3140, 510,  // soa -> Tavt
-    3144, 460,  // sog -> Sogd
-    3148, 105,  // soi -> Deva
-    3152, 530,  // sou -> Thai
-    3156, 535,  // spt -> Tibt
-    3160, 385,  // spv -> Orya
-    3164, 10,  // sqo -> Arab
-    3168, 260,  // sqq -> Laoo
-    3172, 10,  // sqt -> Arab
-    3176, 100,  // sr -> Cyrl
-    3179, 465,  // srb -> Sora
-    3183, 10,  // srh -> Arab
-    3187, 105,  // srx -> Deva
-    3191, 10,  // srz -> Arab
-    3195, 10,  // ssh -> Arab
-    3199, 260,  // sss -> Laoo
-    3203, 10,  // sts -> Arab
-    3207, 120,  // stv -> Ethi
-    3211, 100,  // sty -> Cyrl
-    3215, 105,  // suz -> Deva
-    3219, 125,  // sva -> Geor
-    3223, 10,  // swb -> Arab
-    3227, 170,  // swi -> Hani
-    3231, 105,  // swv -> Deva
-    3235, 475,  // syc -> Syrc
-    3239, 45,  // syl -> Beng
-    3243, 475,  // syn -> Syrc
-    3247, 475,  // syr -> Syrc
-    3251, 105,  // syw -> Deva
-    3255, 500,  // ta -> Taml
-    3258, 100,  // tab -> Cyrl
-    3262, 105,  // taj -> Deva
-    3266, 480,  // tbk -> Tagb
-    3270, 535,  // tcn -> Tibt
-    3274, 345,  // tco -> Mymr
-    3278, 500,  // tcx -> Taml
-    3282, 245,  // tcy -> Knda
-    3286, 520,  // tda -> Tfng
-    3290, 105,  // tdb -> Deva
-    3294, 490,  // tdd -> Tale
-    3298, 105,  // tdg -> Deva
-    3302, 105,  // tdh -> Deva
-    3306, 515,  // te -> Telu
-    3309, 205,  // tes -> Java
-    3313, 100,  // tg -> Cyrl
-    3316, 10,  // tg_PK -> Arab
-    3322, 105,  // tge -> Deva
-    3326, 535,  // tgf -> Tibt
-    3330, 530,  // th -> Thai
-    3333, 105,  // the -> Deva
-    3337, 105,  // thf -> Deva
-    3341, 490,  // thi -> Tale
-    3345, 105,  // thl -> Deva
-    3349, 530,  // thm -> Thai
-    3353, 105,  // thq -> Deva
-    3357, 105,  // thr -> Deva
-    3361, 105,  // ths -> Deva
-    3365, 120,  // ti -> Ethi
-    3368, 120,  // tig -> Ethi
-    3372, 105,  // tij -> Deva
-    3376, 100,  // tin -> Cyrl
-    3380, 345,  // tjl -> Mymr
-    3384, 10,  // tjo -> Arab
-    3388, 105,  // tkb -> Deva
-    3392, 10,  // tks -> Arab
-    3396, 105,  // tkt -> Deva
-    3400, 105,  // tmk -> Deva
-    3404, 475,  // tmr -> Syrc
-    3408, 60,  // tnv -> Cakm
-    3412, 10,  // tov -> Arab
-    3416, 235,  // tpu -> Khmr
-    3420, 10,  // tra -> Arab
-    3424, 185,  // trg -> Hebr
-    3428, 10,  // trm -> Arab
-    3432, 10,  // trw -> Arab
-    3436, 150,  // tsd -> Grek
-    3440, 535,  // tsj -> Tibt
-    3444, 100,  // tt -> Cyrl
-    3447, 260,  // tth -> Laoo
-    3451, 260,  // tto -> Laoo
-    3455, 530,  // tts -> Thai
-    3459, 345,  // tvn -> Mymr
-    3463, 105,  // twm -> Deva
-    3467, 505,  // txg -> Tang
-    3471, 545,  // txo -> Toto
-    3475, 510,  // tyr -> Tavt
-    3479, 100,  // tyv -> Cyrl
-    3483, 100,  // ude -> Cyrl
-    3487, 320,  // udg -> Mlym
-    3491, 0,  // udi -> Aghb
-    3495, 100,  // udm -> Cyrl
-    3499, 10,  // ug -> Arab
-    3502, 100,  // ug_KZ -> Cyrl
-    3508, 100,  // ug_MN -> Cyrl
-    3514, 550,  // uga -> Ugar
-    3518, 100,  // ugh -> Cyrl
-    3522, 530,  // ugo -> Thai
-    3526, 100,  // uk -> Cyrl
-    3529, 385,  // uki -> Orya
-    3533, 100,  // ulc -> Cyrl
-    3537, 45,  // unr -> Beng
-    3541, 105,  // unr_NP -> Deva
-    3548, 45,  // unx -> Beng
-    3552, 10,  // ur -> Arab
-    3555, 530,  // urk -> Thai
-    3559, 10,  // ush -> Arab
-    3563, 150,  // uum -> Grek
-    3567, 10,  // uz_AF -> Arab
-    3573, 100,  // uz_CN -> Cyrl
-    3579, 10,  // uzs -> Arab
-    3583, 500,  // vaa -> Taml
-    3587, 10,  // vaf -> Arab
-    3591, 105,  // vah -> Deva
-    3595, 555,  // vai -> Vaii
-    3599, 105,  // vas -> Deva
-    3603, 105,  // vav -> Deva
-    3607, 105,  // vay -> Deva
-    3611, 10,  // vgr -> Arab
-    3615, 245,  // vmd -> Knda
-    3619, 10,  // vmh -> Arab
-    3623, 120,  // wal -> Ethi
-    3627, 10,  // wbk -> Arab
-    3631, 515,  // wbq -> Telu
-    3635, 105,  // wbr -> Deva
-    3639, 10,  // wlo -> Arab
-    3643, 105,  // wme -> Deva
-    3647, 10,  // wne -> Arab
-    3651, 10,  // wni -> Arab
-    3655, 130,  // wsg -> Gong
-    3659, 10,  // wsv -> Arab
-    3663, 105,  // wtm -> Deva
-    3667, 175,  // wuu -> Hans
-    3671, 100,  // xal -> Cyrl
-    3675, 120,  // xan -> Ethi
-    3679, 100,  // xas -> Cyrl
-    3683, 85,  // xco -> Chrs
-    3687, 70,  // xcr -> Cari
-    3691, 100,  // xdq -> Cyrl
-    3695, 10,  // xhe -> Arab
-    3699, 235,  // xhm -> Khmr
-    3703, 385,  // xis -> Orya
-    3707, 10,  // xka -> Arab
-    3711, 10,  // xkc -> Arab
-    3715, 10,  // xkj -> Arab
-    3719, 10,  // xkp -> Arab
-    3723, 285,  // xlc -> Lyci
-    3727, 290,  // xld -> Lydi
-    3731, 115,  // xly -> Elym
-    3735, 125,  // xmf -> Geor
-    3739, 300,  // xmn -> Mani
-    3743, 315,  // xmr -> Merc
-    3747, 350,  // xna -> Narb
-    3751, 105,  // xnr -> Deva
-    3755, 150,  // xpg -> Grek
-    3759, 370,  // xpi -> Ogam
-    3763, 100,  // xpm -> Cyrl
-    3767, 415,  // xpr -> Prti
-    3771, 100,  // xrm -> Cyrl
-    3775, 100,  // xrn -> Cyrl
-    3779, 440,  // xsa -> Sarb
-    3783, 105,  // xsr -> Deva
-    3787, 100,  // xss -> Cyrl
-    3791, 500,  // xub -> Taml
-    3795, 500,  // xuj -> Taml
-    3799, 200,  // xve -> Ital
-    3803, 10,  // xvi -> Arab
-    3807, 100,  // xwo -> Cyrl
-    3811, 305,  // xzh -> Marc
-    3815, 100,  // yai -> Cyrl
-    3819, 105,  // ybh -> Deva
-    3823, 105,  // ybi -> Deva
-    3827, 10,  // ydg -> Arab
-    3831, 320,  // yea -> Mlym
-    3835, 150,  // yej -> Grek
-    3839, 515,  // yeu -> Telu
-    3843, 410,  // ygp -> Plrd
-    3847, 185,  // yhd -> Hebr
-    3851, 185,  // yi -> Hebr
-    3854, 575,  // yig -> Yiii
-    3858, 185,  // yih -> Hebr
-    3862, 575,  // yiv -> Yiii
-    3866, 100,  // ykg -> Cyrl
-    3870, 410,  // yna -> Plrd
-    3874, 100,  // ynk -> Cyrl
-    3878, 210,  // yoi -> Jpan
-    3882, 530,  // yoy -> Thai
-    3886, 100,  // yrk -> Cyrl
-    3890, 575,  // ysd -> Yiii
-    3894, 575,  // ysn -> Yiii
-    3898, 575,  // ysp -> Yiii
-    3902, 100,  // ysr -> Cyrl
-    3906, 410,  // ysy -> Plrd
-    3910, 185,  // yud -> Hebr
-    3914, 180,  // yue -> Hant
-    3918, 175,  // yue_CN -> Hans
-    3925, 100,  // yug -> Cyrl
-    3929, 100,  // yux -> Cyrl
-    3933, 410,  // ywq -> Plrd
-    3937, 410,  // ywu -> Plrd
-    3941, 535,  // zau -> Tibt
-    3945, 10,  // zba -> Arab
-    3949, 170,  // zch -> Hani
-    3953, 10,  // zdj -> Arab
-    3957, 170,  // zeh -> Hani
-    3961, 520,  // zen -> Tfng
-    3965, 170,  // zgb -> Hani
-    3969, 520,  // zgh -> Tfng
-    3973, 170,  // zgm -> Hani
-    3977, 170,  // zgn -> Hani
-    3981, 175,  // zh -> Hans
-    3984, 180,  // zh_AU -> Hant
-    3990, 180,  // zh_BN -> Hant
-    3996, 180,  // zh_GB -> Hant
-    4002, 180,  // zh_GF -> Hant
-    4008, 180,  // zh_HK -> Hant
-    4014, 180,  // zh_ID -> Hant
-    4020, 180,  // zh_MO -> Hant
-    4026, 180,  // zh_PA -> Hant
-    4032, 180,  // zh_PF -> Hant
-    4038, 180,  // zh_PH -> Hant
-    4044, 180,  // zh_SR -> Hant
-    4050, 180,  // zh_TH -> Hant
-    4056, 180,  // zh_TW -> Hant
-    4062, 180,  // zh_US -> Hant
-    4068, 180,  // zh_VN -> Hant
-    4074, 170,  // zhd -> Hani
-    4078, 365,  // zhx -> Nshu
-    4082, 100,  // zkb -> Cyrl
-    4086, 100,  // zko -> Cyrl
-    4090, 240,  // zkt -> Kits
-    4094, 100,  // zkz -> Cyrl
-    4098, 170,  // zlj -> Hani
-    4102, 170,  // zln -> Hani
-    4106, 170,  // zlq -> Hani
-    4110, 170,  // zqe -> Hani
-    4114, 185,  // zrp -> Hebr
-    4118, 10,  // zum -> Arab
-    4122, 170,  // zyg -> Hani
-    4126, 170,  // zyn -> Hani
-    4130, 170,  // zzj -> Hani
+    122, 570,  // akk -> Xsux
+    126, 100,  // akv -> Cyrl
+    130, 260,  // alk -> Laoo
+    134, 320,  // all -> Mlym
+    138, 100,  // alr -> Cyrl
+    142, 100,  // alt -> Cyrl
+    146, 120,  // alw -> Ethi
+    150, 120,  // am -> Ethi
+    153, 210,  // ams -> Jpan
+    157, 475,  // amw -> Syrc
+    161, 100,  // ani -> Cyrl
+    165, 105,  // anp -> Deva
+    169, 105,  // anr -> Deva
+    173, 120,  // anu -> Ethi
+    177, 45,  // aot -> Beng
+    181, 10,  // apc -> Arab
+    185, 10,  // apd -> Arab
+    189, 105,  // aph -> Deva
+    193, 100,  // aqc -> Cyrl
+    197, 10,  // ar -> Arab
+    200, 15,  // arc -> Armi
+    204, 10,  // arq -> Arab
+    208, 10,  // ars -> Arab
+    212, 10,  // ary -> Arab
+    216, 10,  // arz -> Arab
+    220, 45,  // as -> Beng
+    223, 450,  // ase -> Sgnw
+    227, 10,  // ask -> Arab
+    231, 10,  // atn -> Arab
+    235, 100,  // atv -> Cyrl
+    239, 10,  // auj -> Arab
+    243, 10,  // auz -> Arab
+    247, 100,  // av -> Cyrl
+    250, 10,  // avd -> Arab
+    254, 10,  // avl -> Arab
+    258, 105,  // awa -> Deva
+    262, 120,  // awn -> Ethi
+    266, 20,  // axm -> Armn
+    270, 10,  // ayh -> Arab
+    274, 10,  // ayl -> Arab
+    278, 10,  // ayn -> Arab
+    282, 10,  // ayp -> Arab
+    286, 10,  // az_IQ -> Arab
+    292, 10,  // az_IR -> Arab
+    298, 100,  // az_RU -> Cyrl
+    304, 10,  // azb -> Arab
+    308, 100,  // ba -> Cyrl
+    311, 10,  // bal -> Arab
+    315, 105,  // bap -> Deva
+    319, 30,  // bax -> Bamu
+    323, 125,  // bbl -> Geor
+    327, 120,  // bcq -> Ethi
+    331, 385,  // bdv -> Orya
+    335, 10,  // bdz -> Arab
+    339, 100,  // be -> Cyrl
+    342, 105,  // bee -> Deva
+    346, 10,  // bej -> Arab
+    350, 105,  // bfb -> Deva
+    354, 500,  // bfq -> Taml
+    358, 10,  // bft -> Arab
+    362, 535,  // bfu -> Tibt
+    366, 385,  // bfw -> Orya
+    370, 105,  // bfy -> Deva
+    374, 105,  // bfz -> Deva
+    378, 100,  // bg -> Cyrl
+    381, 105,  // bgc -> Deva
+    385, 105,  // bgd -> Deva
+    389, 10,  // bgn -> Arab
+    393, 10,  // bgp -> Arab
+    397, 105,  // bgq -> Deva
+    401, 105,  // bgw -> Deva
+    405, 150,  // bgx -> Grek
+    409, 105,  // bha -> Deva
+    413, 105,  // bhb -> Deva
+    417, 105,  // bhd -> Deva
+    421, 10,  // bhe -> Arab
+    425, 100,  // bhh -> Cyrl
+    429, 105,  // bhi -> Deva
+    433, 105,  // bhj -> Deva
+    437, 10,  // bhm -> Arab
+    441, 475,  // bhn -> Syrc
+    445, 105,  // bho -> Deva
+    449, 485,  // bht -> Takr
+    453, 105,  // bhu -> Deva
+    457, 105,  // biy -> Deva
+    461, 475,  // bjf -> Syrc
+    465, 105,  // bjj -> Deva
+    469, 10,  // bjm -> Arab
+    473, 345,  // blk -> Mymr
+    477, 510,  // blt -> Tavt
+    481, 105,  // bmj -> Deva
+    485, 45,  // bn -> Beng
+    488, 105,  // bns -> Deva
+    492, 535,  // bo -> Tibt
+    495, 100,  // bph -> Cyrl
+    499, 105,  // bpx -> Deva
+    503, 45,  // bpy -> Beng
+    507, 10,  // bqi -> Arab
+    511, 105,  // bra -> Deva
+    515, 235,  // brb -> Khmr
+    519, 105,  // brd -> Deva
+    523, 10,  // brh -> Arab
+    527, 10,  // brk -> Arab
+    531, 260,  // brv -> Laoo
+    535, 105,  // brx -> Deva
+    539, 10,  // bsh -> Arab
+    543, 10,  // bsk -> Arab
+    547, 35,  // bsq -> Bass
+    551, 120,  // bst -> Ethi
+    555, 40,  // btd -> Batk
+    559, 40,  // btm -> Batk
+    563, 105,  // btv -> Deva
+    567, 100,  // bua -> Cyrl
+    571, 345,  // bwe -> Mymr
+    575, 100,  // bxm -> Cyrl
+    579, 330,  // bxu -> Mong
+    583, 105,  // byh -> Deva
+    587, 120,  // byn -> Ethi
+    591, 105,  // byw -> Deva
+    595, 530,  // bzi -> Thai
+    599, 530,  // cbn -> Thai
+    603, 60,  // ccp -> Cakm
+    607, 515,  // cde -> Telu
+    611, 105,  // cdh -> Deva
+    615, 155,  // cdi -> Gujr
+    619, 105,  // cdj -> Deva
+    623, 105,  // cdm -> Deva
+    627, 175,  // cdo -> Hans
+    631, 45,  // cdz -> Beng
+    635, 100,  // ce -> Cyrl
+    638, 535,  // cgk -> Tibt
+    642, 10,  // chg -> Arab
+    646, 100,  // chm -> Cyrl
+    650, 80,  // chr -> Cher
+    654, 105,  // chx -> Deva
+    658, 105,  // cih -> Deva
+    662, 10,  // cja -> Arab
+    666, 100,  // cji -> Cyrl
+    670, 75,  // cjm -> Cham
+    674, 175,  // cjy -> Hans
+    678, 10,  // ckb -> Arab
+    682, 100,  // ckt -> Cyrl
+    686, 10,  // clh -> Arab
+    690, 100,  // clw -> Cyrl
+    694, 470,  // cmg -> Soyo
+    698, 535,  // cna -> Tibt
+    702, 175,  // cnp -> Hans
+    706, 530,  // cog -> Thai
+    710, 90,  // cop -> Copt
+    714, 150,  // cpg -> Grek
+    718, 65,  // cr -> Cans
+    721, 100,  // crh -> Cyrl
+    725, 65,  // crj -> Cans
+    729, 65,  // crk -> Cans
+    733, 65,  // crl -> Cans
+    737, 65,  // crm -> Cans
+    741, 345,  // csh -> Mymr
+    745, 175,  // csp -> Hans
+    749, 65,  // csw -> Cans
+    753, 395,  // ctd -> Pauc
+    757, 45,  // ctg -> Beng
+    761, 105,  // ctn -> Deva
+    765, 500,  // ctt -> Taml
+    769, 100,  // cu -> Cyrl
+    772, 255,  // cuu -> Lana
+    776, 100,  // cv -> Cyrl
+    779, 175,  // czh -> Hans
+    783, 185,  // czk -> Hebr
+    787, 105,  // daq -> Deva
+    791, 100,  // dar -> Cyrl
+    795, 10,  // dcc -> Arab
+    799, 100,  // ddo -> Cyrl
+    803, 10,  // def -> Arab
+    807, 10,  // deh -> Arab
+    811, 45,  // der -> Beng
+    815, 105,  // dhi -> Deva
+    819, 155,  // dhn -> Gujr
+    823, 105,  // dho -> Deva
+    827, 105,  // dhw -> Deva
+    831, 535,  // dka -> Tibt
+    835, 100,  // dlg -> Cyrl
+    839, 310,  // dmf -> Medf
+    843, 10,  // dmk -> Arab
+    847, 10,  // dml -> Arab
+    851, 100,  // dng -> Cyrl
+    855, 345,  // dnu -> Mymr
+    859, 345,  // dnv -> Mymr
+    863, 105,  // doi -> Deva
+    867, 120,  // dox -> Ethi
+    871, 535,  // dre -> Tibt
+    875, 105,  // drq -> Deva
+    879, 120,  // drs -> Ethi
+    883, 105,  // dry -> Deva
+    887, 385,  // dso -> Orya
+    891, 105,  // dty -> Deva
+    895, 155,  // dub -> Gujr
+    899, 105,  // duh -> Deva
+    903, 105,  // dus -> Deva
+    907, 525,  // dv -> Thaa
+    910, 385,  // dwk -> Orya
+    914, 105,  // dwz -> Deva
+    918, 535,  // dz -> Tibt
+    921, 535,  // dzl -> Tibt
+    925, 150,  // ecr -> Grek
+    929, 95,  // ecy -> Cprt
+    933, 110,  // egy -> Egyp
+    937, 215,  // eky -> Kali
+    941, 150,  // el -> Grek
+    944, 105,  // emg -> Deva
+    948, 105,  // emu -> Deva
+    952, 100,  // enf -> Cyrl
+    956, 100,  // enh -> Cyrl
+    960, 500,  // era -> Taml
+    964, 135,  // esg -> Gonm
+    968, 10,  // esh -> Arab
+    972, 200,  // ett -> Ital
+    976, 100,  // eve -> Cyrl
+    980, 100,  // evn -> Cyrl
+    984, 10,  // fa -> Arab
+    987, 10,  // fay -> Arab
+    991, 10,  // faz -> Arab
+    995, 10,  // fia -> Arab
+    999, 105,  // fmu -> Deva
+    1003, 10,  // fub -> Arab
+    1007, 175,  // gan -> Hans
+    1011, 385,  // gaq -> Orya
+    1015, 155,  // gas -> Gujr
+    1019, 515,  // gau -> Telu
+    1023, 385,  // gbj -> Orya
+    1027, 105,  // gbk -> Deva
+    1031, 155,  // gbl -> Gujr
+    1035, 105,  // gbm -> Deva
+    1039, 10,  // gbz -> Arab
+    1043, 385,  // gdb -> Orya
+    1047, 100,  // gdo -> Cyrl
+    1051, 105,  // gdx -> Deva
+    1055, 120,  // gez -> Ethi
+    1059, 10,  // ggg -> Arab
+    1063, 10,  // gha -> Arab
+    1067, 105,  // ghe -> Deva
+    1071, 10,  // ghr -> Arab
+    1075, 535,  // ght -> Tibt
+    1079, 10,  // gig -> Arab
+    1083, 100,  // gin -> Cyrl
+    1087, 10,  // gjk -> Arab
+    1091, 10,  // gju -> Arab
+    1095, 100,  // gld -> Cyrl
+    1099, 10,  // glh -> Arab
+    1103, 10,  // glk -> Arab
+    1107, 120,  // gmv -> Ethi
+    1111, 275,  // gmy -> Linb
+    1115, 535,  // goe -> Tibt
+    1119, 120,  // gof -> Ethi
+    1123, 105,  // gok -> Deva
+    1127, 105,  // gom -> Deva
+    1131, 515,  // gon -> Telu
+    1135, 140,  // got -> Goth
+    1139, 105,  // gra -> Deva
+    1143, 95,  // grc -> Cprt
+    1147, 45,  // grt -> Beng
+    1151, 120,  // gru -> Ethi
+    1155, 155,  // gu -> Gujr
+    1158, 105,  // gvr -> Deva
+    1162, 10,  // gwc -> Arab
+    1166, 10,  // gwf -> Arab
+    1170, 10,  // gwt -> Arab
+    1174, 105,  // gyo -> Deva
+    1178, 10,  // gzi -> Arab
+    1182, 10,  // ha_CM -> Arab
+    1188, 10,  // ha_SD -> Arab
+    1194, 10,  // hac -> Arab
+    1198, 175,  // hak -> Hans
+    1202, 120,  // har -> Ethi
+    1206, 10,  // haz -> Arab
+    1210, 185,  // hbo -> Hebr
+    1214, 120,  // hdy -> Ethi
+    1218, 185,  // he -> Hebr
+    1221, 105,  // hi -> Deva
+    1224, 485,  // hii -> Takr
+    1228, 570,  // hit -> Xsux
+    1232, 10,  // hkh -> Arab
+    1236, 105,  // hlb -> Deva
+    1240, 190,  // hlu -> Hluw
+    1244, 410,  // hmd -> Plrd
+    1248, 50,  // hmj -> Bopo
+    1252, 50,  // hmq -> Bopo
+    1256, 10,  // hnd -> Arab
+    1260, 105,  // hne -> Deva
+    1264, 195,  // hnj -> Hmnp
+    1268, 260,  // hnj_AU -> Laoo
+    1275, 260,  // hnj_CN -> Laoo
+    1282, 260,  // hnj_FR -> Laoo
+    1289, 260,  // hnj_GF -> Laoo
+    1296, 260,  // hnj_LA -> Laoo
+    1303, 260,  // hnj_MM -> Laoo
+    1310, 260,  // hnj_SR -> Laoo
+    1317, 260,  // hnj_TH -> Laoo
+    1324, 260,  // hnj_VN -> Laoo
+    1331, 10,  // hno -> Arab
+    1335, 105,  // hoc -> Deva
+    1339, 10,  // hoh -> Arab
+    1343, 105,  // hoj -> Deva
+    1347, 170,  // how -> Hani
+    1351, 105,  // hoy -> Deva
+    1355, 345,  // hpo -> Mymr
+    1359, 475,  // hrt -> Syrc
+    1363, 10,  // hrz -> Arab
+    1367, 175,  // hsn -> Hans
+    1371, 10,  // hss -> Arab
+    1375, 570,  // htx -> Xsux
+    1379, 105,  // hut -> Deva
+    1383, 185,  // huy -> Hebr
+    1387, 100,  // huz -> Cyrl
+    1391, 20,  // hy -> Armn
+    1394, 20,  // hyw -> Armn
+    1398, 575,  // ii -> Yiii
+    1401, 285,  // imy -> Lyci
+    1405, 100,  // inh -> Cyrl
+    1409, 345,  // int -> Mymr
+    1413, 120,  // ior -> Ethi
+    1417, 500,  // iru -> Taml
+    1421, 10,  // isk -> Arab
+    1425, 185,  // itk -> Hebr
+    1429, 100,  // itl -> Cyrl
+    1433, 65,  // iu -> Cans
+    1436, 185,  // iw -> Hebr
+    1439, 210,  // ja -> Jpan
+    1442, 10,  // jad -> Arab
+    1446, 10,  // jat -> Arab
+    1450, 185,  // jbe -> Hebr
+    1454, 10,  // jbn -> Arab
+    1458, 100,  // jct -> Cyrl
+    1462, 535,  // jda -> Tibt
+    1466, 10,  // jdg -> Arab
+    1470, 100,  // jdt -> Cyrl
+    1474, 105,  // jee -> Deva
+    1478, 125,  // jge -> Geor
+    1482, 185,  // ji -> Hebr
+    1485, 165,  // jje -> Hang
+    1489, 345,  // jkm -> Mymr
+    1493, 105,  // jml -> Deva
+    1497, 485,  // jna -> Takr
+    1501, 10,  // jnd -> Arab
+    1505, 105,  // jnl -> Deva
+    1509, 105,  // jns -> Deva
+    1513, 10,  // jog -> Arab
+    1517, 185,  // jpa -> Hebr
+    1521, 185,  // jpr -> Hebr
+    1525, 185,  // jrb -> Hebr
+    1529, 10,  // jrb_MA -> Arab
+    1536, 105,  // jul -> Deva
+    1540, 385,  // jun -> Orya
+    1544, 385,  // juy -> Orya
+    1548, 535,  // jya -> Tibt
+    1552, 185,  // jye -> Hebr
+    1556, 125,  // ka -> Geor
+    1559, 100,  // kaa -> Cyrl
+    1563, 100,  // kap -> Cyrl
+    1567, 225,  // kaw -> Kawi
+    1571, 100,  // kbd -> Cyrl
+    1575, 10,  // kbu -> Arab
+    1579, 10,  // kby -> Arab
+    1583, 100,  // kca -> Cyrl
+    1587, 45,  // kdq -> Beng
+    1591, 530,  // kdt -> Thai
+    1595, 100,  // ket -> Cyrl
+    1599, 105,  // kex -> Deva
+    1603, 515,  // key -> Telu
+    1607, 245,  // kfa -> Knda
+    1611, 105,  // kfb -> Deva
+    1615, 515,  // kfc -> Telu
+    1619, 245,  // kfd -> Knda
+    1623, 500,  // kfe -> Taml
+    1627, 320,  // kfh -> Mlym
+    1631, 500,  // kfi -> Taml
+    1635, 105,  // kfk -> Deva
+    1639, 10,  // kfm -> Arab
+    1643, 105,  // kfp -> Deva
+    1647, 105,  // kfq -> Deva
+    1651, 105,  // kfr -> Deva
+    1655, 105,  // kfs -> Deva
+    1659, 105,  // kfx -> Deva
+    1663, 105,  // kfy -> Deva
+    1667, 105,  // kgj -> Deva
+    1671, 105,  // kgy -> Deva
+    1675, 495,  // khb -> Talu
+    1679, 530,  // khf -> Thai
+    1683, 535,  // khg -> Tibt
+    1687, 105,  // khn -> Deva
+    1691, 345,  // kht -> Mymr
+    1695, 100,  // khv -> Cyrl
+    1699, 10,  // khw -> Arab
+    1703, 105,  // kif -> Deva
+    1707, 100,  // kim -> Cyrl
+    1711, 105,  // kip -> Deva
+    1715, 260,  // kjg -> Laoo
+    1719, 100,  // kjh -> Cyrl
+    1723, 105,  // kjl -> Deva
+    1727, 105,  // kjo -> Deva
+    1731, 345,  // kjp -> Mymr
+    1735, 530,  // kjt -> Thai
+    1739, 100,  // kk -> Cyrl
+    1742, 10,  // kk_AF -> Arab
+    1748, 10,  // kk_CN -> Arab
+    1754, 10,  // kk_IR -> Arab
+    1760, 10,  // kk_MN -> Arab
+    1766, 535,  // kkf -> Tibt
+    1770, 255,  // kkh -> Lana
+    1774, 105,  // kkt -> Deva
+    1778, 105,  // kle -> Deva
+    1782, 10,  // klj -> Arab
+    1786, 105,  // klr -> Deva
+    1790, 235,  // km -> Khmr
+    1793, 105,  // kmj -> Deva
+    1797, 10,  // kmz -> Arab
+    1801, 245,  // kn -> Knda
+    1804, 250,  // ko -> Kore
+    1807, 100,  // koi -> Cyrl
+    1811, 105,  // kok -> Deva
+    1815, 100,  // kpt -> Cyrl
+    1819, 100,  // kpy -> Cyrl
+    1823, 475,  // kqd -> Syrc
+    1827, 120,  // kqy -> Ethi
+    1831, 105,  // kra -> Deva
+    1835, 100,  // krc -> Cyrl
+    1839, 100,  // krk -> Cyrl
+    1843, 235,  // krr -> Khmr
+    1847, 105,  // kru -> Deva
+    1851, 235,  // krv -> Khmr
+    1855, 10,  // ks -> Arab
+    1858, 345,  // ksu -> Mymr
+    1862, 345,  // ksw -> Mymr
+    1866, 105,  // ksz -> Deva
+    1870, 120,  // ktb -> Ethi
+    1874, 10,  // ktl -> Arab
+    1878, 410,  // ktp -> Plrd
+    1882, 10,  // ku_LB -> Arab
+    1888, 260,  // kuf -> Laoo
+    1892, 100,  // kum -> Cyrl
+    1896, 100,  // kv -> Cyrl
+    1899, 100,  // kva -> Cyrl
+    1903, 345,  // kvq -> Mymr
+    1907, 345,  // kvt -> Mymr
+    1911, 10,  // kvx -> Arab
+    1915, 215,  // kvy -> Kali
+    1919, 345,  // kxf -> Mymr
+    1923, 345,  // kxk -> Mymr
+    1927, 530,  // kxm -> Thai
+    1931, 10,  // kxp -> Arab
+    1935, 100,  // ky -> Cyrl
+    1938, 10,  // ky_CN -> Arab
+    1944, 215,  // kyu -> Kali
+    1948, 105,  // kyv -> Deva
+    1952, 105,  // kyw -> Deva
+    1956, 270,  // lab -> Lina
+    1960, 185,  // lad -> Hebr
+    1964, 105,  // lae -> Deva
+    1968, 10,  // lah -> Arab
+    1972, 280,  // lbc -> Lisu
+    1976, 100,  // lbe -> Cyrl
+    1980, 105,  // lbf -> Deva
+    1984, 535,  // lbj -> Tibt
+    1988, 105,  // lbm -> Deva
+    1992, 260,  // lbo -> Laoo
+    1996, 105,  // lbr -> Deva
+    2000, 530,  // lcp -> Thai
+    2004, 265,  // lep -> Lepc
+    2008, 100,  // lez -> Cyrl
+    2012, 105,  // lhm -> Deva
+    2016, 475,  // lhs -> Syrc
+    2020, 105,  // lif -> Deva
+    2024, 280,  // lis -> Lisu
+    2028, 535,  // lkh -> Tibt
+    2032, 10,  // lki -> Arab
+    2036, 105,  // lmh -> Deva
+    2040, 515,  // lmn -> Telu
+    2044, 260,  // lo -> Laoo
+    2047, 105,  // loy -> Deva
+    2051, 410,  // lpo -> Plrd
+    2055, 10,  // lrc -> Arab
+    2059, 10,  // lrk -> Arab
+    2063, 10,  // lrl -> Arab
+    2067, 10,  // lsa -> Arab
+    2071, 185,  // lsd -> Hebr
+    2075, 10,  // lss -> Arab
+    2079, 535,  // luk -> Tibt
+    2083, 105,  // luu -> Deva
+    2087, 10,  // luv -> Arab
+    2091, 10,  // luz -> Arab
+    2095, 530,  // lwl -> Thai
+    2099, 530,  // lwm -> Thai
+    2103, 535,  // lya -> Tibt
+    2107, 175,  // lzh -> Hans
+    2111, 105,  // mag -> Deva
+    2115, 105,  // mai -> Deva
+    2119, 360,  // man_GN -> Nkoo
+    2126, 10,  // mby -> Arab
+    2130, 10,  // mde -> Arab
+    2134, 100,  // mdf -> Cyrl
+    2138, 120,  // mdx -> Ethi
+    2142, 120,  // mdy -> Ethi
+    2146, 10,  // mfa -> Arab
+    2150, 10,  // mfi -> Arab
+    2154, 105,  // mgp -> Deva
+    2158, 10,  // mhj -> Arab
+    2162, 295,  // mid -> Mand
+    2166, 105,  // mjl -> Deva
+    2170, 320,  // mjq -> Mlym
+    2174, 320,  // mjr -> Mlym
+    2178, 105,  // mjt -> Deva
+    2182, 515,  // mju -> Telu
+    2186, 320,  // mjv -> Mlym
+    2190, 105,  // mjz -> Deva
+    2194, 100,  // mk -> Cyrl
+    2197, 105,  // mkb -> Deva
+    2201, 105,  // mke -> Deva
+    2205, 10,  // mki -> Arab
+    2209, 530,  // mkm -> Thai
+    2213, 320,  // ml -> Mlym
+    2216, 530,  // mlf -> Thai
+    2220, 100,  // mn -> Cyrl
+    2223, 330,  // mn_CN -> Mong
+    2229, 45,  // mni -> Beng
+    2233, 10,  // mnj -> Arab
+    2237, 100,  // mns -> Cyrl
+    2241, 345,  // mnw -> Mymr
+    2245, 530,  // mpz -> Thai
+    2249, 105,  // mr -> Deva
+    2252, 530,  // mra -> Thai
+    2256, 105,  // mrd -> Deva
+    2260, 100,  // mrj -> Cyrl
+    2264, 335,  // mro -> Mroo
+    2268, 105,  // mrr -> Deva
+    2272, 10,  // ms_CC -> Arab
+    2278, 100,  // mtm -> Cyrl
+    2282, 105,  // mtr -> Deva
+    2286, 100,  // mud -> Cyrl
+    2290, 535,  // muk -> Tibt
+    2294, 105,  // mut -> Deva
+    2298, 500,  // muv -> Taml
+    2302, 120,  // muz -> Ethi
+    2306, 330,  // mvf -> Mong
+    2310, 10,  // mvy -> Arab
+    2314, 120,  // mvz -> Ethi
+    2318, 105,  // mwr -> Deva
+    2322, 345,  // mwt -> Mymr
+    2326, 195,  // mww -> Hmnp
+    2330, 345,  // my -> Mymr
+    2333, 120,  // mym -> Ethi
+    2337, 100,  // myv -> Cyrl
+    2341, 295,  // myz -> Mand
+    2345, 10,  // mzn -> Arab
+    2349, 175,  // nan -> Hans
+    2353, 105,  // nao -> Deva
+    2357, 105,  // ncd -> Deva
+    2361, 260,  // ncq -> Laoo
+    2365, 100,  // ndf -> Cyrl
+    2369, 105,  // ne -> Deva
+    2372, 100,  // neg -> Cyrl
+    2376, 535,  // neh -> Tibt
+    2380, 570,  // nei -> Xsux
+    2384, 105,  // new -> Deva
+    2388, 260,  // ngt -> Laoo
+    2392, 100,  // nio -> Cyrl
+    2396, 515,  // nit -> Telu
+    2400, 100,  // niv -> Cyrl
+    2404, 10,  // nli -> Arab
+    2408, 10,  // nlm -> Arab
+    2412, 105,  // nlx -> Deva
+    2416, 105,  // nmm -> Deva
+    2420, 560,  // nnp -> Wcho
+    2424, 255,  // nod -> Lana
+    2428, 105,  // noe -> Deva
+    2432, 100,  // nog -> Cyrl
+    2436, 105,  // noi -> Deva
+    2440, 430,  // non -> Runr
+    2444, 575,  // nos -> Yiii
+    2448, 535,  // npb -> Tibt
+    2452, 360,  // nqo -> Nkoo
+    2456, 575,  // nsd -> Yiii
+    2460, 575,  // nsf -> Yiii
+    2464, 65,  // nsk -> Cans
+    2468, 540,  // nst -> Tnsa
+    2472, 575,  // nsv -> Yiii
+    2476, 575,  // nty -> Yiii
+    2480, 10,  // ntz -> Arab
+    2484, 355,  // nwc -> Newa
+    2488, 105,  // nwx -> Deva
+    2492, 530,  // nyl -> Thai
+    2496, 10,  // nyq -> Arab
+    2500, 100,  // oaa -> Cyrl
+    2504, 100,  // oac -> Cyrl
+    2508, 475,  // oar -> Syrc
+    2512, 125,  // oav -> Geor
+    2516, 405,  // obm -> Phnx
+    2520, 345,  // obr -> Mymr
+    2524, 10,  // odk -> Arab
+    2528, 570,  // oht -> Xsux
+    2532, 65,  // oj -> Cans
+    2535, 65,  // ojs -> Cans
+    2539, 165,  // okm -> Hang
+    2543, 170,  // oko -> Hani
+    2547, 235,  // okz -> Khmr
+    2551, 105,  // ola -> Deva
+    2555, 535,  // ole -> Tibt
+    2559, 100,  // omk -> Cyrl
+    2563, 340,  // omp -> Mtei
+    2567, 325,  // omr -> Modi
+    2571, 105,  // oon -> Deva
+    2575, 385,  // or -> Orya
+    2578, 515,  // ort -> Telu
+    2582, 10,  // oru -> Arab
+    2586, 100,  // orv -> Cyrl
+    2590, 100,  // os -> Cyrl
+    2593, 390,  // osa -> Osge
+    2597, 200,  // osc -> Ital
+    2601, 205,  // osi -> Java
+    2605, 10,  // ota -> Arab
+    2609, 535,  // otb -> Tibt
+    2613, 380,  // otk -> Orkh
+    2617, 145,  // oty -> Gran
+    2621, 160,  // pa -> Guru
+    2624, 10,  // pa_PK -> Arab
+    2630, 400,  // pal -> Phli
+    2634, 100,  // paq -> Cyrl
+    2638, 10,  // pbt -> Arab
+    2642, 235,  // pcb -> Khmr
+    2646, 345,  // pce -> Mymr
+    2650, 320,  // pcf -> Mlym
+    2654, 320,  // pcg -> Mlym
+    2658, 105,  // pch -> Deva
+    2662, 105,  // pci -> Deva
+    2666, 515,  // pcj -> Telu
+    2670, 385,  // peg -> Orya
+    2674, 565,  // peo -> Xpeo
+    2678, 230,  // pgd -> Khar
+    2682, 105,  // pgg -> Deva
+    2686, 370,  // pgl -> Ogam
+    2690, 200,  // pgn -> Ital
+    2694, 105,  // phd -> Deva
+    2698, 345,  // phk -> Mymr
+    2702, 10,  // phl -> Arab
+    2706, 405,  // phn -> Phnx
+    2710, 260,  // pho -> Laoo
+    2714, 10,  // phr -> Arab
+    2718, 530,  // pht -> Thai
+    2722, 10,  // phv -> Arab
+    2726, 105,  // phw -> Deva
+    2730, 455,  // pi -> Sinh
+    2733, 55,  // pka -> Brah
+    2737, 320,  // pkr -> Mlym
+    2741, 10,  // plk -> Arab
+    2745, 345,  // pll -> Mymr
+    2749, 55,  // pmh -> Brah
+    2753, 150,  // pnt -> Grek
+    2757, 230,  // pra -> Khar
+    2761, 10,  // prc -> Arab
+    2765, 10,  // prd -> Arab
+    2769, 530,  // prt -> Thai
+    2773, 10,  // prx -> Arab
+    2777, 10,  // ps -> Arab
+    2780, 10,  // psh -> Arab
+    2784, 10,  // psi -> Arab
+    2788, 10,  // pst -> Arab
+    2792, 105,  // pum -> Deva
+    2796, 345,  // pwo -> Mymr
+    2800, 105,  // pwr -> Deva
+    2804, 530,  // pww -> Thai
+    2808, 345,  // pyx -> Mymr
+    2812, 10,  // qxq -> Arab
+    2816, 105,  // raa -> Deva
+    2820, 105,  // rab -> Deva
+    2824, 105,  // raf -> Deva
+    2828, 45,  // rah -> Beng
+    2832, 105,  // raj -> Deva
+    2836, 105,  // rav -> Deva
+    2840, 345,  // rbb -> Mymr
+    2844, 10,  // rdb -> Arab
+    2848, 385,  // rei -> Orya
+    2852, 425,  // rhg -> Rohg
+    2856, 105,  // rji -> Deva
+    2860, 105,  // rjs -> Deva
+    2864, 235,  // rka -> Khmr
+    2868, 345,  // rki -> Mymr
+    2872, 45,  // rkt -> Beng
+    2876, 20,  // rmi -> Armn
+    2880, 10,  // rmt -> Arab
+    2884, 345,  // rmz -> Mymr
+    2888, 100,  // rom_BG -> Cyrl
+    2895, 100,  // rsk -> Cyrl
+    2899, 105,  // rtw -> Deva
+    2903, 100,  // ru -> Cyrl
+    2906, 100,  // rue -> Cyrl
+    2910, 100,  // rut -> Cyrl
+    2914, 105,  // rwr -> Deva
+    2918, 220,  // ryu -> Kana
+    2922, 105,  // sa -> Deva
+    2925, 100,  // sah -> Cyrl
+    2929, 435,  // sam -> Samr
+    2933, 375,  // sat -> Olck
+    2937, 445,  // saz -> Saur
+    2941, 10,  // sbn -> Arab
+    2945, 535,  // sbu -> Tibt
+    2949, 105,  // sck -> Deva
+    2953, 10,  // scl -> Arab
+    2957, 10,  // scl_IN -> Arab
+    2964, 105,  // scp -> Deva
+    2968, 260,  // sct -> Laoo
+    2972, 485,  // scu -> Takr
+    2976, 150,  // scx -> Grek
+    2980, 10,  // sd -> Arab
+    2983, 105,  // sd_IN -> Deva
+    2989, 10,  // sdb -> Arab
+    2993, 10,  // sdf -> Arab
+    2997, 10,  // sdg -> Arab
+    3001, 10,  // sdh -> Arab
+    3005, 10,  // sds -> Arab
+    3009, 100,  // sel -> Cyrl
+    3013, 410,  // sfm -> Plrd
+    3017, 370,  // sga -> Ogam
+    3021, 100,  // sgh -> Cyrl
+    3025, 105,  // sgj -> Deva
+    3029, 10,  // sgr -> Arab
+    3033, 535,  // sgt -> Tibt
+    3037, 120,  // sgw -> Ethi
+    3041, 10,  // sgy -> Arab
+    3045, 10,  // shd -> Arab
+    3049, 520,  // shi -> Tfng
+    3053, 10,  // shm -> Arab
+    3057, 345,  // shn -> Mymr
+    3061, 10,  // shu -> Arab
+    3065, 10,  // shv -> Arab
+    3069, 455,  // si -> Sinh
+    3072, 100,  // sia -> Cyrl
+    3076, 535,  // sip -> Tibt
+    3080, 10,  // siy -> Arab
+    3084, 10,  // siz -> Arab
+    3088, 100,  // sjd -> Cyrl
+    3092, 105,  // sjp -> Deva
+    3096, 100,  // sjt -> Cyrl
+    3100, 530,  // skb -> Thai
+    3104, 105,  // skj -> Deva
+    3108, 10,  // skr -> Arab
+    3112, 575,  // smh -> Yiii
+    3116, 435,  // smp -> Samr
+    3120, 235,  // smu -> Khmr
+    3124, 10,  // smy -> Arab
+    3128, 510,  // soa -> Tavt
+    3132, 460,  // sog -> Sogd
+    3136, 105,  // soi -> Deva
+    3140, 530,  // sou -> Thai
+    3144, 535,  // spt -> Tibt
+    3148, 385,  // spv -> Orya
+    3152, 10,  // sqo -> Arab
+    3156, 260,  // sqq -> Laoo
+    3160, 10,  // sqt -> Arab
+    3164, 100,  // sr -> Cyrl
+    3167, 465,  // srb -> Sora
+    3171, 10,  // srh -> Arab
+    3175, 105,  // srx -> Deva
+    3179, 10,  // srz -> Arab
+    3183, 10,  // ssh -> Arab
+    3187, 260,  // sss -> Laoo
+    3191, 10,  // sts -> Arab
+    3195, 120,  // stv -> Ethi
+    3199, 100,  // sty -> Cyrl
+    3203, 105,  // suz -> Deva
+    3207, 125,  // sva -> Geor
+    3211, 10,  // swb -> Arab
+    3215, 170,  // swi -> Hani
+    3219, 105,  // swv -> Deva
+    3223, 475,  // syc -> Syrc
+    3227, 45,  // syl -> Beng
+    3231, 475,  // syn -> Syrc
+    3235, 475,  // syr -> Syrc
+    3239, 105,  // syw -> Deva
+    3243, 500,  // ta -> Taml
+    3246, 100,  // tab -> Cyrl
+    3250, 105,  // taj -> Deva
+    3254, 480,  // tbk -> Tagb
+    3258, 535,  // tcn -> Tibt
+    3262, 345,  // tco -> Mymr
+    3266, 500,  // tcx -> Taml
+    3270, 245,  // tcy -> Knda
+    3274, 520,  // tda -> Tfng
+    3278, 105,  // tdb -> Deva
+    3282, 490,  // tdd -> Tale
+    3286, 105,  // tdg -> Deva
+    3290, 105,  // tdh -> Deva
+    3294, 515,  // te -> Telu
+    3297, 205,  // tes -> Java
+    3301, 100,  // tg -> Cyrl
+    3304, 10,  // tg_PK -> Arab
+    3310, 105,  // tge -> Deva
+    3314, 535,  // tgf -> Tibt
+    3318, 530,  // th -> Thai
+    3321, 105,  // the -> Deva
+    3325, 105,  // thf -> Deva
+    3329, 490,  // thi -> Tale
+    3333, 105,  // thl -> Deva
+    3337, 530,  // thm -> Thai
+    3341, 105,  // thq -> Deva
+    3345, 105,  // thr -> Deva
+    3349, 105,  // ths -> Deva
+    3353, 120,  // ti -> Ethi
+    3356, 120,  // tig -> Ethi
+    3360, 105,  // tij -> Deva
+    3364, 100,  // tin -> Cyrl
+    3368, 345,  // tjl -> Mymr
+    3372, 10,  // tjo -> Arab
+    3376, 105,  // tkb -> Deva
+    3380, 10,  // tks -> Arab
+    3384, 105,  // tkt -> Deva
+    3388, 475,  // tmr -> Syrc
+    3392, 60,  // tnv -> Cakm
+    3396, 10,  // tov -> Arab
+    3400, 235,  // tpu -> Khmr
+    3404, 10,  // tra -> Arab
+    3408, 185,  // trg -> Hebr
+    3412, 10,  // trm -> Arab
+    3416, 10,  // trw -> Arab
+    3420, 150,  // tsd -> Grek
+    3424, 535,  // tsj -> Tibt
+    3428, 100,  // tt -> Cyrl
+    3431, 260,  // tth -> Laoo
+    3435, 260,  // tto -> Laoo
+    3439, 530,  // tts -> Thai
+    3443, 345,  // tvn -> Mymr
+    3447, 105,  // twm -> Deva
+    3451, 505,  // txg -> Tang
+    3455, 545,  // txo -> Toto
+    3459, 510,  // tyr -> Tavt
+    3463, 100,  // tyv -> Cyrl
+    3467, 100,  // ude -> Cyrl
+    3471, 320,  // udg -> Mlym
+    3475, 0,  // udi -> Aghb
+    3479, 100,  // udm -> Cyrl
+    3483, 10,  // ug -> Arab
+    3486, 100,  // ug_KZ -> Cyrl
+    3492, 100,  // ug_MN -> Cyrl
+    3498, 550,  // uga -> Ugar
+    3502, 100,  // ugh -> Cyrl
+    3506, 530,  // ugo -> Thai
+    3510, 100,  // uk -> Cyrl
+    3513, 385,  // uki -> Orya
+    3517, 100,  // ulc -> Cyrl
+    3521, 45,  // unr -> Beng
+    3525, 105,  // unr_NP -> Deva
+    3532, 45,  // unx -> Beng
+    3536, 10,  // ur -> Arab
+    3539, 530,  // urk -> Thai
+    3543, 10,  // ush -> Arab
+    3547, 150,  // uum -> Grek
+    3551, 10,  // uz_AF -> Arab
+    3557, 100,  // uz_CN -> Cyrl
+    3563, 10,  // uzs -> Arab
+    3567, 500,  // vaa -> Taml
+    3571, 10,  // vaf -> Arab
+    3575, 105,  // vah -> Deva
+    3579, 555,  // vai -> Vaii
+    3583, 105,  // vas -> Deva
+    3587, 105,  // vav -> Deva
+    3591, 105,  // vay -> Deva
+    3595, 10,  // vgr -> Arab
+    3599, 245,  // vmd -> Knda
+    3603, 10,  // vmh -> Arab
+    3607, 120,  // wal -> Ethi
+    3611, 10,  // wbk -> Arab
+    3615, 515,  // wbq -> Telu
+    3619, 105,  // wbr -> Deva
+    3623, 10,  // wlo -> Arab
+    3627, 105,  // wme -> Deva
+    3631, 10,  // wne -> Arab
+    3635, 10,  // wni -> Arab
+    3639, 130,  // wsg -> Gong
+    3643, 10,  // wsv -> Arab
+    3647, 105,  // wtm -> Deva
+    3651, 175,  // wuu -> Hans
+    3655, 100,  // xal -> Cyrl
+    3659, 120,  // xan -> Ethi
+    3663, 100,  // xas -> Cyrl
+    3667, 85,  // xco -> Chrs
+    3671, 70,  // xcr -> Cari
+    3675, 100,  // xdq -> Cyrl
+    3679, 10,  // xhe -> Arab
+    3683, 235,  // xhm -> Khmr
+    3687, 385,  // xis -> Orya
+    3691, 10,  // xka -> Arab
+    3695, 10,  // xkc -> Arab
+    3699, 10,  // xkj -> Arab
+    3703, 10,  // xkp -> Arab
+    3707, 285,  // xlc -> Lyci
+    3711, 290,  // xld -> Lydi
+    3715, 115,  // xly -> Elym
+    3719, 125,  // xmf -> Geor
+    3723, 300,  // xmn -> Mani
+    3727, 315,  // xmr -> Merc
+    3731, 350,  // xna -> Narb
+    3735, 105,  // xnr -> Deva
+    3739, 150,  // xpg -> Grek
+    3743, 370,  // xpi -> Ogam
+    3747, 100,  // xpm -> Cyrl
+    3751, 415,  // xpr -> Prti
+    3755, 100,  // xrm -> Cyrl
+    3759, 100,  // xrn -> Cyrl
+    3763, 440,  // xsa -> Sarb
+    3767, 105,  // xsr -> Deva
+    3771, 500,  // xub -> Taml
+    3775, 500,  // xuj -> Taml
+    3779, 200,  // xve -> Ital
+    3783, 10,  // xvi -> Arab
+    3787, 100,  // xwo -> Cyrl
+    3791, 305,  // xzh -> Marc
+    3795, 100,  // yai -> Cyrl
+    3799, 105,  // ybh -> Deva
+    3803, 105,  // ybi -> Deva
+    3807, 10,  // ydg -> Arab
+    3811, 320,  // yea -> Mlym
+    3815, 150,  // yej -> Grek
+    3819, 515,  // yeu -> Telu
+    3823, 410,  // ygp -> Plrd
+    3827, 185,  // yhd -> Hebr
+    3831, 185,  // yi -> Hebr
+    3834, 575,  // yig -> Yiii
+    3838, 185,  // yih -> Hebr
+    3842, 575,  // yiv -> Yiii
+    3846, 100,  // ykg -> Cyrl
+    3850, 410,  // yna -> Plrd
+    3854, 100,  // ynk -> Cyrl
+    3858, 210,  // yoi -> Jpan
+    3862, 530,  // yoy -> Thai
+    3866, 100,  // yrk -> Cyrl
+    3870, 575,  // ysd -> Yiii
+    3874, 575,  // ysn -> Yiii
+    3878, 575,  // ysp -> Yiii
+    3882, 100,  // ysr -> Cyrl
+    3886, 410,  // ysy -> Plrd
+    3890, 185,  // yud -> Hebr
+    3894, 180,  // yue -> Hant
+    3898, 175,  // yue_CN -> Hans
+    3905, 100,  // yug -> Cyrl
+    3909, 100,  // yux -> Cyrl
+    3913, 410,  // ywq -> Plrd
+    3917, 410,  // ywu -> Plrd
+    3921, 535,  // zau -> Tibt
+    3925, 10,  // zba -> Arab
+    3929, 170,  // zch -> Hani
+    3933, 10,  // zdj -> Arab
+    3937, 170,  // zeh -> Hani
+    3941, 520,  // zen -> Tfng
+    3945, 170,  // zgb -> Hani
+    3949, 520,  // zgh -> Tfng
+    3953, 170,  // zgm -> Hani
+    3957, 170,  // zgn -> Hani
+    3961, 175,  // zh -> Hans
+    3964, 180,  // zh_AU -> Hant
+    3970, 180,  // zh_BN -> Hant
+    3976, 180,  // zh_GB -> Hant
+    3982, 180,  // zh_GF -> Hant
+    3988, 180,  // zh_HK -> Hant
+    3994, 180,  // zh_ID -> Hant
+    4000, 180,  // zh_MO -> Hant
+    4006, 180,  // zh_PA -> Hant
+    4012, 180,  // zh_PF -> Hant
+    4018, 180,  // zh_PH -> Hant
+    4024, 180,  // zh_SR -> Hant
+    4030, 180,  // zh_TH -> Hant
+    4036, 180,  // zh_TW -> Hant
+    4042, 180,  // zh_US -> Hant
+    4048, 180,  // zh_VN -> Hant
+    4054, 170,  // zhd -> Hani
+    4058, 365,  // zhx -> Nshu
+    4062, 100,  // zko -> Cyrl
+    4066, 240,  // zkt -> Kits
+    4070, 100,  // zkz -> Cyrl
+    4074, 170,  // zlj -> Hani
+    4078, 170,  // zln -> Hani
+    4082, 170,  // zlq -> Hani
+    4086, 170,  // zqe -> Hani
+    4090, 185,  // zrp -> Hebr
+    4094, 10,  // zum -> Arab
+    4098, 170,  // zyg -> Hani
+    4102, 170,  // zyn -> Hani
+    4106, 170,  // zzj -> Hani
 };
 
 //======================================================================

+ 31 - 23
thirdparty/icu4c/common/localematcher.cpp

@@ -4,6 +4,8 @@
 // localematcher.cpp
 // created: 2019may08 Markus W. Scherer
 
+#include <optional>
+
 #include "unicode/utypes.h"
 #include "unicode/localebuilder.h"
 #include "unicode/localematcher.h"
@@ -302,7 +304,7 @@ LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const {
 
 namespace {
 
-LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale,
+LSR getMaximalLsrOrUnd(const LikelySubtags &likelySubtags, const Locale &locale,
                        UErrorCode &errorCode) {
     if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) {
         return UND_LSR;
@@ -338,7 +340,7 @@ int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength
 }
 
 LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) :
-        likelySubtags(*XLikelySubtags::getSingleton(errorCode)),
+        likelySubtags(*LikelySubtags::getSingleton(errorCode)),
         localeDistance(*LocaleDistance::getSingleton(errorCode)),
         thresholdDistance(builder.thresholdDistance_),
         demotionPerDesiredLocale(0),
@@ -551,7 +553,7 @@ LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) noexcept {
 
 class LocaleLsrIterator {
 public:
-    LocaleLsrIterator(const XLikelySubtags &likelySubtags, Locale::Iterator &locales,
+    LocaleLsrIterator(const LikelySubtags &likelySubtags, Locale::Iterator &locales,
                       ULocMatchLifetime lifetime) :
             likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {}
 
@@ -596,7 +598,7 @@ public:
     }
 
 private:
-    const XLikelySubtags &likelySubtags;
+    const LikelySubtags &likelySubtags;
     Locale::Iterator &locales;
     ULocMatchLifetime lifetime;
     const Locale *current = nullptr, *remembered = nullptr;
@@ -605,10 +607,11 @@ private:
 
 const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const {
     if (U_FAILURE(errorCode)) { return nullptr; }
-    int32_t suppIndex = getBestSuppIndex(
+    std::optional<int32_t> suppIndex = getBestSuppIndex(
         getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
         nullptr, errorCode);
-    return U_SUCCESS(errorCode) && suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
+    return U_SUCCESS(errorCode) && suppIndex.has_value() ? supportedLocales[*suppIndex]
+                                                         : defaultLocale;
 }
 
 const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
@@ -618,12 +621,14 @@ const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales,
         return defaultLocale;
     }
     LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
-    int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
-    return U_SUCCESS(errorCode) && suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale;
+    std::optional<int32_t> suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
+    return U_SUCCESS(errorCode) && suppIndex.has_value() ? supportedLocales[*suppIndex]
+                                                         : defaultLocale;
 }
 
 const Locale *LocaleMatcher::getBestMatchForListString(
         StringPiece desiredLocaleList, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return nullptr; }
     LocalePriorityList list(desiredLocaleList, errorCode);
     LocalePriorityList::Iterator iter = list.iterator();
     return getBestMatch(iter, errorCode);
@@ -634,13 +639,13 @@ LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
     if (U_FAILURE(errorCode)) {
         return Result(nullptr, defaultLocale, -1, -1, false);
     }
-    int32_t suppIndex = getBestSuppIndex(
+    std::optional<int32_t> suppIndex = getBestSuppIndex(
         getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode),
         nullptr, errorCode);
-    if (U_FAILURE(errorCode) || suppIndex < 0) {
+    if (U_FAILURE(errorCode) || !suppIndex.has_value()) {
         return Result(nullptr, defaultLocale, -1, -1, false);
     } else {
-        return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, false);
+        return Result(&desiredLocale, supportedLocales[*suppIndex], 0, *suppIndex, false);
     }
 }
 
@@ -650,18 +655,19 @@ LocaleMatcher::Result LocaleMatcher::getBestMatchResult(
         return Result(nullptr, defaultLocale, -1, -1, false);
     }
     LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES);
-    int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
-    if (U_FAILURE(errorCode) || suppIndex < 0) {
+    std::optional<int32_t> suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode);
+    if (U_FAILURE(errorCode) || !suppIndex.has_value()) {
         return Result(nullptr, defaultLocale, -1, -1, false);
     } else {
-        return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex],
-                      lsrIter.getBestDesiredIndex(), suppIndex, true);
+        return Result(lsrIter.orphanRemembered(), supportedLocales[*suppIndex],
+                      lsrIter.getBestDesiredIndex(), *suppIndex, true);
     }
 }
 
-int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter,
-                                        UErrorCode &errorCode) const {
-    if (U_FAILURE(errorCode)) { return -1; }
+std::optional<int32_t> LocaleMatcher::getBestSuppIndex(LSR desiredLSR,
+                                                       LocaleLsrIterator *remainingIter,
+                                                       UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return std::nullopt; }
     int32_t desiredIndex = 0;
     int32_t bestSupportedLsrIndex = -1;
     for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) {
@@ -684,7 +690,7 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
             bestShiftedDistance = LocaleDistance::getShiftedDistance(bestIndexAndDistance);
             if (remainingIter != nullptr) {
                 remainingIter->rememberCurrent(desiredIndex, errorCode);
-                if (U_FAILURE(errorCode)) { return -1; }
+                if (U_FAILURE(errorCode)) { return std::nullopt; }
             }
             bestSupportedLsrIndex = LocaleDistance::getIndex(bestIndexAndDistance);
         }
@@ -695,20 +701,21 @@ int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remai
             break;
         }
         desiredLSR = remainingIter->next(errorCode);
-        if (U_FAILURE(errorCode)) { return -1; }
+        if (U_FAILURE(errorCode)) { return std::nullopt; }
         ++desiredIndex;
     }
     if (bestSupportedLsrIndex < 0) {
         // no good match
-        return -1;
+        return std::nullopt;
     }
     return supportedIndexes[bestSupportedLsrIndex];
 }
 
 UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported,
                              UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return false; }
     LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
-    if (U_FAILURE(errorCode)) { return 0; }
+    if (U_FAILURE(errorCode)) { return false; }
     const LSR *pSuppLSR = &suppLSR;
     int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
             getMaximalLsrOrUnd(likelySubtags, desired, errorCode),
@@ -718,9 +725,10 @@ UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported,
 }
 
 double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return 0.; }
     // Returns the inverse of the distance: That is, 1-distance(desired, supported).
     LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode);
-    if (U_FAILURE(errorCode)) { return 0; }
+    if (U_FAILURE(errorCode)) { return 0.; }
     const LSR *pSuppLSR = &suppLSR;
     int32_t indexAndDistance = localeDistance.getBestIndexAndDistance(
             getMaximalLsrOrUnd(likelySubtags, desired, errorCode),

+ 3 - 3
thirdparty/icu4c/common/localeprioritylist.cpp

@@ -21,13 +21,13 @@ U_NAMESPACE_BEGIN
 namespace {
 
 int32_t hashLocale(const UHashTok token) {
-    auto *locale = static_cast<const Locale *>(token.pointer);
+    const auto* locale = static_cast<const Locale*>(token.pointer);
     return locale->hashCode();
 }
 
 UBool compareLocales(const UHashTok t1, const UHashTok t2) {
-    auto *l1 = static_cast<const Locale *>(t1.pointer);
-    auto *l2 = static_cast<const Locale *>(t2.pointer);
+    const auto* l1 = static_cast<const Locale*>(t1.pointer);
+    const auto* l2 = static_cast<const Locale*>(t2.pointer);
     return *l1 == *l2;
 }
 

+ 13 - 18
thirdparty/icu4c/common/locavailable.cpp

@@ -39,14 +39,10 @@ static icu::Locale*  availableLocaleList = nullptr;
 static int32_t  availableLocaleListCount;
 static icu::UInitOnce gInitOnceLocale {};
 
-U_NAMESPACE_END
-
-U_CDECL_BEGIN
+namespace {
 
-static UBool U_CALLCONV locale_available_cleanup()
+UBool U_CALLCONV locale_available_cleanup()
 {
-    U_NAMESPACE_USE
-
     if (availableLocaleList) {
         delete []availableLocaleList;
         availableLocaleList = nullptr;
@@ -57,9 +53,7 @@ static UBool U_CALLCONV locale_available_cleanup()
     return true;
 }
 
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
+}  // namespace
 
 void U_CALLCONV locale_available_init() {
     // This function is a friend of class Locale.
@@ -107,10 +101,9 @@ icu::UInitOnce ginstalledLocalesInitOnce {};
 class AvailableLocalesSink : public ResourceSink {
   public:
     void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
+        if (U_FAILURE(status)) { return; }
         ResourceTable resIndexTable = value.getTable(status);
-        if (U_FAILURE(status)) {
-            return;
-        }
+        if (U_FAILURE(status)) { return; }
         for (int32_t i = 0; resIndexTable.getKeyAndValue(i, key, value); ++i) {
             ULocAvailableType type;
             if (uprv_strcmp(key, "InstalledLocales") == 0) {
@@ -144,7 +137,8 @@ class AvailableLocalesStringEnumeration : public StringEnumeration {
     AvailableLocalesStringEnumeration(ULocAvailableType type) : fType(type) {
     }
 
-    const char* next(int32_t *resultLength, UErrorCode&) override {
+    const char* next(int32_t *resultLength, UErrorCode &status) override {
+        if (U_FAILURE(status)) { return nullptr; }
         ULocAvailableType actualType = fType;
         int32_t actualIndex = fIndex++;
 
@@ -176,11 +170,13 @@ class AvailableLocalesStringEnumeration : public StringEnumeration {
         return result;
     }
 
-    void reset(UErrorCode&) override {
+    void reset(UErrorCode &status) override {
+        if (U_FAILURE(status)) { return; }
         fIndex = 0;
     }
 
-    int32_t count(UErrorCode&) const override {
+    int32_t count(UErrorCode &status) const override {
+        if (U_FAILURE(status)) { return 0; }
         if (fType == ULOC_AVAILABLE_WITH_LEGACY_ALIASES) {
             return gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT]
                 + gAvailableLocaleCounts[ULOC_AVAILABLE_ONLY_LEGACY_ALIASES];
@@ -196,7 +192,7 @@ class AvailableLocalesStringEnumeration : public StringEnumeration {
 
 /* ### Get available **************************************************/
 
-static UBool U_CALLCONV uloc_cleanup() {
+UBool U_CALLCONV uloc_cleanup() {
     for (int32_t i = 0; i < UPRV_LENGTHOF(gAvailableLocaleNames); i++) {
         uprv_free(gAvailableLocaleNames[i]);
         gAvailableLocaleNames[i] = nullptr;
@@ -209,7 +205,7 @@ static UBool U_CALLCONV uloc_cleanup() {
 // Load Installed Locales. This function will be called exactly once
 //   via the initOnce mechanism.
 
-static void U_CALLCONV loadInstalledLocales(UErrorCode& status) {
+void U_CALLCONV loadInstalledLocales(UErrorCode& status) {
     ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup);
 
     icu::LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "res_index", &status));
@@ -267,4 +263,3 @@ uloc_openAvailableByType(ULocAvailableType type, UErrorCode* status) {
     }
     return uenum_openFromStringEnumeration(result.orphan(), status);
 }
-

+ 3 - 3
thirdparty/icu4c/common/locbased.cpp

@@ -17,7 +17,7 @@ U_NAMESPACE_BEGIN
 
 Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
     const char* id = getLocaleID(type, status);
-    return Locale((id != 0) ? id : "");
+    return Locale(id != nullptr ? id : "");
 }
 
 const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const {
@@ -37,11 +37,11 @@ const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status
 }
 
 void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) {
-    if (validID != 0) {
+    if (validID != nullptr) {
       uprv_strncpy(valid, validID, ULOC_FULLNAME_CAPACITY);
       valid[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
     }
-    if (actualID != 0) {
+    if (actualID != nullptr) {
       uprv_strncpy(actual, actualID, ULOC_FULLNAME_CAPACITY);
       actual[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate
     }

+ 67 - 72
thirdparty/icu4c/common/locdispnames.cpp

@@ -26,7 +26,6 @@
 #include "unicode/uloc.h"
 #include "unicode/ures.h"
 #include "unicode/ustring.h"
-#include "bytesinkutil.h"
 #include "charstr.h"
 #include "cmemory.h"
 #include "cstring.h"
@@ -60,7 +59,7 @@ Locale::getDisplayLanguage(const Locale &displayLocale,
     int32_t length;
 
     buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if(buffer==0) {
+    if (buffer == nullptr) {
         result.truncate(0);
         return result;
     }
@@ -72,7 +71,7 @@ Locale::getDisplayLanguage(const Locale &displayLocale,
 
     if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
         buffer=result.getBuffer(length);
-        if(buffer==0) {
+        if (buffer == nullptr) {
             result.truncate(0);
             return result;
         }
@@ -100,7 +99,7 @@ Locale::getDisplayScript(const Locale &displayLocale,
     int32_t length;
 
     buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if(buffer==0) {
+    if (buffer == nullptr) {
         result.truncate(0);
         return result;
     }
@@ -112,7 +111,7 @@ Locale::getDisplayScript(const Locale &displayLocale,
 
     if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
         buffer=result.getBuffer(length);
-        if(buffer==0) {
+        if (buffer == nullptr) {
             result.truncate(0);
             return result;
         }
@@ -140,7 +139,7 @@ Locale::getDisplayCountry(const Locale &displayLocale,
     int32_t length;
 
     buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if(buffer==0) {
+    if (buffer == nullptr) {
         result.truncate(0);
         return result;
     }
@@ -152,7 +151,7 @@ Locale::getDisplayCountry(const Locale &displayLocale,
 
     if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
         buffer=result.getBuffer(length);
-        if(buffer==0) {
+        if (buffer == nullptr) {
             result.truncate(0);
             return result;
         }
@@ -180,7 +179,7 @@ Locale::getDisplayVariant(const Locale &displayLocale,
     int32_t length;
 
     buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if(buffer==0) {
+    if (buffer == nullptr) {
         result.truncate(0);
         return result;
     }
@@ -192,7 +191,7 @@ Locale::getDisplayVariant(const Locale &displayLocale,
 
     if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
         buffer=result.getBuffer(length);
-        if(buffer==0) {
+        if (buffer == nullptr) {
             result.truncate(0);
             return result;
         }
@@ -220,7 +219,7 @@ Locale::getDisplayName(const Locale &displayLocale,
     int32_t length;
 
     buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY);
-    if(buffer==0) {
+    if (buffer == nullptr) {
         result.truncate(0);
         return result;
     }
@@ -232,7 +231,7 @@ Locale::getDisplayName(const Locale &displayLocale,
 
     if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
         buffer=result.getBuffer(length);
-        if(buffer==0) {
+        if (buffer == nullptr) {
             result.truncate(0);
             return result;
         }
@@ -276,50 +275,53 @@ U_NAMESPACE_END
 
 U_NAMESPACE_USE
 
+namespace {
+
 /* ### Constants **************************************************/
 
 /* These strings describe the resources we attempt to load from
  the locale ResourceBundle data file.*/
-static const char _kLanguages[]       = "Languages";
-static const char _kScripts[]         = "Scripts";
-static const char _kScriptsStandAlone[] = "Scripts%stand-alone";
-static const char _kCountries[]       = "Countries";
-static const char _kVariants[]        = "Variants";
-static const char _kKeys[]            = "Keys";
-static const char _kTypes[]           = "Types";
-//static const char _kRootName[]        = "root";
-static const char _kCurrency[]        = "currency";
-static const char _kCurrencies[]      = "Currencies";
-static const char _kLocaleDisplayPattern[] = "localeDisplayPattern";
-static const char _kPattern[]         = "pattern";
-static const char _kSeparator[]       = "separator";
+constexpr char _kLanguages[]       = "Languages";
+constexpr char _kScripts[]         = "Scripts";
+constexpr char _kScriptsStandAlone[] = "Scripts%stand-alone";
+constexpr char _kCountries[]       = "Countries";
+constexpr char _kVariants[]        = "Variants";
+constexpr char _kKeys[]            = "Keys";
+constexpr char _kTypes[]           = "Types";
+//constexpr char _kRootName[]        = "root";
+constexpr char _kCurrency[]        = "currency";
+constexpr char _kCurrencies[]      = "Currencies";
+constexpr char _kLocaleDisplayPattern[] = "localeDisplayPattern";
+constexpr char _kPattern[]         = "pattern";
+constexpr char _kSeparator[]       = "separator";
 
 /* ### Display name **************************************************/
 
-static int32_t
+int32_t
 _getStringOrCopyKey(const char *path, const char *locale,
                     const char *tableKey, 
                     const char* subTableKey,
                     const char *itemKey,
                     const char *substitute,
                     char16_t *dest, int32_t destCapacity,
-                    UErrorCode *pErrorCode) {
+                    UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return 0; }
     const char16_t *s = nullptr;
     int32_t length = 0;
 
     if(itemKey==nullptr) {
         /* top-level item: normal resource bundle access */
-        icu::LocalUResourceBundlePointer rb(ures_open(path, locale, pErrorCode));
+        icu::LocalUResourceBundlePointer rb(ures_open(path, locale, &errorCode));
 
-        if(U_SUCCESS(*pErrorCode)) {
-            s=ures_getStringByKey(rb.getAlias(), tableKey, &length, pErrorCode);
+        if(U_SUCCESS(errorCode)) {
+            s=ures_getStringByKey(rb.getAlias(), tableKey, &length, &errorCode);
             /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */
         }
     } else {
         bool isLanguageCode = (uprv_strncmp(tableKey, _kLanguages, 9) == 0);
         /* Language code should not be a number. If it is, set the error code. */
         if (isLanguageCode && uprv_strtol(itemKey, nullptr, 10)) {
-            *pErrorCode = U_MISSING_RESOURCE_ERROR;
+            errorCode = U_MISSING_RESOURCE_ERROR;
         } else {
             /* second-level item, use special fallback */
             s=uloc_getTableStringWithFallback(path, locale,
@@ -327,22 +329,22 @@ _getStringOrCopyKey(const char *path, const char *locale,
                                                subTableKey,
                                                itemKey,
                                                &length,
-                                               pErrorCode);
-            if (U_FAILURE(*pErrorCode) && isLanguageCode && itemKey != nullptr) {
+                                               &errorCode);
+            if (U_FAILURE(errorCode) && isLanguageCode && itemKey != nullptr) {
                 // convert itemKey locale code to canonical form and try again, ICU-20870
-                *pErrorCode = U_ZERO_ERROR;
+                errorCode = U_ZERO_ERROR;
                 Locale canonKey = Locale::createCanonical(itemKey);
                 s=uloc_getTableStringWithFallback(path, locale,
                                                     tableKey,
                                                     subTableKey,
                                                     canonKey.getName(),
                                                     &length,
-                                                    pErrorCode);
+                                                    &errorCode);
             }
         }
     }
 
-    if(U_SUCCESS(*pErrorCode)) {
+    if(U_SUCCESS(errorCode)) {
         int32_t copyLength=uprv_min(length, destCapacity);
         if(copyLength>0 && s != nullptr) {
             u_memcpy(dest, s, copyLength);
@@ -351,67 +353,63 @@ _getStringOrCopyKey(const char *path, const char *locale,
         /* no string from a resource bundle: convert the substitute */
         length=(int32_t)uprv_strlen(substitute);
         u_charsToUChars(substitute, dest, uprv_min(length, destCapacity));
-        *pErrorCode=U_USING_DEFAULT_WARNING;
+        errorCode = U_USING_DEFAULT_WARNING;
     }
 
-    return u_terminateUChars(dest, destCapacity, length, pErrorCode);
+    return u_terminateUChars(dest, destCapacity, length, &errorCode);
 }
 
-typedef  int32_t U_CALLCONV UDisplayNameGetter(const char *, char *, int32_t, UErrorCode *);
+using UDisplayNameGetter = icu::CharString(const char*, UErrorCode&);
 
-static int32_t
+int32_t
 _getDisplayNameForComponent(const char *locale,
                             const char *displayLocale,
                             char16_t *dest, int32_t destCapacity,
                             UDisplayNameGetter *getter,
                             const char *tag,
-                            UErrorCode *pErrorCode) {
-    char localeBuffer[ULOC_FULLNAME_CAPACITY*4];
-    int32_t length;
+                            UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) { return 0; }
     UErrorCode localStatus;
     const char* root = nullptr;
 
-    /* argument checking */
-    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
-        return 0;
-    }
-
     if(destCapacity<0 || (destCapacity>0 && dest==nullptr)) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }
 
     localStatus = U_ZERO_ERROR;
-    length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus);
-    if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+    icu::CharString localeBuffer = (*getter)(locale, localStatus);
+    if (U_FAILURE(localStatus)) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }
-    if(length==0) {
+    if (localeBuffer.isEmpty()) {
         // For the display name, we treat this as unknown language (ICU-20273).
-        if (getter == uloc_getLanguage) {
-            uprv_strcpy(localeBuffer, "und");
+        if (getter == ulocimp_getLanguage) {
+            localeBuffer.append("und", errorCode);
         } else {
-            return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
+            return u_terminateUChars(dest, destCapacity, 0, &errorCode);
         }
     }
 
     root = tag == _kCountries ? U_ICUDATA_REGION : U_ICUDATA_LANG;
 
     return _getStringOrCopyKey(root, displayLocale,
-                               tag, nullptr, localeBuffer,
-                               localeBuffer,
+                               tag, nullptr, localeBuffer.data(),
+                               localeBuffer.data(),
                                dest, destCapacity,
-                               pErrorCode);
+                               errorCode);
 }
 
+}  // namespace
+
 U_CAPI int32_t U_EXPORT2
 uloc_getDisplayLanguage(const char *locale,
                         const char *displayLocale,
                         char16_t *dest, int32_t destCapacity,
                         UErrorCode *pErrorCode) {
     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                uloc_getLanguage, _kLanguages, pErrorCode);
+                ulocimp_getLanguage, _kLanguages, *pErrorCode);
 }
 
 U_CAPI int32_t U_EXPORT2
@@ -420,19 +418,20 @@ uloc_getDisplayScript(const char* locale,
                       char16_t *dest, int32_t destCapacity,
                       UErrorCode *pErrorCode)
 {
+    if (U_FAILURE(*pErrorCode)) { return 0; }
     UErrorCode err = U_ZERO_ERROR;
     int32_t res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                uloc_getScript, _kScriptsStandAlone, &err);
+                ulocimp_getScript, _kScriptsStandAlone, err);
 
     if (destCapacity == 0 && err == U_BUFFER_OVERFLOW_ERROR) {
         // For preflight, return the max of the value and the fallback.
         int32_t fallback_res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                                                           uloc_getScript, _kScripts, pErrorCode);
+                                                           ulocimp_getScript, _kScripts, *pErrorCode);
         return (fallback_res > res) ? fallback_res : res;
     }
     if ( err == U_USING_DEFAULT_WARNING ) {
         return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                                           uloc_getScript, _kScripts, pErrorCode);
+                                           ulocimp_getScript, _kScripts, *pErrorCode);
     } else {
         *pErrorCode = err;
         return res;
@@ -446,7 +445,7 @@ uloc_getDisplayScriptInContext(const char* locale,
                       UErrorCode *pErrorCode)
 {
     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                    uloc_getScript, _kScripts, pErrorCode);
+                    ulocimp_getScript, _kScripts, *pErrorCode);
 }
 
 U_CAPI int32_t U_EXPORT2
@@ -455,7 +454,7 @@ uloc_getDisplayCountry(const char *locale,
                        char16_t *dest, int32_t destCapacity,
                        UErrorCode *pErrorCode) {
     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                uloc_getCountry, _kCountries, pErrorCode);
+                ulocimp_getRegion, _kCountries, *pErrorCode);
 }
 
 /*
@@ -469,7 +468,7 @@ uloc_getDisplayVariant(const char *locale,
                        char16_t *dest, int32_t destCapacity,
                        UErrorCode *pErrorCode) {
     return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity,
-                uloc_getVariant, _kVariants, pErrorCode);
+                ulocimp_getVariant, _kVariants, *pErrorCode);
 }
 
 /* Instead of having a separate pass for 'special' patterns, reintegrate the two
@@ -809,7 +808,7 @@ uloc_getDisplayKeyword(const char* keyword,
                                keyword, 
                                keyword,      
                                dest, destCapacity,
-                               status);
+                               *status);
 
 }
 
@@ -836,11 +835,7 @@ uloc_getDisplayKeywordValue(   const char* locale,
     }
 
     /* get the keyword value */
-    CharString keywordValue;
-    {
-        CharStringByteSink sink(&keywordValue);
-        ulocimp_getKeywordValue(locale, keyword, sink, status);
-    }
+    CharString keywordValue = ulocimp_getKeywordValue(locale, keyword, *status);
 
     /* 
      * if the keyword is equal to currency .. then to get the display name 
@@ -897,6 +892,6 @@ uloc_getDisplayKeywordValue(   const char* locale,
                                    keywordValue.data(),
                                    keywordValue.data(),
                                    dest, destCapacity,
-                                   status);
+                                   *status);
     }
 }

+ 4 - 4
thirdparty/icu4c/common/locdistance.cpp

@@ -59,7 +59,7 @@ UBool U_CALLCONV cleanup() {
 void U_CALLCONV LocaleDistance::initLocaleDistance(UErrorCode &errorCode) {
     // This function is invoked only via umtx_initOnce().
     U_ASSERT(gLocaleDistance == nullptr);
-    const XLikelySubtags &likely = *XLikelySubtags::getSingleton(errorCode);
+    const LikelySubtags &likely = *LikelySubtags::getSingleton(errorCode);
     if (U_FAILURE(errorCode)) { return; }
     const LocaleDistanceData &data = likely.getDistanceData();
     if (data.distanceTrieBytes == nullptr ||
@@ -83,7 +83,7 @@ const LocaleDistance *LocaleDistance::getSingleton(UErrorCode &errorCode) {
     return gLocaleDistance;
 }
 
-LocaleDistance::LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely) :
+LocaleDistance::LocaleDistance(const LocaleDistanceData &data, const LikelySubtags &likely) :
         likelySubtags(likely),
         trie(data.distanceTrieBytes),
         regionToPartitionsIndex(data.regionToPartitions), partitionArrays(data.partitions),
@@ -119,7 +119,7 @@ int32_t LocaleDistance::getBestIndexAndDistance(
     uint64_t desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0;
     // Index of the supported LSR with the lowest distance.
     int32_t bestIndex = -1;
-    // Cached lookup info from XLikelySubtags.compareLikely().
+    // Cached lookup info from LikelySubtags.compareLikely().
     int32_t bestLikelyInfo = -1;
     for (int32_t slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) {
         const LSR &supported = *supportedLSRs[slIndex];
@@ -399,7 +399,7 @@ int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue)
     }
 }
 
-UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const {
+bool LocaleDistance::isParadigmLSR(const LSR &lsr) const {
     // Linear search for a very short list (length 6 as of 2019),
     // because we look for equivalence not equality, and
     // because it's easy.

+ 5 - 5
thirdparty/icu4c/common/locdistance.h

@@ -62,7 +62,7 @@ public:
                                     ULocMatchFavorSubtag favorSubtag,
                                     ULocMatchDirection direction) const;
 
-    UBool isParadigmLSR(const LSR &lsr) const;
+    bool isParadigmLSR(const LSR &lsr) const;
 
     int32_t getDefaultScriptDistance() const {
         return defaultScriptDistance;
@@ -83,14 +83,14 @@ private:
     // tic constexpr int32_t MAX_INDEX = 0x1fffff;  // avoids sign bit
     static constexpr int32_t INDEX_NEG_1 = 0xfffffc00;
 
-    LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely);
+    LocaleDistance(const LocaleDistanceData &data, const LikelySubtags &likely);
     LocaleDistance(const LocaleDistance &other) = delete;
     LocaleDistance &operator=(const LocaleDistance &other) = delete;
 
     static void initLocaleDistance(UErrorCode &errorCode);
 
-    UBool isMatch(const LSR &desired, const LSR &supported,
-                  int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
+    bool isMatch(const LSR &desired, const LSR &supported,
+                 int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const {
         const LSR *pSupp = &supported;
         return getBestIndexAndDistance(
             desired, &pSupp, 1,
@@ -119,7 +119,7 @@ private:
         return defaultRegionDistance;
     }
 
-    const XLikelySubtags &likelySubtags;
+    const LikelySubtags &likelySubtags;
 
     // The trie maps each dlang+slang+dscript+sscript+dregion+sregion
     // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance.

+ 64 - 92
thirdparty/icu4c/common/locdspnm.cpp

@@ -18,45 +18,17 @@
 #include "unicode/udisplaycontext.h"
 #include "unicode/brkiter.h"
 #include "unicode/ucurr.h"
+#include "bytesinkutil.h"
+#include "charstr.h"
 #include "cmemory.h"
 #include "cstring.h"
 #include "mutex.h"
+#include "uassert.h"
 #include "ulocimp.h"
 #include "umutex.h"
 #include "ureslocs.h"
 #include "uresimp.h"
 
-#include <stdarg.h>
-
-/**
- * Concatenate a number of null-terminated strings to buffer, leaving a
- * null-terminated string.  The last argument should be the null pointer.
- * Return the length of the string in the buffer, not counting the trailing
- * null.  Return -1 if there is an error (buffer is null, or buflen < 1).
- */
-static int32_t ncat(char *buffer, uint32_t buflen, ...) {
-  va_list args;
-  char *str;
-  char *p = buffer;
-  const char* e = buffer + buflen - 1;
-
-  if (buffer == nullptr || buflen < 1) {
-    return -1;
-  }
-
-  va_start(args, buflen);
-  while ((str = va_arg(args, char *)) != 0) {
-    char c;
-    while (p != e && (c = *str++) != 0) {
-      *p++ = c;
-    }
-  }
-  *p = 0;
-  va_end(args);
-
-  return static_cast<int32_t>(p - buffer);
-}
-
 U_NAMESPACE_BEGIN
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -64,12 +36,13 @@ U_NAMESPACE_BEGIN
 // Access resource data for locale components.
 // Wrap code in uloc.c for now.
 class ICUDataTable {
-    const char* path;
+    const char* const path;
     Locale locale;
 
 public:
+    // Note: path should be a pointer to a statically allocated string.
     ICUDataTable(const char* path, const Locale& locale);
-    ~ICUDataTable();
+    ~ICUDataTable() = default;
 
     const Locale& getLocale();
 
@@ -95,23 +68,9 @@ ICUDataTable::getNoFallback(const char* tableKey, const char* itemKey, UnicodeSt
 }
 
 ICUDataTable::ICUDataTable(const char* path, const Locale& locale)
-    : path(nullptr), locale(Locale::getRoot())
+    : path(path), locale(locale)
 {
-  if (path) {
-    int32_t len = static_cast<int32_t>(uprv_strlen(path));
-    this->path = (const char*) uprv_malloc(len + 1);
-    if (this->path) {
-      uprv_strcpy((char *)this->path, path);
-      this->locale = locale;
-    }
-  }
-}
-
-ICUDataTable::~ICUDataTable() {
-  if (path) {
-    uprv_free((void*) path);
-    path = nullptr;
-  }
+    U_ASSERT(path != nullptr);
 }
 
 const Locale&
@@ -305,7 +264,7 @@ class LocaleDisplayNamesImpl : public LocaleDisplayNames {
     };
     // Capitalization transforms. For each usage type, indicates whether to titlecase for
     // the context specified in capitalizationContext (which we know at construction time)
-     UBool fCapitalization[kCapContextUsageCount];
+     bool fCapitalization[kCapContextUsageCount];
 
 public:
     // constructor
@@ -341,12 +300,12 @@ private:
                                 UnicodeString& result, bool substitute) const;
     UnicodeString& appendWithSep(UnicodeString& buffer, const UnicodeString& src) const;
     UnicodeString& adjustForUsageAndContext(CapContextUsage usage, UnicodeString& result) const;
-    UnicodeString& scriptDisplayName(const char* script, UnicodeString& result, UBool skipAdjust) const;
-    UnicodeString& regionDisplayName(const char* region, UnicodeString& result, UBool skipAdjust) const;
-    UnicodeString& variantDisplayName(const char* variant, UnicodeString& result, UBool skipAdjust) const;
-    UnicodeString& keyDisplayName(const char* key, UnicodeString& result, UBool skipAdjust) const;
+    UnicodeString& scriptDisplayName(const char* script, UnicodeString& result, bool skipAdjust) const;
+    UnicodeString& regionDisplayName(const char* region, UnicodeString& result, bool skipAdjust) const;
+    UnicodeString& variantDisplayName(const char* variant, UnicodeString& result, bool skipAdjust) const;
+    UnicodeString& keyDisplayName(const char* key, UnicodeString& result, bool skipAdjust) const;
     UnicodeString& keyValueDisplayName(const char* key, const char* value,
-                                        UnicodeString& result, UBool skipAdjust) const;
+                                        UnicodeString& result, bool skipAdjust) const;
     void initialize();
 
     struct CapitalizationContextSink;
@@ -399,7 +358,7 @@ LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale,
 }
 
 struct LocaleDisplayNamesImpl::CapitalizationContextSink : public ResourceSink {
-    UBool hasCapitalizationUsage;
+    bool hasCapitalizationUsage;
     LocaleDisplayNamesImpl& parent;
 
     CapitalizationContextSink(LocaleDisplayNamesImpl& _parent)
@@ -490,7 +449,7 @@ LocaleDisplayNamesImpl::initialize() {
 #if !UCONFIG_NO_BREAK_ITERATION
     // Only get the context data if we need it! This is a const object so we know now...
     // Also check whether we will need a break iterator (depends on the data)
-    UBool needBrkIter = false;
+    bool needBrkIter = false;
     if (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE) {
         LocalUResourceBundlePointer resource(ures_open(nullptr, locale.getName(), &status));
         if (U_FAILURE(status)) { return; }
@@ -582,36 +541,51 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
   const char* country = loc.getCountry();
   const char* variant = loc.getVariant();
 
-  UBool hasScript = uprv_strlen(script) > 0;
-  UBool hasCountry = uprv_strlen(country) > 0;
-  UBool hasVariant = uprv_strlen(variant) > 0;
+  bool hasScript = uprv_strlen(script) > 0;
+  bool hasCountry = uprv_strlen(country) > 0;
+  bool hasVariant = uprv_strlen(variant) > 0;
 
   if (dialectHandling == ULDN_DIALECT_NAMES) {
-    char buffer[ULOC_FULLNAME_CAPACITY];
+    UErrorCode status = U_ZERO_ERROR;
+    CharString buffer;
     do { // loop construct is so we can break early out of search
       if (hasScript && hasCountry) {
-        ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, "_", country, (char *)0);
-        localeIdName(buffer, resultName, false);
-        if (!resultName.isBogus()) {
-          hasScript = false;
-          hasCountry = false;
-          break;
+        buffer.append(lang, status)
+              .append('_', status)
+              .append(script, status)
+              .append('_', status)
+              .append(country, status);
+        if (U_SUCCESS(status)) {
+          localeIdName(buffer.data(), resultName, false);
+          if (!resultName.isBogus()) {
+            hasScript = false;
+            hasCountry = false;
+            break;
+          }
         }
       }
       if (hasScript) {
-        ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, (char *)0);
-        localeIdName(buffer, resultName, false);
-        if (!resultName.isBogus()) {
-          hasScript = false;
-          break;
+        buffer.clear().append(lang, status)  // reset: buffer may still hold the failed lang_script_country candidate
+              .append('_', status)
+              .append(script, status);
+        if (U_SUCCESS(status)) {
+          localeIdName(buffer.data(), resultName, false);
+          if (!resultName.isBogus()) {
+            hasScript = false;
+            break;
+          }
         }
       }
       if (hasCountry) {
-        ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", country, (char*)0);
-        localeIdName(buffer, resultName, false);
-        if (!resultName.isBogus()) {
-          hasCountry = false;
-          break;
+        buffer.clear().append(lang, status)  // reset: ncat() used to overwrite, append() accumulates
+              .append('_', status)
+              .append(country, status);
+        if (U_SUCCESS(status)) {
+          localeIdName(buffer.data(), resultName, false);
+          if (!resultName.isBogus()) {
+            hasCountry = false;
+            break;
+          }
         }
       }
     } while (false);
@@ -658,21 +632,19 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc,
   LocalPointer<StringEnumeration> e(loc.createKeywords(status));
   if (e.isValid() && U_SUCCESS(status)) {
     UnicodeString temp2;
-    char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY
     const char* key;
-    while ((key = e->next((int32_t *)0, status)) != nullptr) {
-      value[0] = 0;
-      loc.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status);
-      if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
-        return result;
+    while ((key = e->next((int32_t*)nullptr, status)) != nullptr) {
+        auto value = loc.getKeywordValue<CharString>(key, status);
+        if (U_FAILURE(status)) {
+            return result;
       }
       keyDisplayName(key, temp, true);
       temp.findAndReplace(formatOpenParen, formatReplaceOpenParen);
       temp.findAndReplace(formatCloseParen, formatReplaceCloseParen);
-      keyValueDisplayName(key, value, temp2, true);
+      keyValueDisplayName(key, value.data(), temp2, true);
       temp2.findAndReplace(formatOpenParen, formatReplaceOpenParen);
       temp2.findAndReplace(formatCloseParen, formatReplaceCloseParen);
-      if (temp2 != UnicodeString(value, -1, US_INV)) {
+      if (temp2 != UnicodeString(value.data(), -1, US_INV)) {
         appendWithSep(resultRemainder, temp2);
       } else if (temp != UnicodeString(key, -1, US_INV)) {
         UnicodeString temp3;
@@ -779,7 +751,7 @@ LocaleDisplayNamesImpl::languageDisplayName(const char* lang,
 UnicodeString&
 LocaleDisplayNamesImpl::scriptDisplayName(const char* script,
                                           UnicodeString& result,
-                                          UBool skipAdjust) const {
+                                          bool skipAdjust) const {
     if (nameLength == UDISPCTX_LENGTH_SHORT) {
         langData.getNoFallback("Scripts%short", script, result);
         if (!result.isBogus()) {
@@ -809,7 +781,7 @@ LocaleDisplayNamesImpl::scriptDisplayName(UScriptCode scriptCode,
 UnicodeString&
 LocaleDisplayNamesImpl::regionDisplayName(const char* region,
                                           UnicodeString& result,
-                                          UBool skipAdjust) const {
+                                          bool skipAdjust) const {
     if (nameLength == UDISPCTX_LENGTH_SHORT) {
          regionData.getNoFallback("Countries%short", region, result);
         if (!result.isBogus()) {
@@ -834,7 +806,7 @@ LocaleDisplayNamesImpl::regionDisplayName(const char* region,
 UnicodeString&
 LocaleDisplayNamesImpl::variantDisplayName(const char* variant,
                                            UnicodeString& result,
-                                           UBool skipAdjust) const {
+                                           bool skipAdjust) const {
     // don't have a resource for short variant names
     if (substitute == UDISPCTX_SUBSTITUTE) {
         langData.get("Variants", variant, result);
@@ -853,7 +825,7 @@ LocaleDisplayNamesImpl::variantDisplayName(const char* variant,
 UnicodeString&
 LocaleDisplayNamesImpl::keyDisplayName(const char* key,
                                        UnicodeString& result,
-                                       UBool skipAdjust) const {
+                                       bool skipAdjust) const {
     // don't have a resource for short key names
     if (substitute == UDISPCTX_SUBSTITUTE) {
         langData.get("Keys", key, result);
@@ -873,7 +845,7 @@ UnicodeString&
 LocaleDisplayNamesImpl::keyValueDisplayName(const char* key,
                                             const char* value,
                                             UnicodeString& result,
-                                            UBool skipAdjust) const {
+                                            bool skipAdjust) const {
     if (uprv_strcmp(key, "currency") == 0) {
         // ICU4C does not have ICU4J CurrencyDisplayInfo equivalent for now.
         UErrorCode sts = U_ZERO_ERROR;
@@ -939,7 +911,7 @@ uldn_open(const char * locale,
           UDialectHandling dialectHandling,
           UErrorCode *pErrorCode) {
   if (U_FAILURE(*pErrorCode)) {
-    return 0;
+    return nullptr;
   }
   if (locale == nullptr) {
     locale = uloc_getDefault();
@@ -952,7 +924,7 @@ uldn_openForContext(const char * locale,
                     UDisplayContext *contexts, int32_t length,
                     UErrorCode *pErrorCode) {
   if (U_FAILURE(*pErrorCode)) {
-    return 0;
+    return nullptr;
   }
   if (locale == nullptr) {
     locale = uloc_getDefault();

+ 80 - 101
thirdparty/icu4c/common/locid.cpp

@@ -57,10 +57,6 @@
 #include "ustr_imp.h"
 #include "uvector.h"
 
-U_CDECL_BEGIN
-static UBool U_CALLCONV locale_cleanup();
-U_CDECL_END
-
 U_NAMESPACE_BEGIN
 
 static Locale   *gLocaleCache = nullptr;
@@ -106,16 +102,17 @@ typedef enum ELocalePos {
     eMAX_LOCALES
 } ELocalePos;
 
-U_CDECL_BEGIN
+namespace {
+
 //
 // Deleter function for Locales owned by the default Locale hash table/
 //
-static void U_CALLCONV
+void U_CALLCONV
 deleteLocale(void *obj) {
     delete (icu::Locale *) obj;
 }
 
-static UBool U_CALLCONV locale_cleanup()
+UBool U_CALLCONV locale_cleanup()
 {
     U_NAMESPACE_USE
 
@@ -131,8 +128,7 @@ static UBool U_CALLCONV locale_cleanup()
     return true;
 }
 
-
-static void U_CALLCONV locale_init(UErrorCode &status) {
+void U_CALLCONV locale_init(UErrorCode &status) {
     U_NAMESPACE_USE
 
     U_ASSERT(gLocaleCache == nullptr);
@@ -163,7 +159,7 @@ static void U_CALLCONV locale_init(UErrorCode &status) {
     gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA");
 }
 
-U_CDECL_END
+}  // namespace
 
 U_NAMESPACE_BEGIN
 
@@ -182,15 +178,8 @@ Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
         canonicalize = true; // always canonicalize host ID
     }
 
-    CharString localeNameBuf;
-    {
-        CharStringByteSink sink(&localeNameBuf);
-        if (canonicalize) {
-            ulocimp_canonicalize(id, sink, &status);
-        } else {
-            ulocimp_getName(id, sink, &status);
-        }
-    }
+    CharString localeNameBuf =
+        canonicalize ? ulocimp_canonicalize(id, status) : ulocimp_getName(id, status);
 
     if (U_FAILURE(status)) {
         return gDefaultLocale;
@@ -494,7 +483,7 @@ namespace {
 UInitOnce gKnownCanonicalizedInitOnce {};
 UHashtable *gKnownCanonicalized = nullptr;
 
-static const char* const KNOWN_CANONICALIZED[] = {
+constexpr const char* KNOWN_CANONICALIZED[] = {
     "c",
     // Commonly used locales known are already canonicalized
     "af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ",
@@ -518,13 +507,13 @@ static const char* const KNOWN_CANONICALIZED[] = {
     "zh_Hant_TW", "zh_TW", "zu", "zu_ZA"
 };
 
-static UBool U_CALLCONV cleanupKnownCanonicalized() {
+UBool U_CALLCONV cleanupKnownCanonicalized() {
     gKnownCanonicalizedInitOnce.reset();
     if (gKnownCanonicalized) { uhash_close(gKnownCanonicalized); }
     return true;
 }
 
-static void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) {
+void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) {
     ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
                                 cleanupKnownCanonicalized);
     LocalUHashtablePointer newKnownCanonicalizedMap(
@@ -920,6 +909,8 @@ AliasData::loadData(UErrorCode &status)
  */
 AliasData*
 AliasDataBuilder::build(UErrorCode &status) {
+    if (U_FAILURE(status)) { return nullptr; }
+
     LocalUResourceBundlePointer metadata(
         ures_openDirect(nullptr, "metadata", &status));
     LocalUResourceBundlePointer metadataAlias(
@@ -1065,7 +1056,7 @@ AliasDataBuilder::build(UErrorCode &status) {
  */
 class AliasReplacer {
 public:
-    AliasReplacer(UErrorCode status) :
+    AliasReplacer(UErrorCode& status) :
             language(nullptr), script(nullptr), region(nullptr),
             extensions(nullptr),
             // store value in variants only once
@@ -1130,12 +1121,12 @@ private:
     }
 
     // Gather fields and generate locale ID into out.
-    CharString& outputToString(CharString& out, UErrorCode status);
+    CharString& outputToString(CharString& out, UErrorCode& status);
 
     // Generate the lookup key.
     CharString& generateKey(const char* language, const char* region,
                             const char* variant, CharString& out,
-                            UErrorCode status);
+                            UErrorCode& status);
 
     void parseLanguageReplacement(const char* replacement,
                                   const char*& replaceLanguage,
@@ -1172,8 +1163,9 @@ private:
 CharString&
 AliasReplacer::generateKey(
         const char* language, const char* region, const char* variant,
-        CharString& out, UErrorCode status)
+        CharString& out, UErrorCode& status)
 {
+    if (U_FAILURE(status)) { return out; }
     out.append(language, status);
     if (notEmpty(region)) {
         out.append(SEP_CHAR, status)
@@ -1529,13 +1521,12 @@ AliasReplacer::replaceTransformedExtensions(
     const char* tkey = ultag_getTKeyStart(str);
     int32_t tlangLen = (tkey == str) ? 0 :
         ((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
-    CharStringByteSink sink(&output);
     if (tlangLen > 0) {
         Locale tlang = LocaleBuilder()
             .setLanguageTag(StringPiece(str, tlangLen))
             .build(status);
         tlang.canonicalize(status);
-        tlang.toLanguageTag(sink, status);
+        output = tlang.toLanguageTag<CharString>(status);
         if (U_FAILURE(status)) {
             return false;
         }
@@ -1591,8 +1582,9 @@ AliasReplacer::replaceTransformedExtensions(
 
 CharString&
 AliasReplacer::outputToString(
-    CharString& out, UErrorCode status)
+    CharString& out, UErrorCode& status)
 {
+    if (U_FAILURE(status)) { return out; }
     out.append(language, status);
     if (notEmpty(script)) {
         out.append(SEP_CHAR, status)
@@ -1735,9 +1727,7 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
             while ((key = iter->next(nullptr, status)) != nullptr) {
                 if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 ||
                         uprv_strcmp("t", key) == 0) {
-                    CharString value;
-                    CharStringByteSink valueSink(&value);
-                    locale.getKeywordValue(key, valueSink, status);
+                    auto value = locale.getKeywordValue<CharString>(key, status);
                     if (U_FAILURE(status)) {
                         status = U_ZERO_ERROR;
                         continue;
@@ -1782,6 +1772,7 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status
 bool
 canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
 {
+    if (U_FAILURE(status)) { return false; }
     AliasReplacer replacer(status);
     return replacer.replace(locale, out, status);
 }
@@ -1791,6 +1782,8 @@ canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
 bool
 isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
 {
+    if (U_FAILURE(status)) { return false; }
+
     if (    uprv_strcmp(locale, "c") == 0 ||
             uprv_strcmp(locale, "en") == 0 ||
             uprv_strcmp(locale, "en_US") == 0) {
@@ -1809,24 +1802,30 @@ isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
 
 }  // namespace
 
+U_NAMESPACE_END
+
 // Function for testing.
-U_CAPI const char* const*
-ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length)
+U_EXPORT const char* const*
+ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length)
 {
-    *length = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
+    U_NAMESPACE_USE
+    length = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
     return KNOWN_CANONICALIZED;
 }
 
 // Function for testing.
-U_CAPI bool
+U_EXPORT bool
 ulocimp_isCanonicalizedLocaleForTest(const char* localeName)
 {
+    U_NAMESPACE_USE
     Locale l(localeName);
     UErrorCode status = U_ZERO_ERROR;
     CharString temp;
     return !canonicalizeLocale(l, temp, status) && U_SUCCESS(status);
 }
 
+U_NAMESPACE_BEGIN
+
 /*This function initializes a Locale from a C locale ID*/
 Locale& Locale::init(const char* localeID, UBool canonicalize)
 {
@@ -1846,7 +1845,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
     // without goto and without another function
     do {
         char *separator;
-        char *field[5] = {0};
+        char *field[5] = {nullptr};
         int32_t fieldLen[5] = {0};
         int32_t fieldIdx;
         int32_t variantField;
@@ -1871,7 +1870,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
             U_ASSERT(baseName == nullptr);
             /*Go to heap for the fullName if necessary*/
             fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
-            if(fullName == 0) {
+            if (fullName == nullptr) {
                 fullName = fullNameBuffer;
                 break; // error: out of memory
             }
@@ -1892,7 +1891,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize)
         separator = field[0] = fullName;
         fieldIdx = 1;
         char* at = uprv_strchr(fullName, '@');
-        while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 &&
+        while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != nullptr &&
                fieldIdx < UPRV_LENGTHOF(field)-1 &&
                (at == nullptr || separator < at)) {
             field[fieldIdx] = separator + 1;
@@ -2074,11 +2073,7 @@ Locale::addLikelySubtags(UErrorCode& status) {
         return;
     }
 
-    CharString maximizedLocaleID;
-    {
-        CharStringByteSink sink(&maximizedLocaleID);
-        ulocimp_addLikelySubtags(fullName, sink, &status);
-    }
+    CharString maximizedLocaleID = ulocimp_addLikelySubtags(fullName, status);
 
     if (U_FAILURE(status)) {
         return;
@@ -2100,11 +2095,7 @@ Locale::minimizeSubtags(bool favorScript, UErrorCode& status) {
         return;
     }
 
-    CharString minimizedLocaleID;
-    {
-        CharStringByteSink sink(&minimizedLocaleID);
-        ulocimp_minimizeSubtags(fullName, sink, favorScript, &status);
-    }
+    CharString minimizedLocaleID = ulocimp_minimizeSubtags(fullName, favorScript, status);
 
     if (U_FAILURE(status)) {
         return;
@@ -2155,17 +2146,12 @@ Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
     // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
     // and then Locale::init(), instead of just calling the normal constructor.
 
-    CharString localeID;
     int32_t parsedLength;
-    {
-        CharStringByteSink sink(&localeID);
-        ulocimp_forLanguageTag(
-                tag.data(),
-                tag.length(),
-                sink,
-                &parsedLength,
-                &status);
-    }
+    CharString localeID = ulocimp_forLanguageTag(
+            tag.data(),
+            tag.length(),
+            &parsedLength,
+            status);
 
     if (U_FAILURE(status)) {
         return result;
@@ -2195,7 +2181,7 @@ Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
         return;
     }
 
-    ulocimp_toLanguageTag(fullName, sink, /*strict=*/false, &status);
+    ulocimp_toLanguageTag(fullName, sink, /*strict=*/false, status);
 }
 
 Locale U_EXPORT2
@@ -2420,33 +2406,23 @@ Locale::getLocaleCache()
 
 class KeywordEnumeration : public StringEnumeration {
 protected:
-    char *keywords;
+    CharString keywords;
 private:
-    char *current;
-    int32_t length;
-    UnicodeString currUSKey;
-    static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */
+    const char *current;
+    static const char fgClassID;
 
 public:
     static UClassID U_EXPORT2 getStaticClassID() { return (UClassID)&fgClassID; }
     virtual UClassID getDynamicClassID() const override { return getStaticClassID(); }
 public:
     KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
-        : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) {
+        : keywords(), current(keywords.data()) {
         if(U_SUCCESS(status) && keywordLen != 0) {
             if(keys == nullptr || keywordLen < 0) {
                 status = U_ILLEGAL_ARGUMENT_ERROR;
             } else {
-                keywords = (char *)uprv_malloc(keywordLen+1);
-                if (keywords == nullptr) {
-                    status = U_MEMORY_ALLOCATION_ERROR;
-                }
-                else {
-                    uprv_memcpy(keywords, keys, keywordLen);
-                    keywords[keywordLen] = 0;
-                    current = keywords + currentIndex;
-                    length = keywordLen;
-                }
+                keywords.append(keys, keywordLen, status);
+                current = keywords.data() + currentIndex;
             }
         }
     }
@@ -2456,11 +2432,14 @@ public:
     virtual StringEnumeration * clone() const override
     {
         UErrorCode status = U_ZERO_ERROR;
-        return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status);
+        return new KeywordEnumeration(
+                keywords.data(), keywords.length(),
+                (int32_t)(current - keywords.data()), status);
     }
 
-    virtual int32_t count(UErrorCode &/*status*/) const override {
-        char *kw = keywords;
+    virtual int32_t count(UErrorCode& status) const override {
+        if (U_FAILURE(status)) { return 0; }
+        const char *kw = keywords.data();
         int32_t result = 0;
         while(*kw) {
             result++;
@@ -2489,21 +2468,22 @@ public:
     }
 
     virtual const UnicodeString* snext(UErrorCode& status) override {
+        if (U_FAILURE(status)) { return nullptr; }
         int32_t resultLength = 0;
         const char *s = next(&resultLength, status);
         return setChars(s, resultLength, status);
     }
 
-    virtual void reset(UErrorCode& /*status*/) override {
-        current = keywords;
+    virtual void reset(UErrorCode& status) override {
+        if (U_FAILURE(status)) { return; }
+        current = keywords.data();
     }
 };
 
 const char KeywordEnumeration::fgClassID = '\0';
 
-KeywordEnumeration::~KeywordEnumeration() {
-    uprv_free(keywords);
-}
+// Out-of-line virtual destructor to serve as the "key function".
+KeywordEnumeration::~KeywordEnumeration() = default;
 
 // A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
 // the next() method for each keyword before returning it.
@@ -2528,8 +2508,9 @@ public:
         if (resultLength != nullptr) *resultLength = 0;
         return nullptr;
     }
-    virtual int32_t count(UErrorCode &/*status*/) const override {
-        char *kw = keywords;
+    virtual int32_t count(UErrorCode& status) const override {
+        if (U_FAILURE(status)) { return 0; }
+        const char *kw = keywords.data();
         int32_t result = 0;
         while(*kw) {
             if (uloc_toUnicodeLocaleKey(kw) != nullptr) {
@@ -2557,9 +2538,7 @@ Locale::createKeywords(UErrorCode &status) const
     const char* assignment = uprv_strchr(fullName, '=');
     if(variantStart) {
         if(assignment > variantStart) {
-            CharString keywords;
-            CharStringByteSink sink(&keywords);
-            ulocimp_getKeywords(variantStart+1, '@', sink, false, &status);
+            CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status);
             if (U_SUCCESS(status) && !keywords.isEmpty()) {
                 result = new KeywordEnumeration(keywords.data(), keywords.length(), 0, status);
                 if (!result) {
@@ -2586,9 +2565,7 @@ Locale::createUnicodeKeywords(UErrorCode &status) const
     const char* assignment = uprv_strchr(fullName, '=');
     if(variantStart) {
         if(assignment > variantStart) {
-            CharString keywords;
-            CharStringByteSink sink(&keywords);
-            ulocimp_getKeywords(variantStart+1, '@', sink, false, &status);
+            CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status);
             if (U_SUCCESS(status) && !keywords.isEmpty()) {
                 result = new UnicodeKeywordEnumeration(keywords.data(), keywords.length(), 0, status);
                 if (!result) {
@@ -2625,13 +2602,17 @@ Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& sta
         return;
     }
 
-    ulocimp_getKeywordValue(fullName, keywordName_nul.data(), sink, &status);
+    ulocimp_getKeywordValue(fullName, keywordName_nul.data(), sink, status);
 }
 
 void
 Locale::getUnicodeKeywordValue(StringPiece keywordName,
                                ByteSink& sink,
                                UErrorCode& status) const {
+    if (U_FAILURE(status)) {
+        return;
+    }
+
     // TODO: Remove the need for a const char* to a NUL terminated buffer.
     const CharString keywordName_nul(keywordName, status);
     if (U_FAILURE(status)) {
@@ -2639,17 +2620,12 @@ Locale::getUnicodeKeywordValue(StringPiece keywordName,
     }
 
     const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
-
     if (legacy_key == nullptr) {
         status = U_ILLEGAL_ARGUMENT_ERROR;
         return;
     }
 
-    CharString legacy_value;
-    {
-        CharStringByteSink sink(&legacy_value);
-        getKeywordValue(legacy_key, sink, status);
-    }
+    auto legacy_value = getKeywordValue<CharString>(legacy_key, status);
 
     if (U_FAILURE(status)) {
         return;
@@ -2712,6 +2688,7 @@ void
 Locale::setKeywordValue(StringPiece keywordName,
                         StringPiece keywordValue,
                         UErrorCode& status) {
+    if (U_FAILURE(status)) { return; }
     // TODO: Remove the need for a const char* to a NUL terminated buffer.
     const CharString keywordName_nul(keywordName, status);
     const CharString keywordValue_nul(keywordValue, status);
@@ -2722,16 +2699,18 @@ void
 Locale::setUnicodeKeywordValue(StringPiece keywordName,
                                StringPiece keywordValue,
                                UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return;
+    }
+
     // TODO: Remove the need for a const char* to a NUL terminated buffer.
     const CharString keywordName_nul(keywordName, status);
     const CharString keywordValue_nul(keywordValue, status);
-
     if (U_FAILURE(status)) {
         return;
     }
 
     const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
-
     if (legacy_key == nullptr) {
         status = U_ILLEGAL_ARGUMENT_ERROR;
         return;

+ 240 - 646
thirdparty/icu4c/common/loclikely.cpp

@@ -19,6 +19,8 @@
 *   that then do not depend on resource bundle code and likely-subtags data.
 */
 
+#include <utility>
+
 #include "unicode/bytestream.h"
 #include "unicode/utypes.h"
 #include "unicode/locid.h"
@@ -33,72 +35,31 @@
 #include "cstring.h"
 #include "loclikelysubtags.h"
 #include "ulocimp.h"
-#include "ustr_imp.h"
-
-/**
- * Append a tag to a buffer, adding the separator if necessary.  The buffer
- * must be large enough to contain the resulting tag plus any separator
- * necessary. The tag must not be a zero-length string.
- *
- * @param tag The tag to add.
- * @param tagLength The length of the tag.
- * @param buffer The output buffer.
- * @param bufferLength The length of the output buffer.  This is an input/output parameter.
- **/
-static void U_CALLCONV
-appendTag(
-    const char* tag,
-    int32_t tagLength,
-    char* buffer,
-    int32_t* bufferLength,
-    UBool withSeparator) {
-
-    if (withSeparator) {
-        buffer[*bufferLength] = '_';
-        ++(*bufferLength);
-    }
-
-    uprv_memmove(
-        &buffer[*bufferLength],
-        tag,
-        tagLength);
 
-    *bufferLength += tagLength;
-}
+namespace {
 
 /**
  * Create a tag string from the supplied parameters.  The lang, script and region
  * parameters may be nullptr pointers. If they are, their corresponding length parameters
  * must be less than or equal to 0.
  *
- * If any of the language, script or region parameters are empty, and the alternateTags
- * parameter is not nullptr, it will be parsed for potential language, script and region tags
- * to be used when constructing the new tag.  If the alternateTags parameter is nullptr, or
- * it contains no language tag, the default tag for the unknown language is used.
- *
- * If the length of the new string exceeds the capacity of the output buffer, 
- * the function copies as many bytes to the output buffer as it can, and returns
- * the error U_BUFFER_OVERFLOW_ERROR.
- *
  * If an illegal argument is provided, the function returns the error
  * U_ILLEGAL_ARGUMENT_ERROR.
  *
- * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
- * the tag string fits in the output buffer, but the null terminator doesn't.
- *
  * @param lang The language tag to use.
  * @param langLength The length of the language tag.
  * @param script The script tag to use.
  * @param scriptLength The length of the script tag.
  * @param region The region tag to use.
  * @param regionLength The length of the region tag.
+ * @param variant The variant tag to use.
+ * @param variantLength The length of the variant tag.
  * @param trailing Any trailing data to append to the new tag.
  * @param trailingLength The length of the trailing data.
- * @param alternateTags A string containing any alternate tags.
  * @param sink The output sink receiving the tag string.
  * @param err A pointer to a UErrorCode for error reporting.
  **/
-static void U_CALLCONV
+void U_CALLCONV
 createTagStringWithAlternates(
     const char* lang,
     int32_t langLength,
@@ -106,575 +67,240 @@ createTagStringWithAlternates(
     int32_t scriptLength,
     const char* region,
     int32_t regionLength,
+    const char* variant,
+    int32_t variantLength,
     const char* trailing,
     int32_t trailingLength,
-    const char* alternateTags,
     icu::ByteSink& sink,
-    UErrorCode* err) {
-
-    if (U_FAILURE(*err)) {
-        goto error;
-    }
-    else if (langLength >= ULOC_LANG_CAPACITY ||
-             scriptLength >= ULOC_SCRIPT_CAPACITY ||
-             regionLength >= ULOC_COUNTRY_CAPACITY) {
-        goto error;
-    }
-    else {
-        /**
-         * ULOC_FULLNAME_CAPACITY will provide enough capacity
-         * that we can build a string that contains the language,
-         * script and region code without worrying about overrunning
-         * the user-supplied buffer.
-         **/
-        char tagBuffer[ULOC_FULLNAME_CAPACITY];
-        int32_t tagLength = 0;
-        UBool regionAppended = false;
-
-        if (langLength > 0) {
-            appendTag(
-                lang,
-                langLength,
-                tagBuffer,
-                &tagLength,
-                /*withSeparator=*/false);
-        }
-        else if (alternateTags == nullptr) {
-            /*
-             * Use the empty string for an unknown language, if
-             * we found no language.
-             */
-        }
-        else {
-            /*
-             * Parse the alternateTags string for the language.
-             */
-            char alternateLang[ULOC_LANG_CAPACITY];
-            int32_t alternateLangLength = sizeof(alternateLang);
-
-            alternateLangLength =
-                uloc_getLanguage(
-                    alternateTags,
-                    alternateLang,
-                    alternateLangLength,
-                    err);
-            if(U_FAILURE(*err) ||
-                alternateLangLength >= ULOC_LANG_CAPACITY) {
-                goto error;
-            }
-            else if (alternateLangLength == 0) {
-                /*
-                 * Use the empty string for an unknown language, if
-                 * we found no language.
-                 */
-            }
-            else {
-                appendTag(
-                    alternateLang,
-                    alternateLangLength,
-                    tagBuffer,
-                    &tagLength,
-                    /*withSeparator=*/false);
-            }
-        }
-
-        if (scriptLength > 0) {
-            appendTag(
-                script,
-                scriptLength,
-                tagBuffer,
-                &tagLength,
-                /*withSeparator=*/true);
-        }
-        else if (alternateTags != nullptr) {
-            /*
-             * Parse the alternateTags string for the script.
-             */
-            char alternateScript[ULOC_SCRIPT_CAPACITY];
-
-            const int32_t alternateScriptLength =
-                uloc_getScript(
-                    alternateTags,
-                    alternateScript,
-                    sizeof(alternateScript),
-                    err);
-
-            if (U_FAILURE(*err) ||
-                alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
-                goto error;
-            }
-            else if (alternateScriptLength > 0) {
-                appendTag(
-                    alternateScript,
-                    alternateScriptLength,
-                    tagBuffer,
-                    &tagLength,
-                    /*withSeparator=*/true);
-            }
-        }
-
-        if (regionLength > 0) {
-            appendTag(
-                region,
-                regionLength,
-                tagBuffer,
-                &tagLength,
-                /*withSeparator=*/true);
-
-            regionAppended = true;
-        }
-        else if (alternateTags != nullptr) {
-            /*
-             * Parse the alternateTags string for the region.
-             */
-            char alternateRegion[ULOC_COUNTRY_CAPACITY];
-
-            const int32_t alternateRegionLength =
-                uloc_getCountry(
-                    alternateTags,
-                    alternateRegion,
-                    sizeof(alternateRegion),
-                    err);
-            if (U_FAILURE(*err) ||
-                alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
-                goto error;
-            }
-            else if (alternateRegionLength > 0) {
-                appendTag(
-                    alternateRegion,
-                    alternateRegionLength,
-                    tagBuffer,
-                    &tagLength,
-                    /*withSeparator=*/true);
-
-                regionAppended = true;
-            }
-        }
-
-        /**
-         * Copy the partial tag from our internal buffer to the supplied
-         * target.
-         **/
-        sink.Append(tagBuffer, tagLength);
-
-        if (trailingLength > 0) {
-            if (*trailing != '@') {
-                sink.Append("_", 1);
-                if (!regionAppended) {
-                    /* extra separator is required */
-                    sink.Append("_", 1);
-                }
-            }
-
-            /*
-             * Copy the trailing data into the supplied buffer.
-             */
-            sink.Append(trailing, trailingLength);
-        }
-
+    UErrorCode& err) {
+    if (U_FAILURE(err)) {
         return;
     }
 
-error:
-
-    /**
-     * An overflow indicates the locale ID passed in
-     * is ill-formed.  If we got here, and there was
-     * no previous error, it's an implicit overflow.
-     **/
-    if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
-        U_SUCCESS(*err)) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
+    if (langLength >= ULOC_LANG_CAPACITY ||
+            scriptLength >= ULOC_SCRIPT_CAPACITY ||
+            regionLength >= ULOC_COUNTRY_CAPACITY) {
+        err = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
     }
-}
 
-/**
- * Parse the language, script, and region subtags from a tag string, and copy the
- * results into the corresponding output parameters. The buffers are null-terminated,
- * unless overflow occurs.
- *
- * The langLength, scriptLength, and regionLength parameters are input/output
- * parameters, and must contain the capacity of their corresponding buffers on
- * input.  On output, they will contain the actual length of the buffers, not
- * including the null terminator.
- *
- * If the length of any of the output subtags exceeds the capacity of the corresponding
- * buffer, the function copies as many bytes to the output buffer as it can, and returns
- * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
- * occurs.
- *
- * If an illegal argument is provided, the function returns the error
- * U_ILLEGAL_ARGUMENT_ERROR.
- *
- * @param localeID The locale ID to parse.
- * @param lang The language tag buffer.
- * @param langLength The length of the language tag.
- * @param script The script tag buffer.
- * @param scriptLength The length of the script tag.
- * @param region The region tag buffer.
- * @param regionLength The length of the region tag.
- * @param err A pointer to a UErrorCode for error reporting.
- * @return The number of chars of the localeID parameter consumed.
- **/
-static int32_t U_CALLCONV
-parseTagString(
-    const char* localeID,
-    char* lang,
-    int32_t* langLength,
-    char* script,
-    int32_t* scriptLength,
-    char* region,
-    int32_t* regionLength,
-    UErrorCode* err)
-{
-    const char* position = localeID;
-    int32_t subtagLength = 0;
-
-    if(U_FAILURE(*err) ||
-       localeID == nullptr ||
-       lang == nullptr ||
-       langLength == nullptr ||
-       script == nullptr ||
-       scriptLength == nullptr ||
-       region == nullptr ||
-       regionLength == nullptr) {
-        goto error;
+    if (langLength > 0) {
+        sink.Append(lang, langLength);
     }
 
-    subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err);
-
-    /*
-     * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
-     * to be an error, because it indicates the user-supplied tag is
-     * not well-formed.
-     */
-    if(U_FAILURE(*err)) {
-        goto error;
+    if (scriptLength > 0) {
+        sink.Append("_", 1);
+        sink.Append(script, scriptLength);
     }
 
-    *langLength = subtagLength;
-
-    /*
-     * If no language was present, use the empty string instead.
-     * Otherwise, move past any separator.
-     */
-    if (_isIDSeparator(*position)) {
-        ++position;
+    if (regionLength > 0) {
+        sink.Append("_", 1);
+        sink.Append(region, regionLength);
     }
 
-    subtagLength = ulocimp_getScript(position, &position, *err).extract(script, *scriptLength, *err);
-
-    if(U_FAILURE(*err)) {
-        goto error;
+    if (variantLength > 0) {
+        if (regionLength == 0) {
+            /* extra separator is required */
+            sink.Append("_", 1);
+        }
+        sink.Append("_", 1);
+        sink.Append(variant, variantLength);
     }
 
-    *scriptLength = subtagLength;
-
-    if (*scriptLength > 0) {
+    if (trailingLength > 0) {
         /*
-         * Move past any separator.
+         * Copy the trailing data into the supplied buffer.
          */
-        if (_isIDSeparator(*position)) {
-            ++position;
-        }    
+        sink.Append(trailing, trailingLength);
     }
+}
 
-    subtagLength = ulocimp_getCountry(position, &position, *err).extract(region, *regionLength, *err);
-
-    if(U_FAILURE(*err)) {
-        goto error;
-    }
-
-    *regionLength = subtagLength;
-
-    if (*regionLength <= 0 && *position != 0 && *position != '@') {
-        /* back up over consumed trailing separator */
-        --position;
-    }
-
-exit:
-
-    return (int32_t)(position - localeID);
-
-error:
-
-    /**
-     * If we get here, we have no explicit error, it's the result of an
-     * illegal argument.
-     **/
-    if (!U_FAILURE(*err)) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
+bool CHECK_TRAILING_VARIANT_SIZE(const char* variant, int32_t variantLength) {
+    int32_t count = 0;
+    for (int32_t i = 0; i < variantLength; i++) {
+        if (_isIDSeparator(variant[i])) {
+            count = 0;
+        } else if (count == 8) {
+            return false;
+        } else {
+            count++;
+        }
     }
-
-    goto exit;
+    return true;
 }
 
-#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
-    int32_t count = 0; \
-    int32_t i; \
-    for (i = 0; i < trailingLength; i++) { \
-        if (trailing[i] == '-' || trailing[i] == '_') { \
-            count = 0; \
-            if (count > 8) { \
-                goto error; \
-            } \
-        } else if (trailing[i] == '@') { \
-            break; \
-        } else if (count > 8) { \
-            goto error; \
-        } else { \
-            count++; \
-        } \
-    } \
-} UPRV_BLOCK_MACRO_END
-
-static UBool
+void
 _uloc_addLikelySubtags(const char* localeID,
                        icu::ByteSink& sink,
-                       UErrorCode* err) {
-    char lang[ULOC_LANG_CAPACITY];
-    int32_t langLength = sizeof(lang);
-    char script[ULOC_SCRIPT_CAPACITY];
-    int32_t scriptLength = sizeof(script);
-    char region[ULOC_COUNTRY_CAPACITY];
-    int32_t regionLength = sizeof(region);
-    const char* trailing = "";
-    int32_t trailingLength = 0;
-    int32_t trailingIndex = 0;
-
-    if(U_FAILURE(*err)) {
-        goto error;
+                       UErrorCode& err) {
+    if (U_FAILURE(err)) {
+        return;
     }
+
     if (localeID == nullptr) {
-        goto error;
+        err = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
     }
 
-    trailingIndex = parseTagString(
-        localeID,
-        lang,
-        &langLength,
-        script,
-        &scriptLength,
-        region,
-        &regionLength,
-        err);
-    if(U_FAILURE(*err)) {
-        /* Overflow indicates an illegal argument error */
-        if (*err == U_BUFFER_OVERFLOW_ERROR) {
-            *err = U_ILLEGAL_ARGUMENT_ERROR;
-        }
-
-        goto error;
-    }
-    if (langLength > 3) {
-        goto error;
+    icu::CharString lang;
+    icu::CharString script;
+    icu::CharString region;
+    icu::CharString variant;
+    const char* trailing = nullptr;
+    ulocimp_getSubtags(localeID, &lang, &script, &region, &variant, &trailing, err);
+    if (U_FAILURE(err)) {
+        return;
     }
 
-    /* Find the length of the trailing portion. */
-    while (_isIDSeparator(localeID[trailingIndex])) {
-        trailingIndex++;
+    if (!CHECK_TRAILING_VARIANT_SIZE(variant.data(), variant.length())) {
+        err = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
     }
-    trailing = &localeID[trailingIndex];
-    trailingLength = (int32_t)uprv_strlen(trailing);
-
-    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
-    {
-        const icu::XLikelySubtags* likelySubtags = icu::XLikelySubtags::getSingleton(*err);
-        if(U_FAILURE(*err)) {
-            goto error;
-        }
-        // We need to keep l on the stack because lsr may point into internal
-        // memory of l.
-        icu::Locale l = icu::Locale::createFromName(localeID);
-        if (l.isBogus()) {
-            goto error;
-        }
-        icu::LSR lsr = likelySubtags->makeMaximizedLsrFrom(l, true, *err);
-        if(U_FAILURE(*err)) {
-            goto error;
-        }
-        const char* language = lsr.language;
-        if (uprv_strcmp(language, "und") == 0) {
-            language = "";
-        }
-        createTagStringWithAlternates(
-            language,
-            (int32_t)uprv_strlen(language),
-            lsr.script,
-            (int32_t)uprv_strlen(lsr.script),
-            lsr.region,
-            (int32_t)uprv_strlen(lsr.region),
-            trailing,
-            trailingLength,
-            nullptr,
-            sink,
-            err);
-        if(U_FAILURE(*err)) {
-            goto error;
+
+    if (lang.length() == 4) {
+        if (script.isEmpty()) {
+            script = std::move(lang);
+            lang.clear();
+        } else {
+            err = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
         }
+    } else if (lang.length() > 8) {
+        err = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
     }
-    return true;
 
-error:
+    int32_t trailingLength = (int32_t)uprv_strlen(trailing);
 
-    if (!U_FAILURE(*err)) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
+    const icu::LikelySubtags* likelySubtags = icu::LikelySubtags::getSingleton(err);
+    if (U_FAILURE(err)) {
+        return;
+    }
+    // We need to keep l on the stack because lsr may point into internal
+    // memory of l.
+    icu::Locale l = icu::Locale::createFromName(localeID);
+    if (l.isBogus()) {
+        err = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
     }
-    return false;
+    icu::LSR lsr = likelySubtags->makeMaximizedLsrFrom(l, true, err);
+    if (U_FAILURE(err)) {
+        return;
+    }
+    const char* language = lsr.language;
+    if (uprv_strcmp(language, "und") == 0) {
+        language = "";
+    }
+    createTagStringWithAlternates(
+        language,
+        (int32_t)uprv_strlen(language),
+        lsr.script,
+        (int32_t)uprv_strlen(lsr.script),
+        lsr.region,
+        (int32_t)uprv_strlen(lsr.region),
+        variant.data(),
+        variant.length(),
+        trailing,
+        trailingLength,
+        sink,
+        err);
 }
 
-// Add likely subtags to the sink
-// return true if the value in the sink is produced by a match during the lookup
-// return false if the value in the sink is the same as input because there are
-// no match after the lookup.
-static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*);
-
-static void
+void
 _uloc_minimizeSubtags(const char* localeID,
                       icu::ByteSink& sink,
                       bool favorScript,
-                      UErrorCode* err) {
-    icu::CharString maximizedTagBuffer;
-
-    char lang[ULOC_LANG_CAPACITY];
-    int32_t langLength = sizeof(lang);
-    char script[ULOC_SCRIPT_CAPACITY];
-    int32_t scriptLength = sizeof(script);
-    char region[ULOC_COUNTRY_CAPACITY];
-    int32_t regionLength = sizeof(region);
-    const char* trailing = "";
-    int32_t trailingLength = 0;
-    int32_t trailingIndex = 0;
-
-    if(U_FAILURE(*err)) {
-        goto error;
-    }
-    else if (localeID == nullptr) {
-        goto error;
+                      UErrorCode& err) {
+    if (U_FAILURE(err)) {
+        return;
     }
 
-    trailingIndex =
-        parseTagString(
-            localeID,
-            lang,
-            &langLength,
-            script,
-            &scriptLength,
-            region,
-            &regionLength,
-            err);
-    if(U_FAILURE(*err)) {
-
-        /* Overflow indicates an illegal argument error */
-        if (*err == U_BUFFER_OVERFLOW_ERROR) {
-            *err = U_ILLEGAL_ARGUMENT_ERROR;
-        }
-
-        goto error;
+    if (localeID == nullptr) {
+        err = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
     }
 
-    /* Find the spot where the variants or the keywords begin, if any. */
-    while (_isIDSeparator(localeID[trailingIndex])) {
-        trailingIndex++;
+    icu::CharString lang;
+    icu::CharString script;
+    icu::CharString region;
+    icu::CharString variant;
+    const char* trailing = nullptr;
+    ulocimp_getSubtags(localeID, &lang, &script, &region, &variant, &trailing, err);
+    if (U_FAILURE(err)) {
+        return;
     }
-    trailing = &localeID[trailingIndex];
-    trailingLength = (int32_t)uprv_strlen(trailing);
-
-    CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
 
-    {
-        const icu::XLikelySubtags* likelySubtags = icu::XLikelySubtags::getSingleton(*err);
-        if(U_FAILURE(*err)) {
-            goto error;
-        }
-        icu::LSR lsr = likelySubtags->minimizeSubtags(
-            {lang, langLength},
-            {script, scriptLength},
-            {region, regionLength},
-            favorScript,
-            *err);
-        if(U_FAILURE(*err)) {
-            goto error;
-        }
-        const char* language = lsr.language;
-        if (uprv_strcmp(language, "und") == 0) {
-            language = "";
-        }
-        createTagStringWithAlternates(
-            language,
-            (int32_t)uprv_strlen(language),
-            lsr.script,
-            (int32_t)uprv_strlen(lsr.script),
-            lsr.region,
-            (int32_t)uprv_strlen(lsr.region),
-            trailing,
-            trailingLength,
-            nullptr,
-            sink,
-            err);
-        if(U_FAILURE(*err)) {
-            goto error;
-        }
+    if (!CHECK_TRAILING_VARIANT_SIZE(variant.data(), variant.length())) {
+        err = U_ILLEGAL_ARGUMENT_ERROR;
         return;
     }
 
-error:
+    int32_t trailingLength = (int32_t)uprv_strlen(trailing);
 
-    if (!U_FAILURE(*err)) {
-        *err = U_ILLEGAL_ARGUMENT_ERROR;
+    const icu::LikelySubtags* likelySubtags = icu::LikelySubtags::getSingleton(err);
+    if (U_FAILURE(err)) {
+        return;
     }
+    icu::LSR lsr = likelySubtags->minimizeSubtags(
+        lang.toStringPiece(),
+        script.toStringPiece(),
+        region.toStringPiece(),
+        favorScript,
+        err);
+    if (U_FAILURE(err)) {
+        return;
+    }
+    const char* language = lsr.language;
+    if (uprv_strcmp(language, "und") == 0) {
+        language = "";
+    }
+    createTagStringWithAlternates(
+        language,
+        (int32_t)uprv_strlen(language),
+        lsr.script,
+        (int32_t)uprv_strlen(lsr.script),
+        lsr.region,
+        (int32_t)uprv_strlen(lsr.region),
+        variant.data(),
+        variant.length(),
+        trailing,
+        trailingLength,
+        sink,
+        err);
 }
 
+}  // namespace
+
 U_CAPI int32_t U_EXPORT2
 uloc_addLikelySubtags(const char* localeID,
                       char* maximizedLocaleID,
                       int32_t maximizedLocaleIDCapacity,
                       UErrorCode* status) {
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    icu::CheckedArrayByteSink sink(
-            maximizedLocaleID, maximizedLocaleIDCapacity);
-
-    ulocimp_addLikelySubtags(localeID, sink, status);
-    int32_t reslen = sink.NumberOfBytesAppended();
-
-    if (U_FAILURE(*status)) {
-        return sink.Overflowed() ? reslen : -1;
-    }
-
-    if (sink.Overflowed()) {
-        *status = U_BUFFER_OVERFLOW_ERROR;
-    } else {
-        u_terminateChars(
-                maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
-    }
-
-    return reslen;
+    return icu::ByteSinkUtil::viaByteSinkToTerminatedChars(
+        maximizedLocaleID, maximizedLocaleIDCapacity,
+        [&](icu::ByteSink& sink, UErrorCode& status) {
+            ulocimp_addLikelySubtags(localeID, sink, status);
+        },
+        *status);
 }
 
-static UBool
-_ulocimp_addLikelySubtags(const char* localeID,
-                          icu::ByteSink& sink,
-                          UErrorCode* status) {
-    icu::CharString localeBuffer;
-    {
-        icu::CharStringByteSink localeSink(&localeBuffer);
-        ulocimp_canonicalize(localeID, localeSink, status);
-    }
-    if (U_SUCCESS(*status)) {
-        return _uloc_addLikelySubtags(localeBuffer.data(), sink, status);
-    } else {
-        return false;
-    }
+U_EXPORT icu::CharString
+ulocimp_addLikelySubtags(const char* localeID,
+                         UErrorCode& status) {
+    return icu::ByteSinkUtil::viaByteSinkToCharString(
+        [&](icu::ByteSink& sink, UErrorCode& status) {
+            ulocimp_addLikelySubtags(localeID, sink, status);
+        },
+        status);
 }
 
-U_CAPI void U_EXPORT2
+U_EXPORT void
 ulocimp_addLikelySubtags(const char* localeID,
                          icu::ByteSink& sink,
-                         UErrorCode* status) {
-    _ulocimp_addLikelySubtags(localeID, sink, status);
+                         UErrorCode& status) {
+    if (U_FAILURE(status)) { return; }
+    icu::CharString localeBuffer = ulocimp_canonicalize(localeID, status);
+    _uloc_addLikelySubtags(localeBuffer.data(), sink, status);
 }
 
 U_CAPI int32_t U_EXPORT2
@@ -682,40 +308,32 @@ uloc_minimizeSubtags(const char* localeID,
                      char* minimizedLocaleID,
                      int32_t minimizedLocaleIDCapacity,
                      UErrorCode* status) {
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    icu::CheckedArrayByteSink sink(
-            minimizedLocaleID, minimizedLocaleIDCapacity);
-
-    ulocimp_minimizeSubtags(localeID, sink, false, status);
-    int32_t reslen = sink.NumberOfBytesAppended();
-
-    if (U_FAILURE(*status)) {
-        return sink.Overflowed() ? reslen : -1;
-    }
-
-    if (sink.Overflowed()) {
-        *status = U_BUFFER_OVERFLOW_ERROR;
-    } else {
-        u_terminateChars(
-                minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
-    }
+    return icu::ByteSinkUtil::viaByteSinkToTerminatedChars(
+        minimizedLocaleID, minimizedLocaleIDCapacity,
+        [&](icu::ByteSink& sink, UErrorCode& status) {
+            ulocimp_minimizeSubtags(localeID, sink, false, status);
+        },
+        *status);
+}
 
-    return reslen;
+U_EXPORT icu::CharString
+ulocimp_minimizeSubtags(const char* localeID,
+                        bool favorScript,
+                        UErrorCode& status) {
+    return icu::ByteSinkUtil::viaByteSinkToCharString(
+        [&](icu::ByteSink& sink, UErrorCode& status) {
+            ulocimp_minimizeSubtags(localeID, sink, favorScript, status);
+        },
+        status);
 }
 
-U_CAPI void U_EXPORT2
+U_EXPORT void
 ulocimp_minimizeSubtags(const char* localeID,
                         icu::ByteSink& sink,
                         bool favorScript,
-                        UErrorCode* status) {
-    icu::CharString localeBuffer;
-    {
-        icu::CharStringByteSink localeSink(&localeBuffer);
-        ulocimp_canonicalize(localeID, localeSink, status);
-    }
+                        UErrorCode& status) {
+    if (U_FAILURE(status)) { return; }
+    icu::CharString localeBuffer = ulocimp_canonicalize(localeID, status);
     _uloc_minimizeSubtags(localeBuffer.data(), sink, favorScript, status);
 }
 
@@ -728,22 +346,16 @@ static const char LANG_DIR_STRING[] =
 U_CAPI UBool U_EXPORT2
 uloc_isRightToLeft(const char *locale) {
     UErrorCode errorCode = U_ZERO_ERROR;
-    char script[8];
-    int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
-    if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
-            scriptLength == 0) {
+    icu::CharString lang;
+    icu::CharString script;
+    ulocimp_getSubtags(locale, &lang, &script, nullptr, nullptr, nullptr, errorCode);
+    if (U_FAILURE(errorCode) || script.isEmpty()) {
         // Fastpath: We know the likely scripts and their writing direction
         // for some common languages.
-        errorCode = U_ZERO_ERROR;
-        char lang[8];
-        int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
-        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
-            return false;
-        }
-        if (langLength > 0) {
-            const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
+        if (!lang.isEmpty()) {
+            const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang.data());
             if (langPtr != nullptr) {
-                switch (langPtr[langLength]) {
+                switch (langPtr[lang.length()]) {
                 case '-': return false;
                 case '+': return true;
                 default: break;  // partial match of a longer code
@@ -752,21 +364,16 @@ uloc_isRightToLeft(const char *locale) {
         }
         // Otherwise, find the likely script.
         errorCode = U_ZERO_ERROR;
-        icu::CharString likely;
-        {
-            icu::CharStringByteSink sink(&likely);
-            ulocimp_addLikelySubtags(locale, sink, &errorCode);
-        }
-        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
+        icu::CharString likely = ulocimp_addLikelySubtags(locale, errorCode);
+        if (U_FAILURE(errorCode)) {
             return false;
         }
-        scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
-        if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
-                scriptLength == 0) {
+        ulocimp_getSubtags(likely.data(), nullptr, &script, nullptr, nullptr, nullptr, errorCode);
+        if (U_FAILURE(errorCode) || script.isEmpty()) {
             return false;
         }
     }
-    UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
+    UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script.data());
     return uscript_isRightToLeft(scriptCode);
 }
 
@@ -779,65 +386,52 @@ Locale::isRightToLeft() const {
 
 U_NAMESPACE_END
 
-// The following must at least allow for rg key value (6) plus terminator (1).
-#define ULOC_RG_BUFLEN 8
+namespace {
+icu::CharString
+GetRegionFromKey(const char* localeID, const char* key, UErrorCode& status) {
+    icu::CharString result;
 
-U_CAPI int32_t U_EXPORT2
-ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
-                                     char *region, int32_t regionCapacity, UErrorCode* status) {
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-    char rgBuf[ULOC_RG_BUFLEN];
-    UErrorCode rgStatus = U_ZERO_ERROR;
-
-    // First check for rg keyword value
-    icu::CharString rg;
-    {
-        icu::CharStringByteSink sink(&rg);
-        ulocimp_getKeywordValue(localeID, "rg", sink, &rgStatus);
-    }
-    int32_t rgLen = rg.length();
-    if (U_FAILURE(rgStatus) || rgLen < 3 || rgLen > 7) {
-        rgLen = 0;
-    } else {
+    // First check for keyword value
+    icu::CharString kw = ulocimp_getKeywordValue(localeID, key, status);
+    int32_t len = kw.length();
+    if (U_SUCCESS(status) && len >= 3 && len <= 7) {
         // chop off the subdivision code (which will generally be "zzzz" anyway)
-        const char* const data = rg.data();
+        const char* const data = kw.data();
         if (uprv_isASCIILetter(data[0])) {
-            rgLen = 2;
-            rgBuf[0] = uprv_toupper(data[0]);
-            rgBuf[1] = uprv_toupper(data[1]);
+            result.append(uprv_toupper(data[0]), status);
+            result.append(uprv_toupper(data[1]), status);
         } else {
             // assume three-digit region code
-            rgLen = 3;
-            uprv_memcpy(rgBuf, data, rgLen);
+            result.append(data, 3, status);
         }
     }
+    return result;
+}
+}  // namespace
 
-    if (rgLen == 0) {
+U_EXPORT icu::CharString
+ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
+                                     UErrorCode& status) {
+    if (U_FAILURE(status)) {
+        return {};
+    }
+    icu::CharString rgBuf = GetRegionFromKey(localeID, "rg", status);
+    if (U_SUCCESS(status) && rgBuf.isEmpty()) {
         // No valid rg keyword value, try for unicode_region_subtag
-        rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
-        if (U_FAILURE(*status)) {
-            rgLen = 0;
-        } else if (rgLen == 0 && inferRegion) {
-            // no unicode_region_subtag but inferRegion true, try likely subtags
-            rgStatus = U_ZERO_ERROR;
-            icu::CharString locBuf;
-            {
-                icu::CharStringByteSink sink(&locBuf);
-                ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
-            }
-            if (U_SUCCESS(rgStatus)) {
-                rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
-                if (U_FAILURE(*status)) {
-                    rgLen = 0;
+        rgBuf = ulocimp_getRegion(localeID, status);
+        if (U_SUCCESS(status) && rgBuf.isEmpty() && inferRegion) {
+            // Second check for sd keyword value
+            rgBuf = GetRegionFromKey(localeID, "sd", status);
+            if (U_SUCCESS(status) && rgBuf.isEmpty()) {
+                // no unicode_region_subtag but inferRegion true, try likely subtags
+                UErrorCode rgStatus = U_ZERO_ERROR;
+                icu::CharString locBuf = ulocimp_addLikelySubtags(localeID, rgStatus);
+                if (U_SUCCESS(rgStatus)) {
+                    rgBuf = ulocimp_getRegion(locBuf.data(), status);
                 }
             }
         }
     }
 
-    rgBuf[rgLen] = 0;
-    uprv_strncpy(region, rgBuf, regionCapacity);
-    return u_terminateChars(region, regionCapacity, rgLen, status);
+    return rgBuf;
 }
-

+ 128 - 62
thirdparty/icu4c/common/loclikelysubtags.cpp

@@ -51,8 +51,7 @@ LocaleDistanceData::~LocaleDistanceData() {
     delete[] paradigms;
 }
 
-// TODO(ICU-20777): Rename to just LikelySubtagsData.
-struct XLikelySubtagsData {
+struct LikelySubtagsData {
     UResourceBundle *langInfoBundle = nullptr;
     UniqueCharStrings strings;
     CharStringMap languageAliases;
@@ -63,14 +62,15 @@ struct XLikelySubtagsData {
 
     LocaleDistanceData distanceData;
 
-    XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}
+    LikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {}
 
-    ~XLikelySubtagsData() {
+    ~LikelySubtagsData() {
         ures_close(langInfoBundle);
         delete[] lsrs;
     }
 
     void load(UErrorCode &errorCode) {
+        if (U_FAILURE(errorCode)) { return; }
         langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode);
         if (U_FAILURE(errorCode)) { return; }
         StackUResourceBundle stackTempBundle;
@@ -231,6 +231,7 @@ struct XLikelySubtagsData {
 private:
     bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value,
                      LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {
+        if (U_FAILURE(errorCode)) { return false; }
         if (table.findValue(key, value)) {
             ResourceArray stringArray = value.getArray(errorCode);
             if (U_FAILURE(errorCode)) { return false; }
@@ -297,7 +298,7 @@ private:
     }
 
     UnicodeString toRegion(const ResourceArray& m49Array, ResourceValue &value, int encoded, UErrorCode &errorCode) {
-        if (encoded == 0 || encoded == 1) {
+        if (U_FAILURE(errorCode) || encoded == 0 || encoded == 1) {
             return UNICODE_STRING_SIMPLE("");
         }
         encoded &= 0x00ffffff;
@@ -315,6 +316,7 @@ private:
 
     bool readLSREncodedStrings(const ResourceTable &table, const char* key, ResourceValue &value, const ResourceArray& m49Array,
                      LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) {
+        if (U_FAILURE(errorCode)) { return false; }
         if (table.findValue(key, value)) {
             const int32_t* vectors = value.getIntVector(length, errorCode);
             if (U_FAILURE(errorCode)) { return false; }
@@ -339,7 +341,7 @@ private:
 
 namespace {
 
-XLikelySubtags *gLikelySubtags = nullptr;
+LikelySubtags *gLikelySubtags = nullptr;
 UVector *gMacroregions = nullptr;
 UInitOnce gInitOnce {};
 
@@ -352,8 +354,56 @@ UBool U_CALLCONV cleanup() {
     return true;
 }
 
-static const char16_t RANGE_MARKER = 0x7E; /* '~' */
+constexpr const char16_t* MACROREGION_HARDCODE[] = {  // Built-in macroregion codes; getStaticMacroregions() feeds each entry to processMacroregionRange().
+    u"001~3",  // '~' marks a range: expanded to 001, 002, 003
+    u"005",
+    u"009",
+    u"011",
+    u"013~5",
+    u"017~9",
+    u"021",
+    u"029",
+    u"030",
+    u"034~5",
+    u"039",
+    u"053~4",
+    u"057",
+    u"061",
+    u"142~3",
+    u"145",
+    u"150~1",
+    u"154~5",
+    u"202",
+    u"419",
+    u"EU",  // entries without '~' are taken as-is
+    u"EZ",
+    u"QO",
+    u"UN",
+};  // In U_DEBUG builds initLikelySubtags() asserts that this list matches the one built by loadMacroregions().
+
+constexpr char16_t RANGE_MARKER = 0x7E; /* '~' */
+/**
+ * Appends the macroregion code(s) named by regionName to newMacroRegions.
+ *
+ * A plain code such as "005" is appended unchanged. A range such as "013~5"
+ * (prefix, RANGE_MARKER, final code unit) is expanded by stepping the last
+ * code unit of the prefix up to the final code unit: "013", "014", "015".
+ *
+ * @param regionName      code or range to process
+ * @param newMacroRegions vector that adopts one new UnicodeString per code
+ * @param status          in/out error code; nothing is done if it already
+ *                        indicates a failure. Set to U_INVALID_FORMAT_ERROR
+ *                        for a range that has nothing after the marker.
+ */
+void processMacroregionRange(const UnicodeString& regionName, UVector* newMacroRegions, UErrorCode& status) {
+    if (U_FAILURE(status)) { return; }
+    int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER);
+    char16_t buf[6];
+    regionName.extract(buf,6,status);
+    // A name that does not fit makes the extraction fail. Stop here so that
+    // buf is never indexed with a marker position outside the array; as
+    // before, nothing is appended and status keeps the failure.
+    if (U_FAILURE(status)) { return; }
+    if ( rangeMarkerLocation > 0 ) {
+        if (rangeMarkerLocation + 1 >= regionName.length()) {
+            // "abc~" has no end of range: charAt() past the end would yield
+            // 0xffff and the loop below could never exceed its upper bound.
+            status = U_INVALID_FORMAT_ERROR;
+            return;
+        }
+        char16_t endRange = regionName.charAt(rangeMarkerLocation+1);
+        // Cut buf down to the prefix, then count its last code unit upwards.
+        buf[rangeMarkerLocation] = 0;
+        while ( buf[rangeMarkerLocation-1] <= endRange && U_SUCCESS(status)) {
+            LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status);
+            newMacroRegions->adoptElement(newRegion.orphan(),status);
+            buf[rangeMarkerLocation-1]++;
+        }
+    } else {
+        LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status);
+        newMacroRegions->adoptElement(newRegion.orphan(),status);
+    }
+}
+
+#if U_DEBUG
 UVector* loadMacroregions(UErrorCode &status) {
+    if (U_FAILURE(status)) { return nullptr; }
     LocalPointer<UVector> newMacroRegions(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);
 
     LocalUResourceBundlePointer supplementalData(ures_openDirect(nullptr,"supplementalData",&status));
@@ -365,37 +415,52 @@ UVector* loadMacroregions(UErrorCode &status) {
         return nullptr;
     }
 
-    while (U_SUCCESS(status) && ures_hasNext(regionMacro.getAlias())) {
+    while (ures_hasNext(regionMacro.getAlias())) {
         UnicodeString regionName = ures_getNextUnicodeString(regionMacro.getAlias(),nullptr,&status);
-        int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER);
-        char16_t buf[6];
-        regionName.extract(buf,6,status);
-        if ( rangeMarkerLocation > 0 ) {
-            char16_t endRange = regionName.charAt(rangeMarkerLocation+1);
-            buf[rangeMarkerLocation] = 0;
-            while ( buf[rangeMarkerLocation-1] <= endRange && U_SUCCESS(status)) {
-                LocalPointer<UnicodeString> newRegion(new UnicodeString(buf), status);
-                newMacroRegions->adoptElement(newRegion.orphan(),status);
-                buf[rangeMarkerLocation-1]++;
-            }
-        } else {
-            LocalPointer<UnicodeString> newRegion(new UnicodeString(regionName), status);
-            newMacroRegions->adoptElement(newRegion.orphan(),status);
+        processMacroregionRange(regionName, newMacroRegions.getAlias(), status);
+        if (U_FAILURE(status)) {
+            return nullptr;
         }
     }
+
+    return newMacroRegions.orphan();
+}
+#endif // U_DEBUG
+
+UVector* getStaticMacroregions(UErrorCode &status) {  // Builds the macroregion vector from MACROREGION_HARDCODE; returns nullptr on any failure.
+    if (U_FAILURE(status)) { return nullptr; }
+    LocalPointer<UVector> newMacroRegions(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status);  // will hold UnicodeString elements
+
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+
+    for (const auto *region : MACROREGION_HARDCODE) {  // each entry is a single code or an "abc~d" range
+        UnicodeString regionName(region);
+        processMacroregionRange(regionName, newMacroRegions.getAlias(), status);
+        if (U_FAILURE(status)) {
+            return nullptr;
+        }
+    }
+
     return newMacroRegions.orphan();  // hand the finished vector over to the caller
 }
 
 }  // namespace
 
-void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
+void U_CALLCONV LikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
     // This function is invoked only via umtx_initOnce().
     U_ASSERT(gLikelySubtags == nullptr);
-    XLikelySubtagsData data(errorCode);
+    LikelySubtagsData data(errorCode);
     data.load(errorCode);
     if (U_FAILURE(errorCode)) { return; }
-    gLikelySubtags = new XLikelySubtags(data);
-    gMacroregions = loadMacroregions(errorCode);
+    gLikelySubtags = new LikelySubtags(data);
+    gMacroregions = getStaticMacroregions(errorCode);
+#if U_DEBUG
+    auto macroregionsFromData = loadMacroregions(errorCode);
+    U_ASSERT((*gMacroregions) == (*macroregionsFromData));
+    delete macroregionsFromData;
+#endif
     if (U_FAILURE(errorCode) || gLikelySubtags == nullptr || gMacroregions == nullptr) {
         delete gLikelySubtags;
         delete gMacroregions;
@@ -406,13 +471,13 @@ void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) {
     ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup);
 }
 
-const XLikelySubtags *XLikelySubtags::getSingleton(UErrorCode &errorCode) {
+const LikelySubtags *LikelySubtags::getSingleton(UErrorCode &errorCode) {  // Returns the shared instance, or nullptr if errorCode already indicates a failure.
     if (U_FAILURE(errorCode)) { return nullptr; }
-    umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
+    umtx_initOnce(gInitOnce, &LikelySubtags::initLikelySubtags, errorCode);  // initLikelySubtags() is what assigns gLikelySubtags
     return gLikelySubtags;
 }
 
-XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) :
+LikelySubtags::LikelySubtags(LikelySubtagsData &data) :
         langInfoBundle(data.langInfoBundle),
         strings(data.strings.orphanCharStrings()),
         languageAliases(std::move(data.languageAliases)),
@@ -421,7 +486,7 @@ XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) :
         lsrs(data.lsrs),
 #if U_DEBUG
         lsrsLength(data.lsrsLength),
-#endif
+#endif // U_DEBUG
         distanceData(std::move(data.distanceData)) {
     data.langInfoBundle = nullptr;
     data.lsrs = nullptr;
@@ -447,18 +512,19 @@ XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) :
     }
 }
 
-XLikelySubtags::~XLikelySubtags() {
+LikelySubtags::~LikelySubtags() {  // Releases the resource bundle, the string storage and the LSR array held by this object.
     ures_close(langInfoBundle);
     delete strings;
     delete[] lsrs;
 }
 
-LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale,
+LSR LikelySubtags::makeMaximizedLsrFrom(const Locale &locale,
                                          bool returnInputIfUnmatch,
                                          UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return {}; }
     if (locale.isBogus()) {
         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
-        return LSR("", "", "", LSR::EXPLICIT_LSR);
+        return {};
     }
     const char *name = locale.getName();
     if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') {  // name.startsWith("@x=")
@@ -490,10 +556,11 @@ const char *getCanonical(const CharStringMap &aliases, const char *alias) {
 
 }  // namespace
 
-LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
+LSR LikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region,
                                      const char *variant,
                                      bool returnInputIfUnmatch,
                                      UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return {}; }
     // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK.
     // They should match only themselves,
     // not other locales with what looks like the same language and script subtags.
@@ -501,12 +568,21 @@ LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, c
     if (region[0] == 'X' && (c1 = region[1]) != 0 && region[2] == 0) {
         switch (c1) {
         case 'A':
+            if (returnInputIfUnmatch) {
+                return LSR(language, script, region, LSR::EXPLICIT_LSR);
+            }
             return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region,
                        LSR::EXPLICIT_LSR, errorCode);
         case 'B':
+            if (returnInputIfUnmatch) {
+                return LSR(language, script, region, LSR::EXPLICIT_LSR);
+            }
             return LSR(PSEUDO_BIDI_PREFIX, language, script, region,
                        LSR::EXPLICIT_LSR, errorCode);
         case 'C':
+            if (returnInputIfUnmatch) {
+                return LSR(language, script, region, LSR::EXPLICIT_LSR);
+            }
             return LSR(PSEUDO_CRACKED_PREFIX, language, script, region,
                        LSR::EXPLICIT_LSR, errorCode);
         default:  // normal locale
@@ -536,9 +612,10 @@ LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, c
     return maximize(language, script, region, returnInputIfUnmatch, errorCode);
 }
 
-LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region,
+LSR LikelySubtags::maximize(const char *language, const char *script, const char *region,
                              bool returnInputIfUnmatch,
                              UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return {}; }
     return maximize({language, (int32_t)uprv_strlen(language)},
                     {script, (int32_t)uprv_strlen(script)},
                     {region, (int32_t)uprv_strlen(region)},
@@ -546,23 +623,21 @@ LSR XLikelySubtags::maximize(const char *language, const char *script, const cha
                     errorCode);
 }
 
-bool XLikelySubtags::isMacroregion(StringPiece& region, UErrorCode& errorCode) const {
+bool LikelySubtags::isMacroregion(StringPiece& region, UErrorCode& errorCode) const {  // True if region is one of the codes in gMacroregions; false on any failure.
+    if (U_FAILURE(errorCode)) { return false; }
     // In Java, we use the Region class. In C++, since Region is under i18n,
     // the macroregion codes are kept in gMacroregions to avoid a dependency
     // from common on i18n/region.cpp.
-    if (U_FAILURE(errorCode)) { return false; }
-    umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode);
+    umtx_initOnce(gInitOnce, &LikelySubtags::initLikelySubtags, errorCode);  // initLikelySubtags() is what fills gMacroregions
     if (U_FAILURE(errorCode)) { return false; }
     UnicodeString str(UnicodeString::fromUTF8(region));
     return gMacroregions->contains((void *)&str);
 }
 
-LSR XLikelySubtags::maximize(StringPiece language, StringPiece script, StringPiece region,
+LSR LikelySubtags::maximize(StringPiece language, StringPiece script, StringPiece region,
                              bool returnInputIfUnmatch,
                              UErrorCode &errorCode) const {
-    if (U_FAILURE(errorCode)) {
-        return LSR(language, script, region, LSR::EXPLICIT_LSR, errorCode);
-    }
+    if (U_FAILURE(errorCode)) { return {}; }
     if (language.compare("und") == 0) {
         language = "";
     }
@@ -681,7 +756,7 @@ LSR XLikelySubtags::maximize(StringPiece language, StringPiece script, StringPie
     return LSR(language, script, region, retainMask, errorCode);
 }
 
-int32_t XLikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const {
+int32_t LikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const {
     // If likelyInfo >= 0:
     // likelyInfo bit 1 is set if the previous comparison with lsr
     // was for equal language and script.
@@ -723,7 +798,7 @@ int32_t XLikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t
 }
 
 // Subset of maximize().
-int32_t XLikelySubtags::getLikelyIndex(const char *language, const char *script) const {
+int32_t LikelySubtags::getLikelyIndex(const char *language, const char *script) const {
     if (uprv_strcmp(language, "und") == 0) {
         language = "";
     }
@@ -781,7 +856,7 @@ int32_t XLikelySubtags::getLikelyIndex(const char *language, const char *script)
     return value;
 }
 
-int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
+int32_t LikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
     UStringTrieResult result;
     uint8_t c;
     if ((c = s[i]) == 0) {
@@ -814,7 +889,7 @@ int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) {
     default: return -1;
     }
 }
-int32_t XLikelySubtags::trieNext(BytesTrie &iter, StringPiece s, int32_t i) {
+int32_t LikelySubtags::trieNext(BytesTrie &iter, StringPiece s, int32_t i) {
     UStringTrieResult result;
     uint8_t c;
     if (s.length() == i) {
@@ -848,14 +923,13 @@ int32_t XLikelySubtags::trieNext(BytesTrie &iter, StringPiece s, int32_t i) {
     }
 }
 
-LSR XLikelySubtags::minimizeSubtags(StringPiece language, StringPiece script,
+LSR LikelySubtags::minimizeSubtags(StringPiece language, StringPiece script,
                                     StringPiece region,
                                     bool favorScript,
                                     UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return {}; }
     LSR max = maximize(language, script, region, true, errorCode);
-    if (U_FAILURE(errorCode)) {
-        return max;
-    }
+    if (U_FAILURE(errorCode)) { return {}; }
     // If no match, return it.
     if (uprv_strlen(max.language) == 0 &&
         uprv_strlen(max.script) == 0 &&
@@ -868,9 +942,7 @@ LSR XLikelySubtags::minimizeSubtags(StringPiece language, StringPiece script,
     }
     // try language
     LSR test = maximize(max.language, "", "", true, errorCode);
-    if (U_FAILURE(errorCode)) {
-        return max;
-    }
+    if (U_FAILURE(errorCode)) { return {}; }
     if (test.isEquivalentTo(max)) {
         return LSR(max.language, "", "", LSR::DONT_CARE_FLAGS, errorCode);
     }
@@ -879,27 +951,21 @@ LSR XLikelySubtags::minimizeSubtags(StringPiece language, StringPiece script,
         // favor Region
         // try language and region
         test = maximize(max.language, "", max.region, true, errorCode);
-        if (U_FAILURE(errorCode)) {
-            return max;
-        }
+        if (U_FAILURE(errorCode)) { return {}; }
         if (test.isEquivalentTo(max)) {
             return LSR(max.language, "", max.region, LSR::DONT_CARE_FLAGS, errorCode);
         }
     }
     // try language and script
     test = maximize(max.language, max.script, "", true, errorCode);
-    if (U_FAILURE(errorCode)) {
-        return max;
-    }
+    if (U_FAILURE(errorCode)) { return {}; }
     if (test.isEquivalentTo(max)) {
         return LSR(max.language, max.script, "", LSR::DONT_CARE_FLAGS, errorCode);
     }
     if (favorScript) {
         // try language and region
         test = maximize(max.language, "", max.region, true, errorCode);
-        if (U_FAILURE(errorCode)) {
-            return max;
-        }
+        if (U_FAILURE(errorCode)) { return {}; }
         if (test.isEquivalentTo(max)) {
             return LSR(max.language, "", max.region, LSR::DONT_CARE_FLAGS, errorCode);
         }

+ 8 - 9
thirdparty/icu4c/common/loclikelysubtags.h

@@ -19,7 +19,7 @@
 
 U_NAMESPACE_BEGIN
 
-struct XLikelySubtagsData;
+struct LikelySubtagsData;
 
 struct LocaleDistanceData {
     LocaleDistanceData() = default;
@@ -37,15 +37,14 @@ private:
     LocaleDistanceData &operator=(const LocaleDistanceData &) = delete;
 };
 
-// TODO(ICU-20777): Rename to just LikelySubtags.
-class XLikelySubtags final : public UMemory {
+class LikelySubtags final : public UMemory {
 public:
-    ~XLikelySubtags();
+    ~LikelySubtags();
 
     static constexpr int32_t SKIP_SCRIPT = 1;
 
     // VisibleForTesting
-    static const XLikelySubtags *getSingleton(UErrorCode &errorCode);
+    static const LikelySubtags *getSingleton(UErrorCode &errorCode);
 
     // VisibleForTesting
     LSR makeMaximizedLsrFrom(const Locale &locale,
@@ -72,9 +71,9 @@ public:
     const LocaleDistanceData &getDistanceData() const { return distanceData; }
 
 private:
-    XLikelySubtags(XLikelySubtagsData &data);
-    XLikelySubtags(const XLikelySubtags &other) = delete;
-    XLikelySubtags &operator=(const XLikelySubtags &other) = delete;
+    LikelySubtags(LikelySubtagsData &data);
+    LikelySubtags(const LikelySubtags &other) = delete;
+    LikelySubtags &operator=(const LikelySubtags &other) = delete;
 
     static void initLikelySubtags(UErrorCode &errorCode);
 
@@ -120,7 +119,7 @@ private:
     int32_t lsrsLength;
 #endif
 
-    // distance/matcher data: see comment in XLikelySubtagsData::load()
+    // distance/matcher data: see comment in LikelySubtagsData::load()
     LocaleDistanceData distanceData;
 };
 

+ 41 - 45
thirdparty/icu4c/common/locmap.cpp

@@ -28,7 +28,6 @@
  */
 
 #include "locmap.h"
-#include "bytesinkutil.h"
 #include "charstr.h"
 #include "cstring.h"
 #include "cmemory.h"
@@ -49,6 +48,8 @@
  * [MS-LCID] Windows Language Code Identifier (LCID) Reference
  */
 
+namespace {
+
 /*
 ////////////////////////////////////////////////
 //
@@ -87,7 +88,7 @@ typedef struct ILcidPosixMap
  * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
  */
 #define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
-static const ILcidPosixElement locmap_ ## languageID [] = { \
+constexpr ILcidPosixElement locmap_ ## languageID [] = { \
     {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
     {hostID, #posixID}, \
 };
@@ -97,7 +98,7 @@ static const ILcidPosixElement locmap_ ## languageID [] = { \
  * @param id the POSIX ID, either a language or language_TERRITORY
  */
 #define ILCID_POSIX_SUBTABLE(id) \
-static const ILcidPosixElement locmap_ ## id [] =
+constexpr ILcidPosixElement locmap_ ## id [] =
 
 
 /**
@@ -796,7 +797,7 @@ ILCID_POSIX_SUBTABLE(zh) {
 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
 
 /* This must be static and grouped by LCID. */
-static const ILcidPosixMap gPosixIDmap[] = {
+constexpr ILcidPosixMap gPosixIDmap[] = {
     ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
     ILCID_POSIX_MAP(am),    /*  am  Amharic                   0x5e */
     ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
@@ -945,14 +946,14 @@ static const ILcidPosixMap gPosixIDmap[] = {
     ILCID_POSIX_MAP(zu),    /*  zu  Zulu                      0x35 */
 };
 
-static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
+constexpr uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
 
 /**
  * Do not call this function. It is called by hostID.
  * The function is not private because this struct must stay as a C struct,
  * and this is an internal class.
  */
-static int32_t
+int32_t
 idCmp(const char* id1, const char* id2)
 {
     int32_t diffIdx = 0;
@@ -972,9 +973,10 @@ idCmp(const char* id1, const char* id2)
  *               no equivalent Windows LCID.
  * @return the LCID
  */
-static uint32_t
-getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
+uint32_t
+getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode& status)
 {
+    if (U_FAILURE(status)) { return locmap_root->hostID; }
     int32_t bestIdx = 0;
     int32_t bestIdxDiff = 0;
     int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
@@ -996,16 +998,16 @@ getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
     if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
         && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
     {
-        *status = U_USING_FALLBACK_WARNING;
+        status = U_USING_FALLBACK_WARNING;
         return this_0->regionMaps[bestIdx].hostID;
     }
 
     /*no match found */
-    *status = U_ILLEGAL_ARGUMENT_ERROR;
-    return this_0->regionMaps->hostID;
+    status = U_ILLEGAL_ARGUMENT_ERROR;
+    return locmap_root->hostID;
 }
 
-static const char*
+const char*
 getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
 {
     uint32_t i;
@@ -1035,19 +1037,21 @@ getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
  * quz -> qu
  * prs -> fa
  */
-#define FIX_LANGUAGE_ID_TAG(buffer, len) \
-    if (len >= 3) { \
-        if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {\
-            buffer[2] = 0; \
-            uprv_strcat(buffer, buffer+3); \
-        } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {\
-            buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0; \
-            uprv_strcat(buffer, buffer+3); \
-        } \
+void FIX_LANGUAGE_ID_TAG(char* buffer, int32_t len) {
+    // Rewrites the leading language subtag of the NUL-terminated tag in
+    // buffer, in place: "quz..." becomes "qu..." and "prs..." becomes "fa...".
+    // NOTE(review): len is presumably the length of that tag; confirm at the
+    // call sites. Anything shorter than three characters is left untouched.
+    if (len >= 3) {
+        if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {
+            // Close the one-character gap left by dropping 'z'. Source and
+            // destination overlap, so this has to be memmove, not strcat;
+            // the +1 carries the terminating NUL along.
+            uprv_memmove(buffer+2, buffer+3, uprv_strlen(buffer+3)+1);
+        } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {
+            buffer[0] = 'f'; buffer[1] = 'a';
+            // Same overlapping shift as in the "quz" case.
+            uprv_memmove(buffer+2, buffer+3, uprv_strlen(buffer+3)+1);
+        }
     }
-
+}
 #endif
 
+}  // namespace
+
 U_CAPI int32_t
 uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
 {
@@ -1147,7 +1151,7 @@ uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UEr
 
     /* no match found */
     *status = U_ILLEGAL_ARGUMENT_ERROR;
-    return -1;
+    return 0;
 }
 
 /*
@@ -1176,11 +1180,7 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
     // Check any for keywords.
     if (uprv_strchr(localeID, '@'))
     {
-        icu::CharString collVal;
-        {
-            icu::CharStringByteSink sink(&collVal);
-            ulocimp_getKeywordValue(localeID, "collation", sink, status);
-        }
+        icu::CharString collVal = ulocimp_getKeywordValue(localeID, "collation", *status);
         if (U_SUCCESS(*status) && !collVal.isEmpty())
         {
             // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
@@ -1189,10 +1189,7 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
         else
         {
             // If the locale ID contains keywords other than collation, just use the base name.
-            {
-                icu::CharStringByteSink sink(&baseName);
-                ulocimp_getBaseName(localeID, sink, status);
-            }
+            baseName = ulocimp_getBaseName(localeID, *status);
             if (U_SUCCESS(*status) && !baseName.isEmpty())
             {
                 mylocaleID = baseName.data();
@@ -1201,11 +1198,7 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
     }
 
     // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
-    icu::CharString asciiBCP47Tag;
-    {
-        icu::CharStringByteSink sink(&asciiBCP47Tag);
-        ulocimp_toLanguageTag(mylocaleID, sink, false, status);
-    }
+    icu::CharString asciiBCP47Tag = ulocimp_toLanguageTag(mylocaleID, false, *status);
 
     if (U_SUCCESS(*status))
     {
@@ -1253,6 +1246,14 @@ uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
 U_CAPI uint32_t
 uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
 {
+    if (U_FAILURE(*status) ||
+            langID == nullptr ||
+            posixID == nullptr ||
+            uprv_strlen(langID) < 2 ||
+            uprv_strlen(posixID) < 2) {
+        return locmap_root->hostID;
+    }
+
     // This function does the table lookup when native platform name->lcid conversion isn't available,
     // or for locales that don't follow patterns the platform expects.
     uint32_t   low    = 0;
@@ -1266,11 +1267,6 @@ uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
     UErrorCode myStatus;
     uint32_t   idx;
 
-    /* Check for incomplete id. */
-    if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
-        return 0;
-    }
-
     /*Binary search for the map entry for normal cases */
 
     while (high > low)  /*binary search*/{
@@ -1288,7 +1284,7 @@ uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
             low = mid;
         }
         else /*we found it*/{
-            return getHostID(&gPosixIDmap[mid], posixID, status);
+            return getHostID(&gPosixIDmap[mid], posixID, *status);
         }
         oldmid = mid;
     }
@@ -1299,7 +1295,7 @@ uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
      */
     for (idx = 0; idx < gLocaleCount; idx++ ) {
         myStatus = U_ZERO_ERROR;
-        value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
+        value = getHostID(&gPosixIDmap[idx], posixID, myStatus);
         if (myStatus == U_ZERO_ERROR) {
             return value;
         }
@@ -1315,5 +1311,5 @@ uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
 
     /* no match found */
     *status = U_ILLEGAL_ARGUMENT_ERROR;
-    return 0;   /* return international (root) */
+    return locmap_root->hostID;   /* return international (root) */
 }

+ 51 - 49
thirdparty/icu4c/common/locresdata.cpp

@@ -24,7 +24,6 @@
 #include "unicode/putil.h"
 #include "unicode/uloc.h"
 #include "unicode/ures.h"
-#include "bytesinkutil.h"
 #include "charstr.h"
 #include "cstring.h"
 #include "ulocimp.h"
@@ -50,10 +49,10 @@ uloc_getTableStringWithFallback(const char *path, const char *locale,
                               int32_t *pLength,
                               UErrorCode *pErrorCode)
 {
+    if (U_FAILURE(*pErrorCode)) { return nullptr; }
 /*    char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/
     const char16_t *item=nullptr;
     UErrorCode errorCode;
-    char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0};
 
     /*
      * open the bundle for the current locale
@@ -128,15 +127,16 @@ uloc_getTableStringWithFallback(const char *path, const char *locale,
                *pErrorCode = errorCode;
                 break;
             }
-            
-            u_UCharsToChars(fallbackLocale, explicitFallbackName, len);
-            
+
+            icu::CharString explicitFallbackName;
+            explicitFallbackName.appendInvariantChars(fallbackLocale, len, errorCode);
+
             /* guard against recursive fallback */
-            if(uprv_strcmp(explicitFallbackName, locale)==0){
+            if (explicitFallbackName == locale) {
                 *pErrorCode = U_INTERNAL_PROGRAM_ERROR;
                 break;
             }
-            rb.adoptInstead(ures_open(path, explicitFallbackName, &errorCode));
+            rb.adoptInstead(ures_open(path, explicitFallbackName.data(), &errorCode));
             if(U_FAILURE(errorCode)){
                 *pErrorCode = errorCode;
                 break;
@@ -150,63 +150,65 @@ uloc_getTableStringWithFallback(const char *path, const char *locale,
     return item;
 }
 
-static ULayoutType
+namespace {
+
+ULayoutType  // Maps the "layout"/<key> resource string of localeId to a ULayoutType.
 _uloc_getOrientationHelper(const char* localeId,
                            const char* key,
-                           UErrorCode *status)
+                           UErrorCode& status)
 {
     ULayoutType result = ULOC_LAYOUT_UNKNOWN;
 
-    if (!U_FAILURE(*status)) {
-        icu::CharString localeBuffer;
-        {
-            icu::CharStringByteSink sink(&localeBuffer);
-            ulocimp_canonicalize(localeId, sink, status);
-        }
+    if (U_FAILURE(status)) { return result; }  // ULOC_LAYOUT_UNKNOWN when called with a failure already set
 
-        if (!U_FAILURE(*status)) {
-            int32_t length = 0;
-            const char16_t* const value =
-                uloc_getTableStringWithFallback(
-                    nullptr,
-                    localeBuffer.data(),
-                    "layout",
-                    nullptr,
-                    key,
-                    &length,
-                    status);
-
-            if (!U_FAILURE(*status) && length != 0) {
-                switch(value[0])
-                {
-                case 0x0062: /* 'b' */
-                    result = ULOC_LAYOUT_BTT;
-                    break;
-                case 0x006C: /* 'l' */
-                    result = ULOC_LAYOUT_LTR;
-                    break;
-                case 0x0072: /* 'r' */
-                    result = ULOC_LAYOUT_RTL;
-                    break;
-                case 0x0074: /* 't' */
-                    result = ULOC_LAYOUT_TTB;
-                    break;
-                default:
-                    *status = U_INTERNAL_PROGRAM_ERROR;
-                    break;
-                }
-            }
+    icu::CharString localeBuffer = ulocimp_canonicalize(localeId, status);  // the lookup below uses the canonicalized ID
+
+    if (U_FAILURE(status)) { return result; }
+
+    int32_t length = 0;
+    const char16_t* const value =
+        uloc_getTableStringWithFallback(
+            nullptr,
+            localeBuffer.data(),
+            "layout",
+            nullptr,
+            key,
+            &length,
+            &status);
+
+    if (U_FAILURE(status)) { return result; }
+
+    if (length != 0) {  // an empty string leaves result at ULOC_LAYOUT_UNKNOWN
+        switch(value[0])  // only the first code unit of the string is examined
+        {
+        case 0x0062: /* 'b' */
+            result = ULOC_LAYOUT_BTT;
+            break;
+        case 0x006C: /* 'l' */
+            result = ULOC_LAYOUT_LTR;
+            break;
+        case 0x0072: /* 'r' */
+            result = ULOC_LAYOUT_RTL;
+            break;
+        case 0x0074: /* 't' */
+            result = ULOC_LAYOUT_TTB;
+            break;
+        default:
+            status = U_INTERNAL_PROGRAM_ERROR;  // first code unit is none of b, l, r, t
+            break;
         }
     }
 
     return result;
 }
 
+}  // namespace
+
 U_CAPI ULayoutType U_EXPORT2
 uloc_getCharacterOrientation(const char* localeId,
                              UErrorCode *status)
 {
-    return _uloc_getOrientationHelper(localeId, "characters", status);
+    return _uloc_getOrientationHelper(localeId, "characters", *status);  // "characters" entry of the layout table; status is dereferenced, so it must not be null
 }
 
 /**
@@ -220,5 +222,5 @@ U_CAPI ULayoutType U_EXPORT2
 uloc_getLineOrientation(const char* localeId,
                         UErrorCode *status)
 {
-    return _uloc_getOrientationHelper(localeId, "lines", status);
+    return _uloc_getOrientationHelper(localeId, "lines", *status);
 }

+ 13 - 12
thirdparty/icu4c/common/locutil.cpp

@@ -145,9 +145,7 @@ LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& res
 Locale&
 LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
 {
-    enum { BUFLEN = 128 }; // larger than ever needed
-
-    if (id.isBogus() || id.length() >= BUFLEN) {
+    if (id.isBogus()) {
         result.setToBogus();
     } else {
         /*
@@ -168,24 +166,29 @@ LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
          *
          * There should be only at most one '@' in a locale ID.
          */
-        char buffer[BUFLEN];
+        CharString buffer;
         int32_t prev, i;
         prev = 0;
-        for(;;) {
+        UErrorCode status = U_ZERO_ERROR;
+        do {
             i = id.indexOf((char16_t)0x40, prev);
             if(i < 0) {
                 // no @ between prev and the rest of the string
-                id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV);
+                buffer.appendInvariantChars(id.tempSubString(prev), status);
                 break; // done
             } else {
                 // normal invariant-character conversion for text between @s
-                id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV);
+                buffer.appendInvariantChars(id.tempSubString(prev, i - prev), status);
                 // manually "convert" U+0040 at id[i] into '@' at buffer[i]
-                buffer[i] = '@';
+                buffer.append('@', status);
                 prev = i + 1;
             }
+        } while (U_SUCCESS(status));
+        if (U_FAILURE(status)) {
+            result.setToBogus();
+        } else {
+            result = Locale::createFromName(buffer.data());
         }
-        result = Locale::createFromName(buffer);
     }
     return result;
 }
@@ -259,7 +262,7 @@ LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
     return htp;
 }
 
-UBool
+bool
 LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
 {
     return child.indexOf(root) == 0 &&
@@ -271,5 +274,3 @@ U_NAMESPACE_END
 
 /* !UCONFIG_NO_SERVICE */
 #endif
-
-

+ 1 - 1
thirdparty/icu4c/common/locutil.h

@@ -28,7 +28,7 @@ public:
   static Locale& initLocaleFromName(const UnicodeString& id, Locale& result);
   static UnicodeString& initNameFromLocale(const Locale& locale, UnicodeString& result);
   static const Hashtable* getAvailableLocaleNames(const UnicodeString& bundleID);
-  static UBool isFallbackOf(const UnicodeString& root, const UnicodeString& child);
+  static bool isFallbackOf(const UnicodeString& root, const UnicodeString& child);
 };
 
 U_NAMESPACE_END

+ 0 - 1
thirdparty/icu4c/common/messagepattern.cpp

@@ -999,7 +999,6 @@ MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
     }
     setParseError(parseError, start /*, limit*/);  // Bad syntax for numeric value.
     errorCode=U_PATTERN_SYNTAX_ERROR;
-    return;
 }
 
 int32_t

+ 8 - 2
thirdparty/icu4c/common/normalizer2impl.cpp

@@ -1390,8 +1390,11 @@ Normalizer2Impl::composePair(UChar32 a, UChar32 b) const {
     } else if(norm16<minYesNoMappingsOnly) {
         // a combines forward.
         if(isJamoL(norm16)) {
+            if (b < Hangul::JAMO_V_BASE) {
+                return U_SENTINEL;
+            }
             b-=Hangul::JAMO_V_BASE;
-            if(0<=b && b<Hangul::JAMO_V_COUNT) {
+            if(b<Hangul::JAMO_V_COUNT) {
                 return
                     (Hangul::HANGUL_BASE+
                      ((a-Hangul::JAMO_L_BASE)*Hangul::JAMO_V_COUNT+b)*
@@ -1400,8 +1403,11 @@ Normalizer2Impl::composePair(UChar32 a, UChar32 b) const {
                 return U_SENTINEL;
             }
         } else if(isHangulLV(norm16)) {
+            if (b <= Hangul::JAMO_T_BASE) {
+               return U_SENTINEL;
+            }
             b-=Hangul::JAMO_T_BASE;
-            if(0<b && b<Hangul::JAMO_T_COUNT) {  // not b==0!
+            if(b<Hangul::JAMO_T_COUNT) {  // not b==0!
                 return a+b;
             } else {
                 return U_SENTINEL;

+ 4 - 4
thirdparty/icu4c/common/normalizer2impl.h

@@ -141,12 +141,12 @@ public:
     /** Constructs only; init() should be called. */
     ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) :
         impl(ni), str(dest),
-        start(NULL), reorderStart(NULL), limit(NULL),
+        start(nullptr), reorderStart(nullptr), limit(nullptr),
         remainingCapacity(0), lastCC(0) {}
     /** Constructs, removes the string contents, and initializes for a small initial capacity. */
     ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest, UErrorCode &errorCode);
     ~ReorderingBuffer() {
-        if(start!=NULL) {
+        if (start != nullptr) {
             str.releaseBuffer((int32_t)(limit-start));
         }
     }
@@ -245,7 +245,7 @@ private:
  */
 class U_COMMON_API Normalizer2Impl : public UObject {
 public:
-    Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) { }
+    Normalizer2Impl() : normTrie(nullptr), fCanonIterData(nullptr) {}
     virtual ~Normalizer2Impl();
 
     void init(const int32_t *inIndexes, const UCPTrie *inTrie,
@@ -623,7 +623,7 @@ private:
     const uint16_t *getMapping(uint16_t norm16) const { return extraData+(norm16>>OFFSET_SHIFT); }
     const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const {
         if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) {
-            return NULL;
+            return nullptr;
         } else if(norm16<minMaybeYes) {
             return getMapping(norm16);  // for yesYes; if Jamo L: harmless empty list
         } else {

File diff suppressed because it is too large
+ 844 - 840
thirdparty/icu4c/common/propname_data.h


+ 5 - 6
thirdparty/icu4c/common/putil.cpp

@@ -1498,7 +1498,6 @@ static void U_CALLCONV dataDirectoryInitFn() {
     }
 
     u_setDataDirectory(path);
-    return;
 }
 
 U_CAPI const char * U_EXPORT2
@@ -1622,7 +1621,7 @@ static const char *uprv_getPOSIXIDForCategory(int category)
         * of nullptr, will modify the libc behavior.
         */
         posixID = setlocale(category, nullptr);
-        if ((posixID == 0)
+        if ((posixID == nullptr)
             || (uprv_strcmp("C", posixID) == 0)
             || (uprv_strcmp("POSIX", posixID) == 0))
         {
@@ -1636,16 +1635,16 @@ static const char *uprv_getPOSIXIDForCategory(int category)
                 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
                 if ((posixID == 0) || (posixID[0] == '\0')) {
 #else
-            if (posixID == 0) {
+            if (posixID == nullptr) {
                 posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
-                if (posixID == 0) {
+                if (posixID == nullptr) {
 #endif
                     posixID = getenv("LANG");
                 }
             }
         }
     }
-    if ((posixID==0)
+    if ((posixID == nullptr)
         || (uprv_strcmp("C", posixID) == 0)
         || (uprv_strcmp("POSIX", posixID) == 0))
     {
@@ -1665,7 +1664,7 @@ static const char *uprv_getPOSIXIDForCategory(int category)
 static const char *uprv_getPOSIXIDForDefaultLocale()
 {
     static const char* posixID = nullptr;
-    if (posixID == 0) {
+    if (posixID == nullptr) {
         posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
     }
     return posixID;

+ 2 - 2
thirdparty/icu4c/common/rbbi.cpp

@@ -1212,7 +1212,7 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c, const char* locale) {
         fLanguageBreakEngines = new UStack(status);
         if (fLanguageBreakEngines == nullptr || U_FAILURE(status)) {
             delete fLanguageBreakEngines;
-            fLanguageBreakEngines = 0;
+            fLanguageBreakEngines = nullptr;
             return nullptr;
         }
     }
@@ -1252,7 +1252,7 @@ RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c, const char* locale) {
         U_ASSERT(!fLanguageBreakEngines->hasDeleter());
         if (U_FAILURE(status)) {
             delete fUnhandledBreakEngine;
-            fUnhandledBreakEngine = 0;
+            fUnhandledBreakEngine = nullptr;
             return nullptr;
         }
     }

+ 0 - 4
thirdparty/icu4c/common/rbbi_cache.cpp

@@ -246,7 +246,6 @@ void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode
         fBI->fDone = false;
         next();
     }
-    return;
 }
 
 
@@ -265,7 +264,6 @@ void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode
             current();
         }
     }
-    return;
 }
 
 
@@ -277,7 +275,6 @@ void RuleBasedBreakIterator::BreakCache::nextOL() {
     fBI->fDone = !populateFollowing();
     fBI->fPosition = fTextIdx;
     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
-    return;
 }
 
 
@@ -297,7 +294,6 @@ void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
     fBI->fDone = (fBufIdx == initialBufIdx);
     fBI->fPosition = fTextIdx;
     fBI->fRuleStatusIndex = fStatuses[fBufIdx];
-    return;
 }
 
 

+ 64 - 7
thirdparty/icu4c/common/rbbinode.cpp

@@ -123,19 +123,66 @@ RBBINode::~RBBINode() {
         break;
 
     default:
-        delete        fLeftChild;
+        // Avoid using a recursive implementation because of stack overflow problems.
+        // See bug ICU-22584.
+        // delete        fLeftChild;
+        NRDeleteNode(fLeftChild);
         fLeftChild =   nullptr;
-        delete        fRightChild;
+        // delete        fRightChild;
+        NRDeleteNode(fRightChild);
         fRightChild = nullptr;
     }
 
-
     delete fFirstPosSet;
     delete fLastPosSet;
     delete fFollowPos;
-
 }
 
+/**
+ * Non-recursive delete of a node + its children. Used from the node destructor
+ * instead of the more obvious recursive implementation to avoid problems with
+ * stack overflow with some perverse test rule data (from fuzzing).
+ *
+ * The subtree is walked iteratively through the fLeftChild / fRightChild /
+ * fParent links. The walk descends (left child first, then right) until it
+ * reaches a node that has no children or that is a varRef / setRef node,
+ * unlinks that node from its parent, deletes it, and climbs back to the
+ * parent. It stops once the climb reaches the fParent of the original node.
+ *
+ * @param node  Root of the subtree to delete. nullptr is accepted and ignored.
+ */
+void RBBINode::NRDeleteNode(RBBINode *node) {
+    if (node == nullptr) {
+        return;
+    }
+
+    RBBINode *stopNode = node->fParent;  // the walk ends when the climb gets back up to this node
+    RBBINode *nextNode = node;
+    while (nextNode != stopNode && nextNode != nullptr) {  // nullptr: the climb ran past a node whose fParent is null
+        RBBINode *currentNode = nextNode;
+
+        if ((currentNode->fLeftChild == nullptr && currentNode->fRightChild == nullptr) ||
+                currentNode->fType == varRef ||      // varRef and setRef nodes do not
+                currentNode->fType == setRef) {      // own their children nodes.
+            // CurrentNode is effectively a leaf node; it's safe to go ahead and delete it.
+            nextNode = currentNode->fParent;  // read the parent link before currentNode is deleted
+            if (nextNode) {
+                if (nextNode->fLeftChild == currentNode) {
+                    nextNode->fLeftChild = nullptr;  // unlink so the parent does not descend into the deleted node again
+                } else if (nextNode->fRightChild == currentNode) {
+                    nextNode->fRightChild = nullptr;
+                }
+            }
+            delete currentNode;
+        } else if (currentNode->fLeftChild) {  // descend into the left subtree first
+            nextNode = currentNode->fLeftChild;
+            if (nextNode->fParent == nullptr) {
+                nextNode->fParent = currentNode;
+                // fParent isn't always set; do it now if not.
+            }
+            U_ASSERT(nextNode->fParent == currentNode);  // NOTE(review): if U_ASSERT compiles out, a child whose fParent points elsewhere would redirect the climb -- confirm fParent is always null or the real parent.
+        } else if (currentNode->fRightChild) {  // left child is null here, so only the right subtree remains
+            nextNode = currentNode->fRightChild;
+            if (nextNode->fParent == nullptr) {
+                nextNode->fParent = currentNode;
+                // fParent isn't always set; do it now if not.
+            }
+            U_ASSERT(nextNode->fParent == currentNode);
+        }
+    }
+}
 
 //-------------------------------------------------------------------------
 //
@@ -192,7 +239,17 @@ RBBINode *RBBINode::cloneTree() {
 //                      nested references are handled by cloneTree(), not here.
 //
 //-------------------------------------------------------------------------
-RBBINode *RBBINode::flattenVariables() {
+constexpr int kRecursiveDepthLimit = 3500;
+RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
+    if (U_FAILURE(status)) {
+        return this;
+    }
+    // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
+    // to avoid stack overflow crash.
+    if (depth > kRecursiveDepthLimit) {
+        status = U_INPUT_TOO_LONG_ERROR;
+        return this;
+    }
     if (fType == varRef) {
         RBBINode *retNode  = fLeftChild->cloneTree();
         if (retNode != nullptr) {
@@ -204,11 +261,11 @@ RBBINode *RBBINode::flattenVariables() {
     }
 
     if (fLeftChild != nullptr) {
-        fLeftChild = fLeftChild->flattenVariables();
+        fLeftChild = fLeftChild->flattenVariables(status, depth+1);
         fLeftChild->fParent  = this;
     }
     if (fRightChild != nullptr) {
-        fRightChild = fRightChild->flattenVariables();
+        fRightChild = fRightChild->flattenVariables(status, depth+1);
         fRightChild->fParent = this;
     }
     return this;

+ 2 - 1
thirdparty/icu4c/common/rbbinode.h

@@ -94,9 +94,10 @@ class RBBINode : public UMemory {
         RBBINode(NodeType t);
         RBBINode(const RBBINode &other);
         ~RBBINode();
+        static void  NRDeleteNode(RBBINode *node);
         
         RBBINode    *cloneTree();
-        RBBINode    *flattenVariables();
+        RBBINode    *flattenVariables(UErrorCode &status, int depth=0);
         void         flattenSets();
         void         findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status);
 

+ 3 - 2
thirdparty/icu4c/common/rbbirb.cpp

@@ -86,7 +86,8 @@ RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString   &rules,
     if (U_FAILURE(status)) {
         return;
     }
-    if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) {
+    if (fSetBuilder == nullptr || fScanner == nullptr ||
+        fUSetNodes == nullptr || fRuleStatusVals == nullptr) {
         status = U_MEMORY_ALLOCATION_ERROR;
     }
 }
@@ -156,7 +157,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
     int32_t statusTableSize   = align8(fRuleStatusVals->size() * sizeof(int32_t));
 
     int32_t rulesLengthInUTF8 = 0;
-    u_strToUTF8WithSub(0, 0, &rulesLengthInUTF8,
+    u_strToUTF8WithSub(nullptr, 0, &rulesLengthInUTF8,
                        fStrippedRules.getBuffer(), fStrippedRules.length(),
                        0xfffd, nullptr, fStatus);
     *fStatus = U_ZERO_ERROR;

+ 32 - 15
thirdparty/icu4c/common/rbbiscan.cpp

@@ -289,6 +289,9 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
 
             // Terminate expression, leaves expression parse tree rooted in TOS node.
             fixOpStack(RBBINode::precStart);
+            if (U_FAILURE(*fRB->fStatus)) {
+                break;
+            }
 
             RBBINode *startExprNode  = fNodeStack[fNodeStackPtr-2];
             RBBINode *varRefNode     = fNodeStack[fNodeStackPtr-1];
@@ -312,6 +315,11 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
                 UErrorCode t = *fRB->fStatus;
                 *fRB->fStatus = U_ZERO_ERROR;
                 error(t);
+                // If adding $variableRef to the symbol table fails, delete
+                // both nodes, because deleting varRefNode will not delete
+                // RHSExprNode internally.
+                delete RHSExprNode;
+                delete varRefNode;
             }
 
             // Clean up the stack.
@@ -522,7 +530,13 @@ UBool RBBIRuleScanner::doParseActions(int32_t action)
             n = fNodeStack[fNodeStackPtr];
             uint32_t v = u_charDigitValue(fC.fChar);
             U_ASSERT(v < 10);
-            n->fVal = n->fVal*10 + v;
+            int64_t updated = static_cast<int64_t>(n->fVal)*10 + v;
+            // Avoid overflowing n->fVal.
+            if (updated > INT32_MAX) {
+                error(U_BRK_RULE_SYNTAX);
+                break;
+            }
+            n->fVal = static_cast<int32_t>(updated);
             break;
         }
 
@@ -762,6 +776,7 @@ void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, Unicode
     RBBINode *usetNode    = new RBBINode(RBBINode::uset);
     if (usetNode == nullptr) {
         error(U_MEMORY_ALLOCATION_ERROR);
+        delete setToAdopt;
         return;
     }
     usetNode->fInputSet   = setToAdopt;
@@ -796,8 +811,6 @@ void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, Unicode
     el->key = tkey;
     el->val = usetNode;
     uhash_put(fSetTable, el->key, el, fRB->fStatus);
-
-    return;
 }
 
 
@@ -926,6 +939,9 @@ void RBBIRuleScanner::nextChar(RBBIRuleChar &c) {
         }
     }
 
+    if (c.fChar == (UChar32)-1) {
+        return;
+    }
     if (fQuoteMode) {
         c.fEscaped = true;
     }
@@ -1199,7 +1215,6 @@ RBBINode  *RBBIRuleScanner::pushNewNode(RBBINode::NodeType  t) {
 //
 //------------------------------------------------------------------------------
 void RBBIRuleScanner::scanSet() {
-    UnicodeSet    *uset;
     ParsePosition  pos;
     int            startPos;
     int            i;
@@ -1211,12 +1226,12 @@ void RBBIRuleScanner::scanSet() {
     pos.setIndex(fScanIndex);
     startPos = fScanIndex;
     UErrorCode localStatus = U_ZERO_ERROR;
-    uset = new UnicodeSet();
-    if (uset == nullptr) {
-        localStatus = U_MEMORY_ALLOCATION_ERROR;
-    } else {
-        uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus);
+    LocalPointer<UnicodeSet> uset(new UnicodeSet(), localStatus);
+    if (U_FAILURE(localStatus)) {
+        error(localStatus);
+        return;
     }
+    uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus);
     if (U_FAILURE(localStatus)) {
         //  TODO:  Get more accurate position of the error from UnicodeSet's return info.
         //         UnicodeSet appears to not be reporting correctly at this time.
@@ -1224,20 +1239,22 @@ void RBBIRuleScanner::scanSet() {
             RBBIDebugPrintf("UnicodeSet parse position.ErrorIndex = %d\n", pos.getIndex());
         #endif
         error(localStatus);
-        delete uset;
         return;
     }
 
     // Verify that the set contains at least one code point.
     //
-    U_ASSERT(uset!=nullptr);
-    if (uset->isEmpty()) {
+    U_ASSERT(uset.isValid());
+    UnicodeSet tempSet(*uset);
+    // Use tempSet to handle the case where the UnicodeSet contains
+    // only string elements, such as [{ab}], and treat it as an empty set.
+    tempSet.removeAllStrings();
+    if (tempSet.isEmpty()) {
         // This set is empty.
         //  Make it an error, because it almost certainly is not what the user wanted.
         //  Also, avoids having to think about corner cases in the tree manipulation code
         //   that occurs later on.
         error(U_BRK_RULE_EMPTY_SET);
-        delete uset;
         return;
     }
 
@@ -1246,7 +1263,7 @@ void RBBIRuleScanner::scanSet() {
     //   Don't just set fScanIndex because the line/char positions maintained
     //   for error reporting would be thrown off.
     i = pos.getIndex();
-    for (;;) {
+    for (;U_SUCCESS(*fRB->fStatus);) {
         if (fNextIndex >= i) {
             break;
         }
@@ -1269,7 +1286,7 @@ void RBBIRuleScanner::scanSet() {
         //          character categories for run time engine.
         //     - Eliminates multiple instances of the same set.
         //     - Creates a new uset node if necessary (if this isn't a duplicate.)
-        findSetFor(n->fText, n, uset);
+        findSetFor(n->fText, n, uset.orphan());
     }
 
 }

+ 1 - 1
thirdparty/icu4c/common/rbbistbl.cpp

@@ -122,7 +122,7 @@ const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const
     RBBISymbolTable *This = (RBBISymbolTable *)this;   // cast off const
     if (ch == 0xffff) {
         retVal = fCachedSetLookup;
-        This->fCachedSetLookup = 0;
+        This->fCachedSetLookup = nullptr;
     }
     return retVal;
 }

+ 4 - 1
thirdparty/icu4c/common/rbbitblb.cpp

@@ -81,7 +81,10 @@ void  RBBITableBuilder::buildForwardTable() {
     // Walk through the tree, replacing any references to $variables with a copy of the
     //   parse tree for the substitution expression.
     //
-    fTree = fTree->flattenVariables();
+    fTree = fTree->flattenVariables(*fStatus, 0);
+    if (U_FAILURE(*fStatus)) {
+        return;
+    }
 #ifdef RBBI_DEBUG
     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) {
         RBBIDebugPuts("\nParse tree after flattening variable references.");

+ 8 - 10
thirdparty/icu4c/common/resbund.cpp

@@ -179,7 +179,7 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ResourceBundle)
 ResourceBundle::ResourceBundle(UErrorCode &err)
                                 :UObject(), fLocale(nullptr)
 {
-    fResource = ures_open(0, Locale::getDefault().getName(), &err);
+    fResource = ures_open(nullptr, Locale::getDefault().getName(), &err);
 }
 
 ResourceBundle::ResourceBundle(const ResourceBundle &other)
@@ -188,7 +188,7 @@ ResourceBundle::ResourceBundle(const ResourceBundle &other)
     UErrorCode status = U_ZERO_ERROR;
 
     if (other.fResource) {
-        fResource = ures_copyResb(0, other.fResource, &status);
+        fResource = ures_copyResb(nullptr, other.fResource, &status);
     } else {
         /* Copying a bad resource bundle */
         fResource = nullptr;
@@ -199,7 +199,7 @@ ResourceBundle::ResourceBundle(UResourceBundle *res, UErrorCode& err)
                                :UObject(), fLocale(nullptr)
 {
     if (res) {
-        fResource = ures_copyResb(0, res, &err);
+        fResource = ures_copyResb(nullptr, res, &err);
     } else {
         /* Copying a bad resource bundle */
         fResource = nullptr;
@@ -218,7 +218,7 @@ ResourceBundle& ResourceBundle::operator=(const ResourceBundle& other)
     if(this == &other) {
         return *this;
     }
-    if(fResource != 0) {
+    if (fResource != nullptr) {
         ures_close(fResource);
         fResource = nullptr;
     }
@@ -228,7 +228,7 @@ ResourceBundle& ResourceBundle::operator=(const ResourceBundle& other)
     }
     UErrorCode status = U_ZERO_ERROR;
     if (other.fResource) {
-        fResource = ures_copyResb(0, other.fResource, &status);
+        fResource = ures_copyResb(nullptr, other.fResource, &status);
     } else {
         /* Copying a bad resource bundle */
         fResource = nullptr;
@@ -238,12 +238,10 @@ ResourceBundle& ResourceBundle::operator=(const ResourceBundle& other)
 
 ResourceBundle::~ResourceBundle()
 {
-    if(fResource != 0) {
+    if (fResource != nullptr) {  // fResource is left nullptr when this object was copied from a bundle that had no resource
         ures_close(fResource);
     }
-    if(fLocale != nullptr) {
-      delete(fLocale);
-    }
+    delete fLocale;  // deleting a null pointer is a no-op in C++, so the former guard was redundant
 }
 
 ResourceBundle *
@@ -311,7 +309,7 @@ ResourceBundle ResourceBundle::getNext(UErrorCode& status) {
 
 UnicodeString ResourceBundle::getNextString(UErrorCode& status) {
     int32_t len = 0;
-    const char16_t* r = ures_getNextString(fResource, &len, 0, &status);
+    const char16_t* r = ures_getNextString(fResource, &len, nullptr, &status);
     return UnicodeString(true, r, len);
 }
 

+ 10 - 10
thirdparty/icu4c/common/ruleiter.cpp

@@ -27,12 +27,12 @@ RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const
     text(theText),
     pos(thePos),
     sym(theSym),
-    buf(0),
+    buf(nullptr),
     bufPos(0)
 {}
 
 UBool RuleCharacterIterator::atEnd() const {
-    return buf == 0 && pos.getIndex() == text.length();
+    return buf == nullptr && pos.getIndex() == text.length();  // at end only when no looked-up variable value is pending in buf and pos has reached the end of text
 }
 
 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
@@ -45,8 +45,8 @@ UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCod
         c = _current();
         _advance(U16_LENGTH(c));
 
-        if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
-            (options & PARSE_VARIABLES) != 0 && sym != 0) {
+        if (c == SymbolTable::SYMBOL_REF && buf == nullptr &&
+            (options & PARSE_VARIABLES) != 0 && sym != nullptr) {
             UnicodeString name = sym->parseReference(text, pos, text.length());
             // If name is empty there was an isolated SYMBOL_REF;
             // return it.  Caller must be prepared for this.
@@ -55,13 +55,13 @@ UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCod
             }
             bufPos = 0;
             buf = sym->lookup(name);
-            if (buf == 0) {
+            if (buf == nullptr) {
                 ec = U_UNDEFINED_VARIABLE;
                 return DONE;
             }
             // Handle empty variable value
             if (buf->length() == 0) {
-                buf = 0;
+                buf = nullptr;
             }
             continue;
         }
@@ -114,7 +114,7 @@ UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t m
     if (maxLookAhead < 0) {
         maxLookAhead = 0x7FFFFFFF;
     }
-    if (buf != 0) {
+    if (buf != nullptr) {
         buf->extract(bufPos, maxLookAhead, result);
     } else {
         text.extract(pos.getIndex(), maxLookAhead, result);
@@ -135,7 +135,7 @@ UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
 */
 
 UChar32 RuleCharacterIterator::_current() const {
-    if (buf != 0) {
+    if (buf != nullptr) {
         return buf->char32At(bufPos);
     } else {
         int i = pos.getIndex();
@@ -144,10 +144,10 @@ UChar32 RuleCharacterIterator::_current() const {
 }
 
 void RuleCharacterIterator::_advance(int32_t count) {
-    if (buf != 0) {
+    if (buf != nullptr) {
         bufPos += count;
         if (bufPos == buf->length()) {
-            buf = 0;
+            buf = nullptr;
         }
     } else {
         pos.setIndex(pos.getIndex() + count);

+ 1 - 1
thirdparty/icu4c/common/ruleiter.h

@@ -224,7 +224,7 @@ private:
 };
 
 inline UBool RuleCharacterIterator::inVariable() const {
-    return buf != 0;
+    return buf != nullptr;  // buf is non-null while characters are being read from a variable's value rather than from the rule text
 }
 
 U_NAMESPACE_END

+ 0 - 1
thirdparty/icu4c/common/ubidiln.cpp

@@ -244,7 +244,6 @@ ubidi_setLine(const UBiDi *pParaBiDi,
         }
     }
     pLineBiDi->pParaBiDi=pParaBiDi;     /* mark successful setLine */
-    return;
 }
 
 U_CAPI UBiDiLevel U_EXPORT2

+ 8 - 8
thirdparty/icu4c/common/ubrk.cpp

@@ -38,9 +38,9 @@ ubrk_open(UBreakIteratorType type,
       UErrorCode *status)
 {
 
-  if(U_FAILURE(*status)) return 0;
+  if (U_FAILURE(*status)) return nullptr;
 
-  BreakIterator *result = 0;
+  BreakIterator *result = nullptr;
 
   switch(type) {
 
@@ -70,11 +70,11 @@ ubrk_open(UBreakIteratorType type,
 
   // check for allocation error
   if (U_FAILURE(*status)) {
-     return 0;
+    return nullptr;
   }
-  if(result == 0) {
+  if (result == nullptr) {
     *status = U_MEMORY_ALLOCATION_ERROR;
-    return 0;
+    return nullptr;
   }
 
 
@@ -102,14 +102,14 @@ ubrk_openRules(  const char16_t     *rules,
                        UErrorCode   *status)  {
 
     if (status == nullptr || U_FAILURE(*status)){
-        return 0;
+        return nullptr;
     }
 
-    BreakIterator *result = 0;
+    BreakIterator *result = nullptr;
     UnicodeString ruleString(rules, rulesLength);
     result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status);
     if(U_FAILURE(*status)) {
-        return 0;
+        return nullptr;
     }
 
     UBreakIterator *uBI = (UBreakIterator *)result;

+ 1 - 1
thirdparty/icu4c/common/ucase.h

@@ -357,7 +357,7 @@ enum {
 /* definitions for 16-bit case properties word ------------------------------ */
 
 U_CFUNC const UTrie2 * U_EXPORT2
-ucase_getTrie();
+ucase_getTrie(void);
 
 /* 2-bit constants for types of cased characters */
 #define UCASE_TYPE_MASK     3

+ 10 - 12
thirdparty/icu4c/common/ucasemap.cpp

@@ -41,7 +41,6 @@
 #include "uassert.h"
 #include "ucase.h"
 #include "ucasemap_imp.h"
-#include "ustr_imp.h"
 
 U_NAMESPACE_USE
 
@@ -917,21 +916,20 @@ ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_P
         return 0;
     }
 
-    CheckedArrayByteSink sink(dest, destCapacity);
     if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
         edits->reset();
     }
-    stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
-                     (const uint8_t *)src, srcLength, sink, edits, errorCode);
-    sink.Flush();
-    if (U_SUCCESS(errorCode)) {
-        if (sink.Overflowed()) {
-            errorCode = U_BUFFER_OVERFLOW_ERROR;
-        } else if (edits != nullptr) {
-            edits->copyErrorTo(errorCode);
-        }
+    int32_t reslen = ByteSinkUtil::viaByteSinkToTerminatedChars(
+        dest, destCapacity,
+        [&](ByteSink& sink, UErrorCode& status) {
+            stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
+                             (const uint8_t *)src, srcLength, sink, edits, status);
+        },
+        errorCode);
+    if (U_SUCCESS(errorCode) && edits != nullptr) {
+        edits->copyErrorTo(errorCode);
     }
-    return u_terminateChars(dest, destCapacity, sink.NumberOfBytesAppended(), &errorCode);
+    return reslen;
 }
 
 /* public API functions */

File diff suppressed because it is too large
+ 1939 - 1938
thirdparty/icu4c/common/uchar_props_data.h


+ 6 - 5
thirdparty/icu4c/common/uchriter.cpp

@@ -20,14 +20,14 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCharCharacterIterator)
 
 UCharCharacterIterator::UCharCharacterIterator()
   : CharacterIterator(),
-  text(0)
+  text(nullptr)
 {
     // never default construct!
 }
 
 UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr,
                                                int32_t length)
-  : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0),
+  : CharacterIterator(textPtr != nullptr ? (length >= 0 ? length : u_strlen(textPtr)) : 0),
   text(textPtr)
 {
 }
@@ -35,7 +35,7 @@ UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr,
 UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr,
                                                int32_t length,
                                                int32_t position)
-  : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0, position),
+  : CharacterIterator(textPtr != nullptr ? (length >= 0 ? length : u_strlen(textPtr)) : 0, position),
   text(textPtr)
 {
 }
@@ -45,7 +45,8 @@ UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr,
                                                int32_t textBegin,
                                                int32_t textEnd,
                                                int32_t position)
-  : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0, textBegin, textEnd, position),
+  : CharacterIterator(textPtr != nullptr ? (length >= 0 ? length : u_strlen(textPtr)) : 0,
+                      textBegin, textEnd, position),
   text(textPtr)
 {
 }
@@ -352,7 +353,7 @@ UCharCharacterIterator::move32(int32_t delta, CharacterIterator::EOrigin origin)
 void UCharCharacterIterator::setText(ConstChar16Ptr newText,
                                      int32_t      newTextLength) {
     text = newText;
-    if(newText == 0 || newTextLength < 0) {
+    if (newText == nullptr || newTextLength < 0) {
         newTextLength = 0;
     }
     end = textLength = newTextLength;

+ 4 - 6
thirdparty/icu4c/common/ucnv.cpp

@@ -473,8 +473,6 @@ ucnv_setSubstChars (UConverter * converter,
     * we set subChar1 to 0.
     */
     converter->subChar1 = 0;
-    
-    return;
 }
 
 U_CAPI void U_EXPORT2
@@ -1754,7 +1752,7 @@ ucnv_fromUChars(UConverter *cnv,
         destLimit=dest+destCapacity;
 
         /* perform the conversion */
-        ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, true, pErrorCode);
+        ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, pErrorCode);
         destLength=(int32_t)(dest-originalDest);
 
         /* if an overflow occurs, then get the preflighting length */
@@ -1765,7 +1763,7 @@ ucnv_fromUChars(UConverter *cnv,
             do {
                 dest=buffer;
                 *pErrorCode=U_ZERO_ERROR;
-                ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, true, pErrorCode);
+                ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, pErrorCode);
                 destLength+=(int32_t)(dest-buffer);
             } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
         }
@@ -1810,7 +1808,7 @@ ucnv_toUChars(UConverter *cnv,
         destLimit=dest+destCapacity;
 
         /* perform the conversion */
-        ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, true, pErrorCode);
+        ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, pErrorCode);
         destLength=(int32_t)(dest-originalDest);
 
         /* if an overflow occurs, then get the preflighting length */
@@ -1822,7 +1820,7 @@ ucnv_toUChars(UConverter *cnv,
             do {
                 dest=buffer;
                 *pErrorCode=U_ZERO_ERROR;
-                ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, true, pErrorCode);
+                ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, pErrorCode);
                 destLength+=(int32_t)(dest-buffer);
             }
             while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);

+ 0 - 4
thirdparty/icu4c/common/ucnv_err.cpp

@@ -109,7 +109,6 @@ UCNV_FROM_U_CALLBACK_STOP (
         *err = U_ZERO_ERROR;
     }
     /* the caller must have set the error code accordingly */
-    return;
 }
 
 
@@ -125,7 +124,6 @@ UCNV_TO_U_CALLBACK_STOP (
 {
     /* the caller must have set the error code accordingly */
     (void)context; (void)toUArgs; (void)codePoints; (void)length; (void)reason; (void)err;
-    return;
 }
 
 U_CAPI void    U_EXPORT2
@@ -353,8 +351,6 @@ UCNV_FROM_U_CALLBACK_ESCAPE (
       *err = err2;
       return;
   }
-
-  return;
 }
 
 

+ 0 - 4
thirdparty/icu4c/common/ucnv_u7.cpp

@@ -455,7 +455,6 @@ unicodeMode:
     pArgs->source=(const char *)source;
     pArgs->target=target;
     pArgs->offsets=offsets;
-    return;
 }
 
 static void U_CALLCONV
@@ -731,7 +730,6 @@ unicodeMode:
     pArgs->source=source;
     pArgs->target=(char *)target;
     pArgs->offsets=offsets;
-    return;
 }
 
 static const char * U_CALLCONV
@@ -1156,7 +1154,6 @@ endloop:
     pArgs->source=(const char *)source;
     pArgs->target=target;
     pArgs->offsets=offsets;
-    return;
 }
 
 static void U_CALLCONV
@@ -1443,7 +1440,6 @@ unicodeMode:
     pArgs->source=source;
     pArgs->target=(char *)target;
     pArgs->offsets=offsets;
-    return;
 }
 U_CDECL_END
 

+ 0 - 2
thirdparty/icu4c/common/ucnvbocu.cpp

@@ -1165,7 +1165,6 @@ endloop:
     pArgs->source=(const char *)source;
     pArgs->target=target;
     pArgs->offsets=offsets;
-    return;
 }
 
 /*
@@ -1363,7 +1362,6 @@ endloop:
     /* write back the updated pointers */
     pArgs->source=(const char *)source;
     pArgs->target=target;
-    return;
 }
 
 /* miscellaneous ------------------------------------------------------------ */

+ 2 - 2
thirdparty/icu4c/common/ucnvisci.cpp

@@ -1537,12 +1537,12 @@ _ISCII_SafeClone(const UConverter *cnv,
     int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct);
 
     if (U_FAILURE(*status)) {
-        return 0;
+        return nullptr;
     }
 
     if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */
         *pBufferSize = bufferSizeNeeded;
-        return 0;
+        return nullptr;
     }
 
     localClone = (struct cloneISCIIStruct *)stackBuffer;

+ 2 - 4
thirdparty/icu4c/common/ucnvscsu.cpp

@@ -572,7 +572,6 @@ endloop:
     pArgs->source=(const char *)source;
     pArgs->target=target;
     pArgs->offsets=offsets;
-    return;
 }
 
 /*
@@ -864,7 +863,6 @@ endloop:
     /* write back the updated pointers */
     pArgs->source=(const char *)source;
     pArgs->target=target;
-    return;
 }
 U_CDECL_END
 /* SCSU-from-Unicode conversion functions ----------------------------------- */
@@ -1978,12 +1976,12 @@ _SCSUSafeClone(const UConverter *cnv,
     int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
 
     if (U_FAILURE(*status)){
-        return 0;
+        return nullptr;
     }
 
     if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
         *pBufferSize = bufferSizeNeeded;
-        return 0;
+        return nullptr;
     }
 
     localClone = (struct cloneSCSUStruct *)stackBuffer;

+ 43 - 72
thirdparty/icu4c/common/ucurr.cpp

@@ -22,7 +22,6 @@
 #include "unicode/usetiter.h"
 #include "unicode/utf16.h"
 #include "ustr_imp.h"
-#include "bytesinkutil.h"
 #include "charstr.h"
 #include "cmemory.h"
 #include "cstring.h"
@@ -296,7 +295,7 @@ myUCharsToChars(char* resultOfLen4, const char16_t* currency) {
 static const int32_t*
 _findMetaData(const char16_t* currency, UErrorCode& ec) {
 
-    if (currency == 0 || *currency == 0) {
+    if (currency == nullptr || *currency == 0) {
         if (U_SUCCESS(ec)) {
             ec = U_ILLEGAL_ARGUMENT_ERROR;
         }
@@ -349,10 +348,10 @@ _findMetaData(const char16_t* currency, UErrorCode& ec) {
 
 // -------------------------------------
 
-static void
-idForLocale(const char* locale, char* countryAndVariant, int capacity, UErrorCode* ec)
+static CharString
+idForLocale(const char* locale, UErrorCode* ec)
 {
-    ulocimp_getRegionForSupplementalData(locale, false, countryAndVariant, capacity, ec);
+    return ulocimp_getRegionForSupplementalData(locale, false, *ec);
 }
 
 // ------------------------------------------
@@ -371,7 +370,7 @@ U_CDECL_END
 struct CReg;
 
 static UMutex gCRegLock;
-static CReg* gCRegHead = 0;
+static CReg* gCRegHead = nullptr;
 
 struct CReg : public icu::UMemory {
     CReg *next;
@@ -379,7 +378,7 @@ struct CReg : public icu::UMemory {
     char  id[ULOC_FULLNAME_CAPACITY];
 
     CReg(const char16_t* _iso, const char* _id)
-        : next(0)
+        : next(nullptr)
     {
         int32_t len = (int32_t)uprv_strlen(_id);
         if (len > (int32_t)(sizeof(id)-1)) {
@@ -408,7 +407,7 @@ struct CReg : public icu::UMemory {
             }
             *status = U_MEMORY_ALLOCATION_ERROR;
         }
-        return 0;
+        return nullptr;
     }
 
     static UBool unreg(UCurrRegistryKey key) {
@@ -464,9 +463,8 @@ U_CAPI UCurrRegistryKey U_EXPORT2
 ucurr_register(const char16_t* isoCode, const char* locale, UErrorCode *status)
 {
     if (status && U_SUCCESS(*status)) {
-        char id[ULOC_FULLNAME_CAPACITY];
-        idForLocale(locale, id, sizeof(id), status);
-        return CReg::reg(isoCode, id, status);
+        CharString id = idForLocale(locale, status);
+        return CReg::reg(isoCode, id.data(), status);
     }
     return nullptr;
 }
@@ -524,11 +522,7 @@ ucurr_forLocale(const char* locale,
     }
 
     UErrorCode localStatus = U_ZERO_ERROR;
-    CharString currency;
-    {
-        CharStringByteSink sink(&currency);
-        ulocimp_getKeywordValue(locale, "currency", sink, &localStatus);
-    }
+    CharString currency = ulocimp_getKeywordValue(locale, "currency", localStatus);
     int32_t resLen = currency.length();
 
     if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency.data(), resLen)) {
@@ -540,14 +534,13 @@ ucurr_forLocale(const char* locale,
     }
 
     // get country or country_variant in `id'
-    char id[ULOC_FULLNAME_CAPACITY];
-    idForLocale(locale, id, UPRV_LENGTHOF(id), ec);
+    CharString id = idForLocale(locale, ec);
     if (U_FAILURE(*ec)) {
         return 0;
     }
 
 #if !UCONFIG_NO_SERVICE
-    const char16_t* result = CReg::get(id);
+    const char16_t* result = CReg::get(id.data());
     if (result) {
         if(buffCapacity > u_strlen(result)) {
             u_strcpy(buff, result);
@@ -557,13 +550,13 @@ ucurr_forLocale(const char* locale,
     }
 #endif
     // Remove variants, which is only needed for registration.
-    char *idDelim = uprv_strchr(id, VAR_DELIM);
+    char *idDelim = uprv_strchr(id.data(), VAR_DELIM);
     if (idDelim) {
-        idDelim[0] = 0;
+        id.truncate(idDelim - id.data());
     }
 
     const char16_t* s = nullptr;  // Currency code from data file.
-    if (id[0] == 0) {
+    if (id.isEmpty()) {
         // No point looking in the data for an empty string.
         // This is what we would get.
         localStatus = U_MISSING_RESOURCE_ERROR;
@@ -572,7 +565,7 @@ ucurr_forLocale(const char* locale,
         localStatus = U_ZERO_ERROR;
         UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus);
         UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
-        UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
+        UResourceBundle *countryArray = ures_getByKey(rb, id.data(), cm, &localStatus);
         // https://unicode-org.atlassian.net/browse/ICU-21997
         // Prefer to use currencies that are legal tender.
         if (U_SUCCESS(localStatus)) {
@@ -602,13 +595,9 @@ ucurr_forLocale(const char* locale,
         ures_close(countryArray);
     }
 
-    if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0) {
+    if ((U_FAILURE(localStatus)) && strchr(id.data(), '_') != nullptr) {
         // We don't know about it.  Check to see if we support the variant.
-        CharString parent;
-        {
-            CharStringByteSink sink(&parent);
-            ulocimp_getParent(locale, sink, ec);
-        }
+        CharString parent = ulocimp_getParent(locale, *ec);
         *ec = U_USING_FALLBACK_WARNING;
         // TODO: Loop over the parent rather than recursing and
         // looking again for a currency keyword.
@@ -647,10 +636,7 @@ static UBool fallback(CharString& loc) {
         loc.truncate(3);
         loc.append("001", status);
     } else {
-        CharString tmp;
-        CharStringByteSink sink(&tmp);
-        ulocimp_getParent(loc.data(), sink, &status);
-        loc = std::move(tmp);
+        loc = ulocimp_getParent(loc.data(), status);
     }
  /*
     char *i = uprv_strrchr(loc, '_');
@@ -683,13 +669,13 @@ ucurr_getName(const char16_t* currency,
     //|}
 
     if (U_FAILURE(*ec)) {
-        return 0;
+        return nullptr;
     }
 
     int32_t choice = (int32_t) nameStyle;
     if (choice < 0 || choice > 4) {
         *ec = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
+        return nullptr;
     }
 
     // In the future, resource bundles may implement multi-level
@@ -705,14 +691,10 @@ ucurr_getName(const char16_t* currency,
     // this function.
     UErrorCode ec2 = U_ZERO_ERROR;
 
-    CharString loc;
-    {
-        CharStringByteSink sink(&loc);
-        ulocimp_getName(locale, sink, &ec2);
-    }
+    CharString loc = ulocimp_getName(locale, ec2);
     if (U_FAILURE(ec2)) {
         *ec = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
+        return nullptr;
     }
 
     char buf[ISO_CURRENCY_CODE_LENGTH+1];
@@ -739,7 +721,7 @@ ucurr_getName(const char16_t* currency,
             break;
         default:
             *ec = U_UNSUPPORTED_ERROR;
-            return 0;
+            return nullptr;
         }
         key.append("/", ec2);
         key.append(buf, ec2);
@@ -800,21 +782,17 @@ ucurr_getPluralName(const char16_t* currency,
     //|}
 
     if (U_FAILURE(*ec)) {
-        return 0;
+        return nullptr;
     }
 
     // Use a separate UErrorCode here that does not propagate out of
     // this function.
     UErrorCode ec2 = U_ZERO_ERROR;
 
-    CharString loc;
-    {
-        CharStringByteSink sink(&loc);
-        ulocimp_getName(locale, sink, &ec2);
-    }
+    CharString loc = ulocimp_getName(locale, ec2);
     if (U_FAILURE(ec2)) {
         *ec = U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
+        return nullptr;
     }
 
     char buf[ISO_CURRENCY_CODE_LENGTH+1];
@@ -1002,11 +980,7 @@ collectCurrencyNames(const char* locale,
     // Look up the Currencies resource for the given locale.
     UErrorCode ec2 = U_ZERO_ERROR;
 
-    CharString loc;
-    {
-        CharStringByteSink sink(&loc);
-        ulocimp_getName(locale, sink, &ec2);
-    }
+    CharString loc = ulocimp_getName(locale, ec2);
     if (U_FAILURE(ec2)) {
         ec = U_ILLEGAL_ARGUMENT_ERROR;
     }
@@ -1390,7 +1364,6 @@ searchCurrencyName(const CurrencyNameStruct* currencyNames,
             break;
         }
     }
-    return;
 }
 
 //========================= currency name cache =====================
@@ -1447,7 +1420,7 @@ currency_cache_cleanup() {
     for (int32_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) {
         if (currCache[i]) {
             deleteCacheEntry(currCache[i]);
-            currCache[i] = 0;
+            currCache[i] = nullptr;
         }
     }
     return true;
@@ -2030,6 +2003,7 @@ static const struct CurrencyList {
     {"XBC", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
     {"XBD", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
     {"XCD", UCURR_COMMON|UCURR_NON_DEPRECATED},
+    {"XCG", UCURR_COMMON|UCURR_NON_DEPRECATED},
     {"XDR", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
     {"XEU", UCURR_UNCOMMON|UCURR_DEPRECATED},
     {"XFO", UCURR_UNCOMMON|UCURR_NON_DEPRECATED},
@@ -2229,7 +2203,7 @@ static void U_CALLCONV initIsoCodes(UErrorCode &status) {
 
 static void populateCurrSymbolsEquiv(icu::Hashtable *hash, UErrorCode &status) {
     if (U_FAILURE(status)) { return; }
-    for (auto& entry : unisets::kCurrencyEntries) {
+    for (const auto& entry : unisets::kCurrencyEntries) {
         UnicodeString exemplar(entry.exemplar);
         const UnicodeSet* set = unisets::get(entry.key);
         if (set == nullptr) { return; }
@@ -2325,10 +2299,9 @@ ucurr_countCurrencies(const char* locale,
     {
         // local variables
         UErrorCode localStatus = U_ZERO_ERROR;
-        char id[ULOC_FULLNAME_CAPACITY];
 
         // get country or country_variant in `id'
-        idForLocale(locale, id, sizeof(id), ec);
+        CharString id = idForLocale(locale, ec);
 
         if (U_FAILURE(*ec))
         {
@@ -2336,10 +2309,10 @@ ucurr_countCurrencies(const char* locale,
         }
 
         // Remove variants, which is only needed for registration.
-        char *idDelim = strchr(id, VAR_DELIM);
+        char *idDelim = strchr(id.data(), VAR_DELIM);
         if (idDelim)
         {
-            idDelim[0] = 0;
+            id.truncate(idDelim - id.data());
         }
 
         // Look up the CurrencyMap element in the root bundle.
@@ -2347,7 +2320,7 @@ ucurr_countCurrencies(const char* locale,
         UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
 
         // Using the id derived from the local, get the currency data
-        UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
+        UResourceBundle *countryArray = ures_getByKey(rb, id.data(), cm, &localStatus);
 
         // process each currency to see which one is valid for the given date
         if (U_SUCCESS(localStatus))
@@ -2440,20 +2413,19 @@ ucurr_forLocaleAndDate(const char* locale,
         {
             // local variables
             UErrorCode localStatus = U_ZERO_ERROR;
-            char id[ULOC_FULLNAME_CAPACITY];
 
             // get country or country_variant in `id'
-            idForLocale(locale, id, sizeof(id), ec);
+            CharString id = idForLocale(locale, ec);
             if (U_FAILURE(*ec))
             {
                 return 0;
             }
 
             // Remove variants, which is only needed for registration.
-            char *idDelim = strchr(id, VAR_DELIM);
+            char *idDelim = strchr(id.data(), VAR_DELIM);
             if (idDelim)
             {
-                idDelim[0] = 0;
+                id.truncate(idDelim - id.data());
             }
 
             // Look up the CurrencyMap element in the root bundle.
@@ -2461,7 +2433,7 @@ ucurr_forLocaleAndDate(const char* locale,
             UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus);
 
             // Using the id derived from the local, get the currency data
-            UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus);
+            UResourceBundle *countryArray = ures_getByKey(rb, id.data(), cm, &localStatus);
 
             // process each currency to see which one is valid for the given date
             bool matchFound = false;
@@ -2587,9 +2559,8 @@ static const UEnumeration defaultKeywordValues = {
 
 U_CAPI UEnumeration *U_EXPORT2 ucurr_getKeywordValuesForLocale(const char *key, const char *locale, UBool commonlyUsed, UErrorCode* status) {
     // Resolve region
-    char prefRegion[ULOC_COUNTRY_CAPACITY];
-    ulocimp_getRegionForSupplementalData(locale, true, prefRegion, sizeof(prefRegion), status);
-    
+    CharString prefRegion = ulocimp_getRegionForSupplementalData(locale, true, *status);
+
     // Read value from supplementalData
     UList *values = ulist_createEmptyList(status);
     UList *otherValues = ulist_createEmptyList(status);
@@ -2621,7 +2592,7 @@ U_CAPI UEnumeration *U_EXPORT2 ucurr_getKeywordValuesForLocale(const char *key,
             break;
         }
         const char *region = ures_getKey(&bundlekey);
-        UBool isPrefRegion = uprv_strcmp(region, prefRegion) == 0 ? true : false;
+        UBool isPrefRegion = prefRegion == region;
         if (!isPrefRegion && commonlyUsed) {
             // With commonlyUsed=true, we do not put
             // currencies for other regions in the
@@ -2732,7 +2703,7 @@ ucurr_getNumericCode(const char16_t* currency) {
     if (currency && u_strlen(currency) == ISO_CURRENCY_CODE_LENGTH) {
         UErrorCode status = U_ZERO_ERROR;
 
-        UResourceBundle *bundle = ures_openDirect(0, "currencyNumericCodes", &status);
+        UResourceBundle *bundle = ures_openDirect(nullptr, "currencyNumericCodes", &status);
         ures_getByKey(bundle, "codeMap", bundle, &status);
         if (U_SUCCESS(status)) {
             char alphaCode[ISO_CURRENCY_CODE_LENGTH+1];

+ 6 - 6
thirdparty/icu4c/common/udata.cpp

@@ -850,12 +850,12 @@ static UBool extendICUData(UErrorCode *pErr)
        UDataMemory_init(&copyPData);
        if(pData != nullptr) {
           UDatamemory_assign(&copyPData, pData);
-          copyPData.map = 0;              /* The mapping for this data is owned by the hash table */
-          copyPData.mapAddr = 0;          /*   which will unmap it when ICU is shut down.         */
-                                          /* CommonICUData is also unmapped when ICU is shut down.*/
-                                          /* To avoid unmapping the data twice, zero out the map  */
-                                          /*   fields in the UDataMemory that we're assigning     */
-                                          /*   to CommonICUData.                                  */
+          copyPData.map = nullptr;     /* The mapping for this data is owned by the hash table */
+          copyPData.mapAddr = nullptr; /*   which will unmap it when ICU is shut down.         */
+                                       /* CommonICUData is also unmapped when ICU is shut down.*/
+                                       /* To avoid unmapping the data twice, zero out the map  */
+                                       /*   fields in the UDataMemory that we're assigning     */
+                                       /*   to CommonICUData.                                  */
 
           didUpdate = /* no longer using this result */
               setCommonICUData(&copyPData,/*  The new common data.                                */

+ 2 - 2
thirdparty/icu4c/common/udataswp.cpp

@@ -441,7 +441,7 @@ udata_openSwapperForInputData(const void *data, int32_t length,
         pHeader->info.sizeofUChar!=2
     ) {
         *pErrorCode=U_UNSUPPORTED_ERROR;
-        return 0;
+        return nullptr;
     }
 
     inIsBigEndian=(UBool)pHeader->info.isBigEndian;
@@ -461,7 +461,7 @@ udata_openSwapperForInputData(const void *data, int32_t length,
         (length>=0 && length<headerSize)
     ) {
         *pErrorCode=U_UNSUPPORTED_ERROR;
-        return 0;
+        return nullptr;
     }
 
     return udata_openSwapper(inIsBigEndian, inCharset, outIsBigEndian, outCharset, pErrorCode);

+ 14 - 14
thirdparty/icu4c/common/uiter.cpp

@@ -66,7 +66,7 @@ noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode *pErrorCod
 }
 
 static const UCharIterator noopIterator={
-    0, 0, 0, 0, 0, 0,
+    nullptr, 0, 0, 0, 0, 0,
     noopGetIndex,
     noopMove,
     noopHasNext,
@@ -197,7 +197,7 @@ stringIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCo
 }
 
 static const UCharIterator stringIterator={
-    0, 0, 0, 0, 0, 0,
+    nullptr, 0, 0, 0, 0, 0,
     stringIteratorGetIndex,
     stringIteratorMove,
     stringIteratorHasNext,
@@ -212,8 +212,8 @@ static const UCharIterator stringIterator={
 
 U_CAPI void U_EXPORT2
 uiter_setString(UCharIterator *iter, const char16_t *s, int32_t length) {
-    if(iter!=0) {
-        if(s!=0 && length>=-1) {
+    if (iter != nullptr) {
+        if (s != nullptr && length >= -1) {
             *iter=stringIterator;
             iter->context=s;
             if(length>=0) {
@@ -283,7 +283,7 @@ utf16BEIteratorPrevious(UCharIterator *iter) {
 }
 
 static const UCharIterator utf16BEIterator={
-    0, 0, 0, 0, 0, 0,
+    nullptr, 0, 0, 0, 0, 0,
     stringIteratorGetIndex,
     stringIteratorMove,
     stringIteratorHasNext,
@@ -457,7 +457,7 @@ characterIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErro
 }
 
 static const UCharIterator characterIteratorWrapper={
-    0, 0, 0, 0, 0, 0,
+    nullptr, 0, 0, 0, 0, 0,
     characterIteratorGetIndex,
     characterIteratorMove,
     characterIteratorHasNext,
@@ -472,8 +472,8 @@ static const UCharIterator characterIteratorWrapper={
 
 U_CAPI void U_EXPORT2
 uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter) {
-    if(iter!=0) {
-        if(charIter!=0) {
+    if (iter != nullptr) {
+        if (charIter != nullptr) {
             *iter=characterIteratorWrapper;
             iter->context=charIter;
         } else {
@@ -521,7 +521,7 @@ replaceableIteratorPrevious(UCharIterator *iter) {
 }
 
 static const UCharIterator replaceableIterator={
-    0, 0, 0, 0, 0, 0,
+    nullptr, 0, 0, 0, 0, 0,
     stringIteratorGetIndex,
     stringIteratorMove,
     stringIteratorHasNext,
@@ -536,8 +536,8 @@ static const UCharIterator replaceableIterator={
 
 U_CAPI void U_EXPORT2
 uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) {
-    if(iter!=0) {
-        if(rep!=0) {
+    if (iter != nullptr) {
+        if (rep != nullptr) {
             *iter=replaceableIterator;
             iter->context=rep;
             iter->limit=iter->length=rep->length();
@@ -987,7 +987,7 @@ utf8IteratorSetState(UCharIterator *iter,
 }
 
 static const UCharIterator utf8Iterator={
-    0, 0, 0, 0, 0, 0,
+    nullptr, 0, 0, 0, 0, 0,
     utf8IteratorGetIndex,
     utf8IteratorMove,
     utf8IteratorHasNext,
@@ -1002,8 +1002,8 @@ static const UCharIterator utf8Iterator={
 
 U_CAPI void U_EXPORT2
 uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length) {
-    if(iter!=0) {
-        if(s!=0 && length>=-1) {
+    if (iter != nullptr) {
+        if (s != nullptr && length >= -1) {
             *iter=utf8Iterator;
             iter->context=s;
             if(length>=0) {

File diff suppressed because it is too large
+ 665 - 373
thirdparty/icu4c/common/uloc.cpp


+ 17 - 15
thirdparty/icu4c/common/uloc_keytype.cpp

@@ -74,8 +74,9 @@ uloc_key_type_cleanup() {
 
 U_CDECL_END
 
+namespace {
 
-static void U_CALLCONV
+void U_CALLCONV
 initFromResourceBundle(UErrorCode& sts) {
     U_NAMESPACE_USE
     ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
@@ -141,7 +142,7 @@ initFromResourceBundle(UErrorCode& sts) {
             bcpKeyId = bcpKeyIdBuf->data();
         }
 
-        UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
+        bool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
 
         UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, nullptr, &sts);
         if (U_FAILURE(sts)) {
@@ -351,7 +352,7 @@ initFromResourceBundle(UErrorCode& sts) {
     }
 }
 
-static UBool
+bool
 init() {
     UErrorCode sts = U_ZERO_ERROR;
     umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts);
@@ -361,7 +362,7 @@ init() {
     return true;
 }
 
-static UBool
+bool
 isSpecialTypeCodepoints(const char* val) {
     int32_t subtagLen = 0;
     const char* p = val;
@@ -383,7 +384,7 @@ isSpecialTypeCodepoints(const char* val) {
     return (subtagLen >= 4 && subtagLen <= 6);
 }
 
-static UBool
+bool
 isSpecialTypeReorderCode(const char* val) {
     int32_t subtagLen = 0;
     const char* p = val;
@@ -403,7 +404,7 @@ isSpecialTypeReorderCode(const char* val) {
     return (subtagLen >=3 && subtagLen <=8);
 }
 
-static UBool
+bool
 isSpecialTypeRgKeyValue(const char* val) {
     int32_t subtagLen = 0;
     const char* p = val;
@@ -419,7 +420,9 @@ isSpecialTypeRgKeyValue(const char* val) {
     return (subtagLen == 6);
 }
 
-U_CFUNC const char*
+}  // namespace
+
+U_EXPORT const char*
 ulocimp_toBcpKey(const char* key) {
     if (!init()) {
         return nullptr;
@@ -432,7 +435,7 @@ ulocimp_toBcpKey(const char* key) {
     return nullptr;
 }
 
-U_CFUNC const char*
+U_EXPORT const char*
 ulocimp_toLegacyKey(const char* key) {
     if (!init()) {
         return nullptr;
@@ -445,8 +448,8 @@ ulocimp_toLegacyKey(const char* key) {
     return nullptr;
 }
 
-U_CFUNC const char*
-ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
+U_EXPORT const char*
+ulocimp_toBcpType(const char* key, const char* type, bool* isKnownKey, bool* isSpecialType) {
     if (isKnownKey != nullptr) {
         *isKnownKey = false;
     }
@@ -468,7 +471,7 @@ ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* i
             return t->bcpId;
         }
         if (keyData->specialTypes != SPECIALTYPE_NONE) {
-            UBool matched = false;
+            bool matched = false;
             if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
                 matched = isSpecialTypeCodepoints(type);
             }
@@ -490,8 +493,8 @@ ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* i
 }
 
 
-U_CFUNC const char*
-ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
+U_EXPORT const char*
+ulocimp_toLegacyType(const char* key, const char* type, bool* isKnownKey, bool* isSpecialType) {
     if (isKnownKey != nullptr) {
         *isKnownKey = false;
     }
@@ -513,7 +516,7 @@ ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool
             return t->legacyId;
         }
         if (keyData->specialTypes != SPECIALTYPE_NONE) {
-            UBool matched = false;
+            bool matched = false;
             if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
                 matched = isSpecialTypeCodepoints(type);
             }
@@ -533,4 +536,3 @@ ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool
     }
     return nullptr;
 }
-

File diff suppressed because it is too large
+ 205 - 208
thirdparty/icu4c/common/uloc_tag.cpp


+ 14 - 17
thirdparty/icu4c/common/ulocale.cpp

@@ -1,6 +1,7 @@
 // © 2023 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
 //
+#include "unicode/bytestream.h"
 #include "unicode/errorcode.h"
 #include "unicode/stringpiece.h"
 #include "unicode/utypes.h"
@@ -8,9 +9,9 @@
 #include "unicode/ulocale.h"
 #include "unicode/locid.h"
 
+#include "bytesinkutil.h"
 #include "charstr.h"
 #include "cmemory.h"
-#include "ustr_imp.h"
 
 U_NAMESPACE_USE
 #define EXTERNAL(i) (reinterpret_cast<ULocale*>(i))
@@ -19,15 +20,17 @@ U_NAMESPACE_USE
 
 ULocale*
 ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err) {
+    if (U_FAILURE(*err)) { return nullptr; }
     CharString str(length < 0 ? StringPiece(localeID) : StringPiece(localeID, length), *err);
-    if (U_FAILURE(*err)) return nullptr;
+    if (U_FAILURE(*err)) { return nullptr; }
     return EXTERNAL(icu::Locale::createFromName(str.data()).clone());
 }
 
 ULocale*
 ulocale_openForLanguageTag(const char* tag, int32_t length, UErrorCode* err) {
+  if (U_FAILURE(*err)) { return nullptr; }
   Locale l = icu::Locale::forLanguageTag(length < 0 ? StringPiece(tag) : StringPiece(tag, length), *err);
-  if (U_FAILURE(*err)) return nullptr;
+  if (U_FAILURE(*err)) { return nullptr; }
   return EXTERNAL(l.clone());
 }
 
@@ -53,20 +56,14 @@ int32_t ulocale_get ##N ( \
         *err = U_ILLEGAL_ARGUMENT_ERROR; \
         return 0; \
     } \
-    CheckedArrayByteSink sink(valueBuffer, bufferCapacity); \
-    CONST_INTERNAL(locale)->get ## N( \
-        keywordLength < 0 ? StringPiece(keyword) : StringPiece(keyword, keywordLength), \
-        sink, *err); \
-    int32_t reslen = sink.NumberOfBytesAppended(); \
-    if (U_FAILURE(*err)) { \
-        return reslen; \
-    } \
-    if (sink.Overflowed()) { \
-        *err = U_BUFFER_OVERFLOW_ERROR; \
-    } else { \
-        u_terminateChars(valueBuffer, bufferCapacity, reslen, err); \
-    } \
-    return reslen; \
+    return ByteSinkUtil::viaByteSinkToTerminatedChars( \
+        valueBuffer, bufferCapacity, \
+        [&](ByteSink& sink, UErrorCode& status) { \
+            CONST_INTERNAL(locale)->get ## N( \
+                keywordLength < 0 ? StringPiece(keyword) : StringPiece(keyword, keywordLength), \
+                sink, status); \
+        }, \
+        *err); \
 }
 
 #define IMPL_ULOCALE_GET_KEYWORDS(N) \

+ 10 - 15
thirdparty/icu4c/common/ulocbuilder.cpp

@@ -9,10 +9,10 @@
 #include "unicode/stringpiece.h"
 #include "unicode/umachine.h"
 #include "unicode/ulocbuilder.h"
+#include "bytesinkutil.h"
 #include "cstring.h"
 #include "ustr_imp.h"
 
-using icu::CheckedArrayByteSink;
 using icu::StringPiece;
 
 #define EXTERNAL(i) (reinterpret_cast<ULocaleBuilder*>(i))
@@ -112,12 +112,13 @@ ULocale* ulocbld_buildULocale(ULocaleBuilder* builder, UErrorCode* err) {
 
 int32_t ulocbld_buildLocaleID(ULocaleBuilder* builder,
                               char* buffer, int32_t bufferCapacity, UErrorCode* err) {
+    if (U_FAILURE(*err)) { return 0; }
     if (builder == nullptr) {
         *err = U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }
     icu::Locale l = INTERNAL(builder)->build(*err);
-    if (U_FAILURE(*err)) return 0;
+    if (U_FAILURE(*err)) { return 0; }
     int32_t length = (int32_t)(uprv_strlen(l.getName()));
     if (0 < length && length <= bufferCapacity) {
         uprv_memcpy(buffer, l.getName(), length);
@@ -127,24 +128,18 @@ int32_t ulocbld_buildLocaleID(ULocaleBuilder* builder,
 
 int32_t ulocbld_buildLanguageTag(ULocaleBuilder* builder,
                   char* buffer, int32_t bufferCapacity, UErrorCode* err) {
+    if (U_FAILURE(*err)) { return 0; }
     if (builder == nullptr) {
         *err = U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }
     icu::Locale l = INTERNAL(builder)->build(*err);
-    if (U_FAILURE(*err)) return 0;
-    CheckedArrayByteSink sink(buffer, bufferCapacity);
-    l.toLanguageTag(sink, *err);
-    int32_t reslen = sink.NumberOfBytesAppended();
-    if (U_FAILURE(*err)) {
-        return reslen;
-    }
-    if (sink.Overflowed()) {
-        *err = U_BUFFER_OVERFLOW_ERROR;
-    } else {
-        u_terminateChars(buffer, bufferCapacity, reslen, err);
-    }
-    return reslen;
+    return icu::ByteSinkUtil::viaByteSinkToTerminatedChars(
+        buffer, bufferCapacity,
+        [&](icu::ByteSink& sink, UErrorCode& status) {
+            l.toLanguageTag(sink, status);
+        },
+        *err);
 }
 
 UBool ulocbld_copyErrorTo(const ULocaleBuilder* builder, UErrorCode *outErrorCode) {

+ 172 - 75
thirdparty/icu4c/common/ulocimp.h

@@ -10,6 +10,8 @@
 #ifndef ULOCIMP_H
 #define ULOCIMP_H
 
+#include <cstddef>
+
 #include "unicode/bytestream.h"
 #include "unicode/uloc.h"
 
@@ -40,8 +42,10 @@ uloc_getTableStringWithFallback(
     int32_t *pLength,
     UErrorCode *pErrorCode);
 
+namespace {
 /*returns true if a is an ID separator false otherwise*/
-#define _isIDSeparator(a) (a == '_' || a == '-')
+inline bool _isIDSeparator(char a) { return a == '_' || a == '-'; }
+}  // namespace
 
 U_CFUNC const char* 
 uloc_getCurrentCountryID(const char* oldID);
@@ -49,53 +53,134 @@ uloc_getCurrentCountryID(const char* oldID);
 U_CFUNC const char* 
 uloc_getCurrentLanguageID(const char* oldID);
 
-U_CFUNC void
-ulocimp_getKeywords(const char *localeID,
-             char prev,
-             icu::ByteSink& sink,
-             UBool valuesToo,
-             UErrorCode *status);
-
-icu::CharString U_EXPORT2
-ulocimp_getLanguage(const char *localeID,
-                    const char **pEnd,
-                    UErrorCode &status);
-
-icu::CharString U_EXPORT2
-ulocimp_getScript(const char *localeID,
-                  const char **pEnd,
-                  UErrorCode &status);
-
-icu::CharString U_EXPORT2
-ulocimp_getCountry(const char *localeID,
-                   const char **pEnd,
-                   UErrorCode &status);
-
-U_CAPI void U_EXPORT2
+U_EXPORT icu::CharString
+ulocimp_getKeywords(const char* localeID,
+                    char prev,
+                    bool valuesToo,
+                    UErrorCode& status);
+
+U_EXPORT void
+ulocimp_getKeywords(const char* localeID,
+                    char prev,
+                    icu::ByteSink& sink,
+                    bool valuesToo,
+                    UErrorCode& status);
+
+U_EXPORT icu::CharString
+ulocimp_getName(const char* localeID,
+                UErrorCode& err);
+
+U_EXPORT void
 ulocimp_getName(const char* localeID,
                 icu::ByteSink& sink,
-                UErrorCode* err);
+                UErrorCode& err);
+
+U_EXPORT icu::CharString
+ulocimp_getBaseName(const char* localeID,
+                    UErrorCode& err);
 
-U_CAPI void U_EXPORT2
+U_EXPORT void
 ulocimp_getBaseName(const char* localeID,
                     icu::ByteSink& sink,
-                    UErrorCode* err);
+                    UErrorCode& err);
+
+U_EXPORT icu::CharString
+ulocimp_canonicalize(const char* localeID,
+                     UErrorCode& err);
 
-U_CAPI void U_EXPORT2
+U_EXPORT void
 ulocimp_canonicalize(const char* localeID,
                      icu::ByteSink& sink,
-                     UErrorCode* err);
+                     UErrorCode& err);
 
-U_CAPI void U_EXPORT2
+U_EXPORT icu::CharString
+ulocimp_getKeywordValue(const char* localeID,
+                        const char* keywordName,
+                        UErrorCode& status);
+
+U_EXPORT void
 ulocimp_getKeywordValue(const char* localeID,
                         const char* keywordName,
                         icu::ByteSink& sink,
-                        UErrorCode* status);
+                        UErrorCode& status);
+
+U_EXPORT icu::CharString
+ulocimp_getLanguage(const char* localeID, UErrorCode& status);
+
+U_EXPORT icu::CharString
+ulocimp_getScript(const char* localeID, UErrorCode& status);
+
+U_EXPORT icu::CharString
+ulocimp_getRegion(const char* localeID, UErrorCode& status);
+
+U_EXPORT icu::CharString
+ulocimp_getVariant(const char* localeID, UErrorCode& status);
+
+U_EXPORT void
+ulocimp_setKeywordValue(const char* keywordName,
+                        const char* keywordValue,
+                        icu::CharString& localeID,
+                        UErrorCode& status);
 
-U_CAPI void U_EXPORT2
+U_EXPORT int32_t
+ulocimp_setKeywordValue(const char* keywords,
+                        const char* keywordName,
+                        const char* keywordValue,
+                        icu::ByteSink& sink,
+                        UErrorCode& status);
+
+U_EXPORT void
+ulocimp_getSubtags(
+        const char* localeID,
+        icu::CharString* language,
+        icu::CharString* script,
+        icu::CharString* region,
+        icu::CharString* variant,
+        const char** pEnd,
+        UErrorCode& status);
+
+U_EXPORT void
+ulocimp_getSubtags(
+        const char* localeID,
+        icu::ByteSink* language,
+        icu::ByteSink* script,
+        icu::ByteSink* region,
+        icu::ByteSink* variant,
+        const char** pEnd,
+        UErrorCode& status);
+
+inline void
+ulocimp_getSubtags(
+        const char* localeID,
+        std::nullptr_t,
+        std::nullptr_t,
+        std::nullptr_t,
+        std::nullptr_t,
+        const char** pEnd,
+        UErrorCode& status) {
+    ulocimp_getSubtags(
+            localeID,
+            static_cast<icu::ByteSink*>(nullptr),
+            static_cast<icu::ByteSink*>(nullptr),
+            static_cast<icu::ByteSink*>(nullptr),
+            static_cast<icu::ByteSink*>(nullptr),
+            pEnd,
+            status);
+}
+
+U_EXPORT icu::CharString
+ulocimp_getParent(const char* localeID,
+                  UErrorCode& err);
+
+U_EXPORT void
 ulocimp_getParent(const char* localeID,
                   icu::ByteSink& sink,
-                  UErrorCode* err);
+                  UErrorCode& err);
+
+U_EXPORT icu::CharString
+ulocimp_toLanguageTag(const char* localeID,
+                      bool strict,
+                      UErrorCode& status);
 
 /**
  * Writes a well-formed language tag for this locale ID.
@@ -116,11 +201,17 @@ ulocimp_getParent(const char* localeID,
  *
  * @internal ICU 64
  */
-U_CAPI void U_EXPORT2
+U_EXPORT void
 ulocimp_toLanguageTag(const char* localeID,
                       icu::ByteSink& sink,
-                      UBool strict,
-                      UErrorCode* err);
+                      bool strict,
+                      UErrorCode& err);
+
+U_EXPORT icu::CharString
+ulocimp_forLanguageTag(const char* langtag,
+                       int32_t tagLen,
+                       int32_t* parsedLength,
+                       UErrorCode& status);
 
 /**
  * Returns a locale ID for the specified BCP47 language tag string.
@@ -148,12 +239,12 @@ ulocimp_toLanguageTag(const char* localeID,
  *                  failed.
  * @internal ICU 63
  */
-U_CAPI void U_EXPORT2
+U_EXPORT void
 ulocimp_forLanguageTag(const char* langtag,
                        int32_t tagLen,
                        icu::ByteSink& sink,
                        int32_t* parsedLength,
-                       UErrorCode* err);
+                       UErrorCode& err);
 
 /**
  * Get the region to use for supplemental data lookup. Uses
@@ -161,28 +252,27 @@ ulocimp_forLanguageTag(const char* langtag,
  * (2) any unicode_region_tag in the locale ID; if none then
  * (3) if inferRegion is true, the region suggested by
  * getLikelySubtags on the localeID.
- * If no region is found, returns length 0.
- * 
+ * If no region is found, returns an empty string.
+ *
  * @param localeID
  *     The complete locale ID (with keywords) from which
  *     to get the region to use for supplemental data.
  * @param inferRegion
  *     If true, will try to infer region from localeID if
  *     no other region is found.
- * @param region
- *     Buffer in which to put the region ID found; should
- *     have a capacity at least ULOC_COUNTRY_CAPACITY. 
- * @param regionCapacity
- *     The actual capacity of the region buffer.
  * @param status
  *     In/out UErrorCode value for latest status.
  * @return
- *     The length of any region code found, or 0 if none.
+ *     The region code found, empty if none found.
  * @internal ICU 57
  */
-U_CAPI int32_t U_EXPORT2
-ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
-                                     char *region, int32_t regionCapacity, UErrorCode* status);
+U_EXPORT icu::CharString
+ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
+                                     UErrorCode& status);
+
+U_EXPORT icu::CharString
+ulocimp_addLikelySubtags(const char* localeID,
+                         UErrorCode& status);
 
 /**
  * Add the likely subtags for a provided locale ID, per the algorithm described
@@ -213,10 +303,15 @@ ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
  * @internal ICU 64
  */
-U_CAPI void U_EXPORT2
+U_EXPORT void
 ulocimp_addLikelySubtags(const char* localeID,
                          icu::ByteSink& sink,
-                         UErrorCode* err);
+                         UErrorCode& err);
+
+U_EXPORT icu::CharString
+ulocimp_minimizeSubtags(const char* localeID,
+                        bool favorScript,
+                        UErrorCode& status);
 
 /**
  * Minimize the subtags for a provided locale ID, per the algorithm described
@@ -248,70 +343,72 @@ ulocimp_addLikelySubtags(const char* localeID,
  * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
  * @internal ICU 64
  */
-U_CAPI void U_EXPORT2
+U_EXPORT void
 ulocimp_minimizeSubtags(const char* localeID,
                         icu::ByteSink& sink,
                         bool favorScript,
-                        UErrorCode* err);
+                        UErrorCode& err);
 
 U_CAPI const char * U_EXPORT2
 locale_getKeywordsStart(const char *localeID);
 
-U_CFUNC UBool
+bool
 ultag_isExtensionSubtags(const char* s, int32_t len);
 
-U_CFUNC UBool
+bool
 ultag_isLanguageSubtag(const char* s, int32_t len);
 
-U_CFUNC UBool
+bool
 ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
 
-U_CFUNC UBool
+bool
 ultag_isRegionSubtag(const char* s, int32_t len);
 
-U_CFUNC UBool
+bool
 ultag_isScriptSubtag(const char* s, int32_t len);
 
-U_CFUNC UBool
+bool
 ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
 
-U_CFUNC UBool
+bool
 ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
 
-U_CFUNC UBool
+bool
 ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
 
-U_CFUNC UBool
+bool
 ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
 
-U_CFUNC UBool
+bool
 ultag_isUnicodeLocaleKey(const char* s, int32_t len);
 
-U_CFUNC UBool
+bool
 ultag_isUnicodeLocaleType(const char* s, int32_t len);
 
-U_CFUNC UBool
+bool
 ultag_isVariantSubtags(const char* s, int32_t len);
 
-U_CAPI const char * U_EXPORT2
-ultag_getTKeyStart(const char *localeID);
+const char*
+ultag_getTKeyStart(const char* localeID);
 
-U_CFUNC const char*
+U_EXPORT const char*
 ulocimp_toBcpKey(const char* key);
 
-U_CFUNC const char*
+U_EXPORT const char*
 ulocimp_toLegacyKey(const char* key);
 
-U_CFUNC const char*
-ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
+U_EXPORT const char*
+ulocimp_toBcpType(const char* key, const char* type, bool* isKnownKey, bool* isSpecialType);
 
-U_CFUNC const char*
-ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
+U_EXPORT const char*
+ulocimp_toLegacyType(const char* key, const char* type, bool* isKnownKey, bool* isSpecialType);
 
 /* Function for testing purpose */
-U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length);
+U_EXPORT const char* const*
+ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length);
 
 // Return true if the value is already canonicalized.
-U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
+U_EXPORT bool
+ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
 
 #endif

+ 3 - 3
thirdparty/icu4c/common/umapfile.cpp

@@ -236,9 +236,9 @@ typedef HANDLE MemoryMap;
 
         /* get a view of the mapping */
 #if U_PLATFORM != U_PF_HPUX
-        data=mmap(0, length, PROT_READ, MAP_SHARED,  fd, 0);
+        data=mmap(nullptr, length, PROT_READ, MAP_SHARED, fd, 0);
 #else
-        data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0);
+        data=mmap(nullptr, length, PROT_READ, MAP_PRIVATE, fd, 0);
 #endif
         close(fd); /* no longer needed */
         if(data==MAP_FAILED) {
@@ -262,7 +262,7 @@ typedef HANDLE MemoryMap;
             if(munmap(pData->mapAddr, dataLen)==-1) {
             }
             pData->pHeader=nullptr;
-            pData->map=0;
+            pData->map=nullptr;
             pData->mapAddr=nullptr;
         }
     }

+ 0 - 2
thirdparty/icu4c/common/umutex.cpp

@@ -189,7 +189,6 @@ u_setMutexFunctions(const void * /*context */, UMtxInitFn *, UMtxFn *,
     if (U_SUCCESS(*status)) {
         *status = U_UNSUPPORTED_ERROR;
     }
-    return;
 }
 
 
@@ -200,5 +199,4 @@ u_setAtomicIncDecFunctions(const void * /*context */, UMtxAtomicFn *, UMtxAtomic
     if (U_SUCCESS(*status)) {
         *status = U_UNSUPPORTED_ERROR;
     }
-    return;
 }

+ 9 - 10
thirdparty/icu4c/common/unicode/brkiter.h

@@ -146,14 +146,13 @@ public:
      * will return distinct unequal values.
      * @stable ICU 2.0
      */
-    virtual UClassID getDynamicClassID(void) const override = 0;
+    virtual UClassID getDynamicClassID() const override = 0;
 
     /**
      * Return a CharacterIterator over the text being analyzed.
      * @stable ICU 2.0
      */
-    virtual CharacterIterator& getText(void) const = 0;
-
+    virtual CharacterIterator& getText() const = 0;
 
     /**
       *  Get a UText for the text being analyzed.
@@ -228,14 +227,14 @@ public:
      * @return The offset of the beginning of the text, zero.
      * @stable ICU 2.0
      */
-    virtual int32_t first(void) = 0;
+    virtual int32_t first() = 0;
 
     /**
      * Set the iterator position to the index immediately BEYOND the last character in the text being scanned.
      * @return The index immediately BEYOND the last character in the text being scanned.
      * @stable ICU 2.0
      */
-    virtual int32_t last(void) = 0;
+    virtual int32_t last() = 0;
 
     /**
      * Set the iterator position to the boundary preceding the current boundary.
@@ -243,7 +242,7 @@ public:
      * boundaries have been returned.
      * @stable ICU 2.0
      */
-    virtual int32_t previous(void) = 0;
+    virtual int32_t previous() = 0;
 
     /**
      * Advance the iterator to the boundary following the current boundary.
@@ -251,14 +250,14 @@ public:
      * boundaries have been returned.
      * @stable ICU 2.0
      */
-    virtual int32_t next(void) = 0;
+    virtual int32_t next() = 0;
 
     /**
      * Return character index of the current iterator position within the text.
      * @return The boundary most recently returned.
      * @stable ICU 2.0
      */
-    virtual int32_t current(void) const = 0;
+    virtual int32_t current() const = 0;
 
     /**
      * Advance the iterator to the first boundary following the specified offset.
@@ -530,7 +529,7 @@ public:
      *   must be closed by an explicit call to the destructor (not delete).
      * @deprecated ICU 52. Always delete the BreakIterator.
      */
-    inline UBool isBufferClone(void);
+    inline UBool isBufferClone();
 
 #endif /* U_HIDE_DEPRECATED_API */
 
@@ -575,7 +574,7 @@ public:
      * @return a StringEnumeration over the locales available at the time of the call
      * @stable ICU 2.4
      */
-    static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
+    static StringEnumeration* U_EXPORT2 getAvailableLocales();
 #endif
 
     /**

+ 4 - 3
thirdparty/icu4c/common/unicode/caniter.h

@@ -128,9 +128,10 @@ public:
      * @param skipZeros  determine if skip zeros
      * @param result     the results in a set.
      * @param status       Fill-in parameter which receives the status of this operation.
+     * @param depth     depth of the call.
      * @internal
      */
-    static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
+    static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth=0);
 #endif  /* U_HIDE_INTERNAL_API */
 
     /**
@@ -182,8 +183,8 @@ private:
     // transient fields
     UnicodeString buffer;
 
-    const Normalizer2 &nfd;
-    const Normalizer2Impl &nfcImpl;
+    const Normalizer2 *nfd;
+    const Normalizer2Impl *nfcImpl;
 
     // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
     UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)

+ 34 - 34
thirdparty/icu4c/common/unicode/chariter.h

@@ -133,8 +133,8 @@ public:
      * @return the hash code.
      * @stable ICU 2.0
      */
-    virtual int32_t hashCode(void) const = 0;
-    
+    virtual int32_t hashCode() const = 0;
+
     /**
      * Returns a UClassID for this ForwardCharacterIterator ("poor man's
      * RTTI").<P> Despite the fact that this function is public,
@@ -142,8 +142,8 @@ public:
      * @return a UClassID for this ForwardCharacterIterator 
      * @stable ICU 2.0
      */
-    virtual UClassID getDynamicClassID(void) const override = 0;
-    
+    virtual UClassID getDynamicClassID() const override = 0;
+
     /**
      * Gets the current code unit for returning and advances to the next code unit
      * in the iteration range
@@ -152,8 +152,8 @@ public:
      * @return the current code unit.
      * @stable ICU 2.0
      */
-    virtual char16_t         nextPostInc(void) = 0;
-    
+    virtual char16_t nextPostInc() = 0;
+
     /**
      * Gets the current code point for returning and advances to the next code point
      * in the iteration range
@@ -162,8 +162,8 @@ public:
      * @return the current code point.
      * @stable ICU 2.0
      */
-    virtual UChar32       next32PostInc(void) = 0;
-    
+    virtual UChar32 next32PostInc() = 0;
+
     /**
      * Returns false if there are no more code units or code points
      * at or after the current position in the iteration range.
@@ -389,7 +389,7 @@ public:
      * @return the first code unit in its iteration range.
      * @stable ICU 2.0
      */
-    virtual char16_t         first(void) = 0;
+    virtual char16_t first() = 0;
 
     /**
      * Sets the iterator to refer to the first code unit in its
@@ -399,7 +399,7 @@ public:
      * @return the first code unit in its iteration range.
      * @stable ICU 2.0
      */
-    virtual char16_t         firstPostInc(void);
+    virtual char16_t firstPostInc();
 
     /**
      * Sets the iterator to refer to the first code point in its
@@ -410,7 +410,7 @@ public:
      * @return the first code point in its iteration range.
      * @stable ICU 2.0
      */
-    virtual UChar32       first32(void) = 0;
+    virtual UChar32 first32() = 0;
 
     /**
      * Sets the iterator to refer to the first code point in its
@@ -420,7 +420,7 @@ public:
      * @return the first code point in its iteration range.
      * @stable ICU 2.0
      */
-    virtual UChar32       first32PostInc(void);
+    virtual UChar32 first32PostInc();
 
     /**
      * Sets the iterator to refer to the first code unit or code point in its
@@ -438,8 +438,8 @@ public:
      * @return the last code unit.
      * @stable ICU 2.0
      */
-    virtual char16_t         last(void) = 0;
-        
+    virtual char16_t last() = 0;
+
     /**
      * Sets the iterator to refer to the last code point in its
      * iteration range, and returns that code point.
@@ -447,7 +447,7 @@ public:
      * @return the last code point.
      * @stable ICU 2.0
      */
-    virtual UChar32       last32(void) = 0;
+    virtual UChar32 last32() = 0;
 
     /**
      * Sets the iterator to the end of its iteration range, just behind
@@ -486,15 +486,15 @@ public:
      * @return the current code unit. 
      * @stable ICU 2.0
      */
-    virtual char16_t         current(void) const = 0;
-        
+    virtual char16_t current() const = 0;
+
     /**
      * Returns the code point the iterator currently refers to.  
      * @return the current code point.
      * @stable ICU 2.0
      */
-    virtual UChar32       current32(void) const = 0;
-        
+    virtual UChar32 current32() const = 0;
+
     /**
      * Advances to the next code unit in the iteration range
      * (toward endIndex()), and returns that code unit.  If there are
@@ -502,8 +502,8 @@ public:
      * @return the next code unit.
      * @stable ICU 2.0
      */
-    virtual char16_t         next(void) = 0;
-        
+    virtual char16_t next() = 0;
+
     /**
      * Advances to the next code point in the iteration range
      * (toward endIndex()), and returns that code point.  If there are
@@ -514,8 +514,8 @@ public:
      * @return the next code point.
      * @stable ICU 2.0
      */
-    virtual UChar32       next32(void) = 0;
-        
+    virtual UChar32 next32() = 0;
+
     /**
      * Advances to the previous code unit in the iteration range
      * (toward startIndex()), and returns that code unit.  If there are
@@ -523,7 +523,7 @@ public:
      * @return the previous code unit.
      * @stable ICU 2.0
      */
-    virtual char16_t         previous(void) = 0;
+    virtual char16_t previous() = 0;
 
     /**
      * Advances to the previous code point in the iteration range
@@ -532,7 +532,7 @@ public:
      * @return the previous code point. 
      * @stable ICU 2.0
      */
-    virtual UChar32       previous32(void) = 0;
+    virtual UChar32 previous32() = 0;
 
     /**
      * Returns false if there are no more code units or code points
@@ -555,8 +555,8 @@ public:
      * object of the character returned by first().
      * @stable ICU 2.0
      */
-    inline int32_t       startIndex(void) const;
-        
+    inline int32_t startIndex() const;
+
     /**
      * Returns the numeric index in the underlying text-storage
      * object of the position immediately BEYOND the character
@@ -566,8 +566,8 @@ public:
      * returned by last().
      * @stable ICU 2.0
      */
-    inline int32_t       endIndex(void) const;
-        
+    inline int32_t endIndex() const;
+
     /**
      * Returns the numeric index in the underlying text-storage
      * object of the character the iterator currently refers to
@@ -576,7 +576,7 @@ public:
      * the character the iterator currently refers to
      * @stable ICU 2.0
      */
-    inline int32_t       getIndex(void) const;
+    inline int32_t getIndex() const;
 
     /**
      * Returns the length of the entire text in the underlying
@@ -708,22 +708,22 @@ CharacterIterator::setToEnd() {
 }
 
 inline int32_t
-CharacterIterator::startIndex(void) const {
+CharacterIterator::startIndex() const {
     return begin;
 }
 
 inline int32_t
-CharacterIterator::endIndex(void) const {
+CharacterIterator::endIndex() const {
     return end;
 }
 
 inline int32_t
-CharacterIterator::getIndex(void) const {
+CharacterIterator::getIndex() const {
     return pos;
 }
 
 inline int32_t
-CharacterIterator::getLength(void) const {
+CharacterIterator::getLength() const {
     return textLength;
 }
 

+ 5 - 0
thirdparty/icu4c/common/unicode/docmain.h

@@ -143,6 +143,11 @@
  *     <td>icu::MessageFormat</td>
  *   </tr>
  *   <tr>
+ *     <td>Message Formatting 2<br/>(technology preview)</td>
+ *     <td>(no C API)</td>
+ *     <td>icu::message2::MessageFormatter</td>
+ *   </tr>
+ *   <tr>
  *     <td>List Formatting</td>
  *     <td>ulistformatter.h</td>
  *     <td>icu::ListFormatter</td>

+ 2 - 3
thirdparty/icu4c/common/unicode/dtintrv.h

@@ -76,7 +76,7 @@ public:
      * @return          The class ID for all objects of this class.
      * @stable ICU 4.0
      */
-    static UClassID U_EXPORT2 getStaticClassID(void);
+    static UClassID U_EXPORT2 getStaticClassID();
 
     /**
      * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
@@ -89,9 +89,8 @@ public:
      *                  other classes have different class IDs.
      * @stable ICU 4.0
      */
-    virtual UClassID getDynamicClassID(void) const override;
+    virtual UClassID getDynamicClassID() const override;
 
-    
     /**
      * Copy constructor.
      * @stable ICU 4.0

+ 5 - 3
thirdparty/icu4c/common/unicode/localematcher.h

@@ -11,6 +11,8 @@
 
 #if U_SHOW_CPLUSPLUS_API
 
+#include <optional>
+
 #include "unicode/locid.h"
 #include "unicode/stringpiece.h"
 #include "unicode/uobject.h"
@@ -133,10 +135,10 @@ U_NAMESPACE_BEGIN
 
 struct LSR;
 
+class LikelySubtags;
 class LocaleDistance;
 class LocaleLsrIterator;
 class UVector;
-class XLikelySubtags;
 
 /**
  * Immutable class that picks the best match between a user's desired locales and
@@ -678,9 +680,9 @@ private:
 
     int32_t putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, UErrorCode &errorCode);
 
-    int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
+    std::optional<int32_t> getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
 
-    const XLikelySubtags &likelySubtags;
+    const LikelySubtags &likelySubtags;
     const LocaleDistance &localeDistance;
     int32_t thresholdDistance;
     int32_t demotionPerDesiredLocale;

+ 57 - 43
thirdparty/icu4c/common/unicode/localpointer.h

@@ -162,11 +162,11 @@ protected:
     T *ptr;
 private:
     // No comparison operators with other LocalPointerBases.
-    bool operator==(const LocalPointerBase<T> &other);
-    bool operator!=(const LocalPointerBase<T> &other);
+    bool operator==(const LocalPointerBase<T> &other) = delete;
+    bool operator!=(const LocalPointerBase<T> &other) = delete;
     // No ownership sharing: No copy constructor, no assignment operator.
-    LocalPointerBase(const LocalPointerBase<T> &other);
-    void operator=(const LocalPointerBase<T> &other);
+    LocalPointerBase(const LocalPointerBase<T> &other) = delete;
+    void operator=(const LocalPointerBase<T> &other) = delete;
 };
 
 /**
@@ -548,46 +548,60 @@ public:
  * @stable ICU 4.4
  */
 #define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
-    class LocalPointerClassName : public LocalPointerBase<Type> { \
-    public: \
-        using LocalPointerBase<Type>::operator*; \
-        using LocalPointerBase<Type>::operator->; \
-        explicit LocalPointerClassName(Type *p=nullptr) : LocalPointerBase<Type>(p) {} \
-        LocalPointerClassName(LocalPointerClassName &&src) noexcept \
-                : LocalPointerBase<Type>(src.ptr) { \
-            src.ptr=nullptr; \
-        } \
-        /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
-        explicit LocalPointerClassName(std::unique_ptr<Type, decltype(&closeFunction)> &&p) \
-                : LocalPointerBase<Type>(p.release()) {} \
-        ~LocalPointerClassName() { if (ptr != nullptr) { closeFunction(ptr); } } \
-        LocalPointerClassName &operator=(LocalPointerClassName &&src) noexcept { \
-            if (ptr != nullptr) { closeFunction(ptr); } \
-            LocalPointerBase<Type>::ptr=src.ptr; \
-            src.ptr=nullptr; \
-            return *this; \
-        } \
-        /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
-        LocalPointerClassName &operator=(std::unique_ptr<Type, decltype(&closeFunction)> &&p) { \
-            adoptInstead(p.release()); \
-            return *this; \
-        } \
-        void swap(LocalPointerClassName &other) noexcept { \
-            Type *temp=LocalPointerBase<Type>::ptr; \
-            LocalPointerBase<Type>::ptr=other.ptr; \
-            other.ptr=temp; \
-        } \
-        friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) noexcept { \
-            p1.swap(p2); \
-        } \
-        void adoptInstead(Type *p) { \
-            if (ptr != nullptr) { closeFunction(ptr); } \
-            ptr=p; \
-        } \
-        operator std::unique_ptr<Type, decltype(&closeFunction)> () && { \
-            return std::unique_ptr<Type, decltype(&closeFunction)>(LocalPointerBase<Type>::orphan(), closeFunction); \
-        } \
+    using LocalPointerClassName = internal::LocalOpenPointer<Type, closeFunction>
+
+#ifndef U_IN_DOXYGEN
+namespace internal {
+/**
+ * Implementation, do not use directly: use U_DEFINE_LOCAL_OPEN_POINTER.
+ *
+ * @see U_DEFINE_LOCAL_OPEN_POINTER
+ * @internal
+ */
+template <typename Type, auto closeFunction>
+class LocalOpenPointer : public LocalPointerBase<Type> {
+    using LocalPointerBase<Type>::ptr;
+public:
+    using LocalPointerBase<Type>::operator*;
+    using LocalPointerBase<Type>::operator->;
+    explicit LocalOpenPointer(Type *p=nullptr) : LocalPointerBase<Type>(p) {}
+    LocalOpenPointer(LocalOpenPointer &&src) noexcept
+            : LocalPointerBase<Type>(src.ptr) {
+        src.ptr=nullptr;
+    }
+    /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */
+    explicit LocalOpenPointer(std::unique_ptr<Type, decltype(closeFunction)> &&p)
+            : LocalPointerBase<Type>(p.release()) {}
+    ~LocalOpenPointer() { if (ptr != nullptr) { closeFunction(ptr); } }
+    LocalOpenPointer &operator=(LocalOpenPointer &&src) noexcept {
+        if (ptr != nullptr) { closeFunction(ptr); }
+        LocalPointerBase<Type>::ptr=src.ptr;
+        src.ptr=nullptr;
+        return *this;
+    }
+    /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */
+    LocalOpenPointer &operator=(std::unique_ptr<Type, decltype(closeFunction)> &&p) {
+        adoptInstead(p.release());
+        return *this;
+    }
+    void swap(LocalOpenPointer &other) noexcept {
+        Type *temp=LocalPointerBase<Type>::ptr;
+        LocalPointerBase<Type>::ptr=other.ptr;
+        other.ptr=temp;
+    }
+    friend inline void swap(LocalOpenPointer &p1, LocalOpenPointer &p2) noexcept {
+        p1.swap(p2);
     }
+    void adoptInstead(Type *p) {
+        if (ptr != nullptr) { closeFunction(ptr); }
+        ptr=p;
+    }
+    operator std::unique_ptr<Type, decltype(closeFunction)> () && {
+        return std::unique_ptr<Type, decltype(closeFunction)>(LocalPointerBase<Type>::orphan(), closeFunction);
+    }
+};
+}  // namespace internal
+#endif
 
 U_NAMESPACE_END
 

+ 43 - 39
thirdparty/icu4c/common/unicode/locid.h

@@ -195,51 +195,50 @@ class UnicodeString;
 class U_COMMON_API Locale : public UObject {
 public:
     /** Useful constant for the Root locale. @stable ICU 4.4 */
-    static const Locale &U_EXPORT2 getRoot(void);
+    static const Locale& U_EXPORT2 getRoot();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getEnglish(void);
+    static const Locale& U_EXPORT2 getEnglish();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getFrench(void);
+    static const Locale& U_EXPORT2 getFrench();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getGerman(void);
+    static const Locale& U_EXPORT2 getGerman();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getItalian(void);
+    static const Locale& U_EXPORT2 getItalian();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getJapanese(void);
+    static const Locale& U_EXPORT2 getJapanese();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getKorean(void);
+    static const Locale& U_EXPORT2 getKorean();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getChinese(void);
+    static const Locale& U_EXPORT2 getChinese();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getSimplifiedChinese(void);
+    static const Locale& U_EXPORT2 getSimplifiedChinese();
     /** Useful constant for this language. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getTraditionalChinese(void);
+    static const Locale& U_EXPORT2 getTraditionalChinese();
 
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getFrance(void);
+    static const Locale& U_EXPORT2 getFrance();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getGermany(void);
+    static const Locale& U_EXPORT2 getGermany();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getItaly(void);
+    static const Locale& U_EXPORT2 getItaly();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getJapan(void);
+    static const Locale& U_EXPORT2 getJapan();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getKorea(void);
+    static const Locale& U_EXPORT2 getKorea();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getChina(void);
+    static const Locale& U_EXPORT2 getChina();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getPRC(void);
+    static const Locale& U_EXPORT2 getPRC();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getTaiwan(void);
+    static const Locale& U_EXPORT2 getTaiwan();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getUK(void);
+    static const Locale& U_EXPORT2 getUK();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getUS(void);
+    static const Locale& U_EXPORT2 getUS();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getCanada(void);
+    static const Locale& U_EXPORT2 getCanada();
     /** Useful constant for this country/region. @stable ICU 2.0 */
-    static const Locale &U_EXPORT2 getCanadaFrench(void);
-
+    static const Locale& U_EXPORT2 getCanadaFrench();
 
     /**
      * Construct a default locale object, a Locale for the default locale ID.
@@ -274,10 +273,10 @@ public:
      * @see uloc_getDefault
      * @stable ICU 2.0
      */
-    Locale( const   char * language,
-            const   char * country  = 0,
-            const   char * variant  = 0,
-            const   char * keywordsAndValues = 0);
+    Locale(const char* language,
+           const char* country = nullptr,
+           const char* variant = nullptr,
+           const char* keywordsAndValues = nullptr);
 
     /**
      * Initializes a Locale object from another Locale object.
@@ -370,7 +369,7 @@ public:
      * @system
      * @stable ICU 2.0
      */
-    static const Locale& U_EXPORT2 getDefault(void);
+    static const Locale& U_EXPORT2 getDefault();
 
     /**
      * Sets the default. Normally set once at the beginning of a process,
@@ -518,20 +517,20 @@ public:
      * If this Locale is already in the maximal form, or not valid, or there is
      * no data available for maximization, the Locale will be unchanged.
      *
-     * For example, "und-Zzzz" cannot be maximized, since there is no
+     * For example, "sh" cannot be maximized, since there is no
      * reasonable maximization.
      *
      * Examples:
      *
+     * "und_Zzzz" maximizes to "en_Latn_US"
+     *
      * "en" maximizes to "en_Latn_US"
      *
-     * "de" maximizes to "de_Latn_US"
+     * "de" maximizes to "de_Latn_DE"
      *
      * "sr" maximizes to "sr_Cyrl_RS"
      *
-     * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
-     *
-     * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
+     * "zh_Hani" maximizes to "zh_Hani_CN"
      *
      * @param status  error information if maximizing this Locale failed.
      *                If this Locale is not well-formed, the error code is
@@ -792,7 +791,7 @@ public:
      * there is no Windows LCID value that corresponds to this locale, returns 0.
      * @stable ICU 2.0
      */
-    uint32_t        getLCID(void) const;
+    uint32_t getLCID() const;
 
     /**
      * Returns whether this locale's script is written right-to-left.
@@ -944,7 +943,7 @@ public:
      * Generates a hash code for the locale.
      * @stable ICU 2.0
      */
-    int32_t         hashCode(void) const;
+    int32_t hashCode() const;
 
     /**
      * Sets the locale to bogus
@@ -961,7 +960,7 @@ public:
      * @return false if it is a real locale, true if it is a bogus locale
      * @stable ICU 2.1
      */
-    inline UBool isBogus(void) const;
+    inline UBool isBogus() const;
 
     /**
      * Returns a list of all installed locales.
@@ -1147,7 +1146,7 @@ private:
     /**
      * Initialize the locale cache for commonly used locales
      */
-    static Locale *getLocaleCache(void);
+    static Locale* getLocaleCache();
 
     char language[ULOC_LANG_CAPACITY];
     char script[ULOC_SCRIPT_CAPACITY];
@@ -1184,6 +1183,7 @@ Locale::operator!=(const    Locale&     other) const
 template<typename StringClass> inline StringClass
 Locale::toLanguageTag(UErrorCode& status) const
 {
+    if (U_FAILURE(status)) { return {}; }
     StringClass result;
     StringByteSink<StringClass> sink(&result);
     toLanguageTag(sink, status);
@@ -1211,7 +1211,7 @@ Locale::getScript() const
 inline const char *
 Locale::getVariant() const
 {
-    return &baseName[variantBegin];
+    return fIsBogus ? "" : &baseName[variantBegin];
 }
 
 inline const char *
@@ -1223,6 +1223,7 @@ Locale::getName() const
 template<typename StringClass, typename OutputIterator> inline void
 Locale::getKeywords(OutputIterator iterator, UErrorCode& status) const
 {
+    if (U_FAILURE(status)) { return; }
     LocalPointer<StringEnumeration> keys(createKeywords(status));
     if (U_FAILURE(status) || keys.isNull()) {
         return;
@@ -1240,6 +1241,7 @@ Locale::getKeywords(OutputIterator iterator, UErrorCode& status) const
 template<typename StringClass, typename OutputIterator> inline void
 Locale::getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const
 {
+    if (U_FAILURE(status)) { return; }
     LocalPointer<StringEnumeration> keys(createUnicodeKeywords(status));
     if (U_FAILURE(status) || keys.isNull()) {
         return;
@@ -1257,6 +1259,7 @@ Locale::getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const
 template<typename StringClass> inline StringClass
 Locale::getKeywordValue(StringPiece keywordName, UErrorCode& status) const
 {
+    if (U_FAILURE(status)) { return {}; }
     StringClass result;
     StringByteSink<StringClass> sink(&result);
     getKeywordValue(keywordName, sink, status);
@@ -1266,6 +1269,7 @@ Locale::getKeywordValue(StringPiece keywordName, UErrorCode& status) const
 template<typename StringClass> inline StringClass
 Locale::getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const
 {
+    if (U_FAILURE(status)) { return {}; }
     StringClass result;
     StringByteSink<StringClass> sink(&result);
     getUnicodeKeywordValue(keywordName, sink, status);
@@ -1273,7 +1277,7 @@ Locale::getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) cons
 }
 
 inline UBool
-Locale::isBogus(void) const {
+Locale::isBogus() const {
     return fIsBogus;
 }
 

+ 12 - 12
thirdparty/icu4c/common/unicode/normlzr.h

@@ -466,7 +466,7 @@ public:
    * @return the current normalized code point
    * @deprecated ICU 56 Use Normalizer2 instead.
    */
-  UChar32              current(void);
+  UChar32 current();
 
   /**
    * Return the first character in the normalized text.
@@ -476,7 +476,7 @@ public:
    * @return the first normalized code point
    * @deprecated ICU 56 Use Normalizer2 instead.
    */
-  UChar32              first(void);
+  UChar32 first();
 
   /**
    * Return the last character in the normalized text.
@@ -486,7 +486,7 @@ public:
    * @return the last normalized code point
    * @deprecated ICU 56 Use Normalizer2 instead.
    */
-  UChar32              last(void);
+  UChar32 last();
 
   /**
    * Return the next character in the normalized text.
@@ -502,7 +502,7 @@ public:
    * @return the next normalized code point
    * @deprecated ICU 56 Use Normalizer2 instead.
    */
-  UChar32              next(void);
+  UChar32 next();
 
   /**
    * Return the previous character in the normalized text and decrement.
@@ -518,7 +518,7 @@ public:
    * @return the previous normalized code point
    * @deprecated ICU 56 Use Normalizer2 instead.
    */
-  UChar32              previous(void);
+  UChar32 previous();
 
   /**
    * Set the iteration position in the input text that is being normalized,
@@ -536,7 +536,7 @@ public:
    * This is equivalent to setIndexOnly(startIndex()).
    * @deprecated ICU 56 Use Normalizer2 instead.
    */
-  void                reset(void);
+  void reset();
 
   /**
    * Retrieve the current iteration position in the input text that is
@@ -552,7 +552,7 @@ public:
    * @return the current index in the input text
    * @deprecated ICU 56 Use Normalizer2 instead.
    */
-  int32_t            getIndex(void) const;
+  int32_t getIndex() const;
 
   /**
    * Retrieve the index of the start of the input text. This is the begin index
@@ -562,7 +562,7 @@ public:
    * @return the smallest index in the input text where the Normalizer operates
    * @deprecated ICU 56 Use Normalizer2 instead.
    */
-  int32_t            startIndex(void) const;
+  int32_t startIndex() const;
 
   /**
    * Retrieve the index of the end of the input text. This is the end index
@@ -574,7 +574,7 @@ public:
    * @return the first index in the input text where the Normalizer does not operate
    * @deprecated ICU 56 Use Normalizer2 instead.
    */
-  int32_t            endIndex(void) const;
+  int32_t endIndex() const;
 
   /**
    * Returns true when both iterators refer to the same character in the same
@@ -610,7 +610,7 @@ public:
    * @return the hash code
    * @deprecated ICU 56 Use Normalizer2 instead.
    */
-  int32_t                hashCode(void) const;
+  int32_t hashCode() const;
 
   //-------------------------------------------------------------------------
   // Property access methods
@@ -643,7 +643,7 @@ public:
    * @see #setMode
    * @deprecated ICU 56 Use Normalizer2 instead.
    */
-  UNormalizationMode getUMode(void) const;
+  UNormalizationMode getUMode() const;
 
   /**
    * Set options that affect this <code>Normalizer</code>'s operation.
@@ -749,7 +749,7 @@ private:
   UBool previousNormalize();
 
   void    init();
-  void    clearBuffer(void);
+  void clearBuffer();
 
   //-------------------------------------------------------------------------
   // Private data

+ 2 - 2
thirdparty/icu4c/common/unicode/parsepos.h

@@ -129,7 +129,7 @@ public:
      * @return the current index.
      * @stable ICU 2.0
      */
-    inline int32_t getIndex(void) const;
+    inline int32_t getIndex() const;
 
     /**
      * Set the current parse position.
@@ -152,7 +152,7 @@ public:
      * error index has not been set.
      * @stable ICU 2.0
      */
-    inline int32_t getErrorIndex(void) const;
+    inline int32_t getErrorIndex() const;
 
     /**
      * ICU "poor man's RTTI", returns a UClassID for this class.

+ 15 - 47
thirdparty/icu4c/common/unicode/platform.h

@@ -206,6 +206,17 @@
 #   define U_PLATFORM U_PF_UNKNOWN
 #endif
 
+/**
+ * \def U_REAL_MSVC
+ * Defined if the compiler is the real MSVC compiler (and not something like
+ * Clang setting _MSC_VER in order to compile Windows code that requires it).
+ * Otherwise undefined.
+ * @internal
+ */
+#if (defined(_MSC_VER) && !(defined(__clang__) && __clang__)) || defined(U_IN_DOXYGEN)
+#   define U_REAL_MSVC
+#endif
+
 /**
  * \def CYGWINMSVC
  * Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
@@ -302,51 +313,6 @@
 #   define U_PLATFORM_IS_DARWIN_BASED 0
 #endif
 
-/**
- * \def U_HAVE_STDINT_H
- * Defines whether stdint.h is available. It is a C99 standard header.
- * We used to include inttypes.h which includes stdint.h but we usually do not need
- * the additional definitions from inttypes.h.
- * @internal
- */
-#ifdef U_HAVE_STDINT_H
-    /* Use the predefined value. */
-#elif U_PLATFORM_USES_ONLY_WIN32_API
-#   if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600)
-        /* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */
-#       define U_HAVE_STDINT_H 1
-#   else
-#       define U_HAVE_STDINT_H 0
-#   endif
-#elif U_PLATFORM == U_PF_SOLARIS
-    /* Solaris has inttypes.h but not stdint.h. */
-#   define U_HAVE_STDINT_H 0
-#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
-    /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
-#   define U_HAVE_STDINT_H 0
-#else
-#   define U_HAVE_STDINT_H 1
-#endif
-
-/**
- * \def U_HAVE_INTTYPES_H
- * Defines whether inttypes.h is available. It is a C99 standard header.
- * We include inttypes.h where it is available but stdint.h is not.
- * @internal
- */
-#ifdef U_HAVE_INTTYPES_H
-    /* Use the predefined value. */
-#elif U_PLATFORM == U_PF_SOLARIS
-    /* Solaris has inttypes.h but not stdint.h. */
-#   define U_HAVE_INTTYPES_H 1
-#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
-    /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
-#   define U_HAVE_INTTYPES_H 1
-#else
-    /* Most platforms have both inttypes.h and stdint.h, or neither. */
-#   define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
-#endif
-
 /*===========================================================================*/
 /** @{ Compiler and environment features                                     */
 /*===========================================================================*/
@@ -507,6 +473,8 @@
     /* Otherwise use the predefined value. */
 #elif !defined(__cplusplus)
 #   define U_CPLUSPLUS_VERSION 0
+#elif __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L)
+#   define U_CPLUSPLUS_VERSION 17
 #elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
 #   define U_CPLUSPLUS_VERSION 14
 #elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
@@ -754,9 +722,9 @@
     /*
      * Notes:
      * C++11 and C11 require support for UTF-16 literals
-     * TODO: Fix for plain C. Doesn't work on Mac.
+     * Doesn't work on Mac C11 (see workaround in ptypes.h).
      */
-#   if U_CPLUSPLUS_VERSION >= 11 || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
+#   if defined(__cplusplus) || !U_PLATFORM_IS_DARWIN_BASED
 #       define U_HAVE_CHAR16_T 1
 #   else
 #       define U_HAVE_CHAR16_T 0

+ 11 - 75
thirdparty/icu4c/common/unicode/ptypes.h

@@ -42,89 +42,25 @@
 /* NULL, size_t, wchar_t */
 #include <stddef.h>
 
-/*
- * If all compilers provided all of the C99 headers and types,
- * we would just unconditionally #include <stdint.h> here
- * and not need any of the stuff after including platform.h.
- */
-
-/* Find out if we have stdint.h etc. */
+/* More platform-specific definitions. */
 #include "unicode/platform.h"
 
 /*===========================================================================*/
 /* Generic data types                                                        */
 /*===========================================================================*/
 
-/* If your platform does not have the <stdint.h> header, you may
-   need to edit the typedefs in the #else section below.
-   Use #if...#else...#endif with predefined compiler macros if possible. */
-#if U_HAVE_STDINT_H
-
-/*
- * We mostly need <stdint.h> (which defines the standard integer types) but not <inttypes.h>.
- * <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc.
- * which we almost never use, plus stuff like imaxabs() which we never use.
- */
 #include <stdint.h>
 
-#if U_PLATFORM == U_PF_OS390
-/* The features header is needed to get (u)int64_t sometimes. */
-#include <features.h>
-/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). */
-#if !defined(__uint8_t)
-#define __uint8_t 1
-typedef unsigned char uint8_t;
-#endif
-#endif /* U_PLATFORM == U_PF_OS390 */
-
-#elif U_HAVE_INTTYPES_H
-
-#   include <inttypes.h>
-
-#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */
-
-/// \cond
-#if ! U_HAVE_INT8_T
-typedef signed char int8_t;
-#endif
-
-#if ! U_HAVE_UINT8_T
-typedef unsigned char uint8_t;
-#endif
-
-#if ! U_HAVE_INT16_T
-typedef signed short int16_t;
-#endif
-
-#if ! U_HAVE_UINT16_T
-typedef unsigned short uint16_t;
-#endif
-
-#if ! U_HAVE_INT32_T
-typedef signed int int32_t;
-#endif
-
-#if ! U_HAVE_UINT32_T
-typedef unsigned int uint32_t;
+// C++11 and C11 both specify that the data type char16_t should exist, C++11
+// as a keyword and C11 as a typedef in the uchar.h header file, but not all
+// implementations (looking at you, Apple, spring 2024) actually do this, so
+// ICU4C must detect and deal with that.
+#if !defined(__cplusplus) && !defined(U_IN_DOXYGEN)
+#   if U_HAVE_CHAR16_T
+#       include <uchar.h>
+#   else
+        typedef uint16_t char16_t;
+#   endif
 #endif
 
-#if ! U_HAVE_INT64_T
-#ifdef _MSC_VER
-    typedef signed __int64 int64_t;
-#else
-    typedef signed long long int64_t;
-#endif
-#endif
-
-#if ! U_HAVE_UINT64_T
-#ifdef _MSC_VER
-    typedef unsigned __int64 uint64_t;
-#else
-    typedef unsigned long long uint64_t;
-#endif
-#endif
-/// \endcond
-
-#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */
-
 #endif /* _PTYPES_H */

+ 14 - 16
thirdparty/icu4c/common/unicode/rbbi.h

@@ -44,7 +44,7 @@ class  UnhandledEngine;
 class  UStack;
 
 
-#ifndef U_HIDE_DRAFT_API
+#ifndef U_HIDE_INTERNAL_API
 /**
  * The ExternalBreakEngine class defines an abstract interface for the host environment
  * to provide a low level facility to break text for unicode text in script that the text boundary
@@ -103,7 +103,7 @@ class ExternalBreakEngine : public UObject {
                                int32_t* foundBreaks, int32_t foundBreaksCapacity,
                                UErrorCode& status) const = 0;
 };
-#endif  /* U_HIDE_DRAFT_API */
+#endif  /* U_HIDE_INTERNAL_API */
 
 
 /**
@@ -388,14 +388,14 @@ public:
      * @return A hash code
      *  @stable ICU 2.0
      */
-    virtual int32_t hashCode(void) const;
+    virtual int32_t hashCode() const;
 
     /**
      * Returns the description used to create this iterator
      * @return the description used to create this iterator
      *  @stable ICU 2.0
      */
-    virtual const UnicodeString& getRules(void) const;
+    virtual const UnicodeString& getRules() const;
 
     //=======================================================================
     // BreakIterator overrides
@@ -425,8 +425,7 @@ public:
      * @return An iterator over the text being analyzed.
      * @stable ICU 2.0
      */
-    virtual  CharacterIterator& getText(void) const override;
-
+    virtual CharacterIterator& getText() const override;
 
     /**
       *  Get a UText for the text being analyzed.
@@ -486,14 +485,14 @@ public:
      * @return The offset of the beginning of the text, zero.
      *  @stable ICU 2.0
      */
-    virtual int32_t first(void) override;
+    virtual int32_t first() override;
 
     /**
      * Sets the current iteration position to the end of the text.
      * @return The text's past-the-end offset.
      *  @stable ICU 2.0
      */
-    virtual int32_t last(void) override;
+    virtual int32_t last() override;
 
     /**
      * Advances the iterator either forward or backward the specified number of steps.
@@ -512,14 +511,14 @@ public:
      * @return The position of the first boundary after this one.
      *  @stable ICU 2.0
      */
-    virtual int32_t next(void) override;
+    virtual int32_t next() override;
 
     /**
      * Moves the iterator backwards, to the last boundary preceding this one.
      * @return The position of the last boundary position preceding this one.
      *  @stable ICU 2.0
      */
-    virtual int32_t previous(void) override;
+    virtual int32_t previous() override;
 
     /**
      * Sets the iterator to refer to the first boundary position following
@@ -557,8 +556,7 @@ public:
      * @return The current iteration position.
      * @stable ICU 2.0
      */
-    virtual int32_t current(void) const override;
-
+    virtual int32_t current() const override;
 
     /**
      * Return the status tag from the break rule that determined the boundary at
@@ -629,7 +627,7 @@ public:
      *                  other classes have different class IDs.
      * @stable ICU 2.0
      */
-    virtual UClassID getDynamicClassID(void) const override;
+    virtual UClassID getDynamicClassID() const override;
 
     /**
      * Returns the class ID for this class.  This is useful only for
@@ -642,7 +640,7 @@ public:
      * @return          The class ID for all objects of this class.
      * @stable ICU 2.0
      */
-    static UClassID U_EXPORT2 getStaticClassID(void);
+    static UClassID U_EXPORT2 getStaticClassID();
 
 #ifndef U_FORCE_HIDE_DEPRECATED_API
     /**
@@ -799,7 +797,7 @@ private:
     void dumpTables();
 #endif  /* U_HIDE_INTERNAL_API */
 
-#ifndef U_HIDE_DRAFT_API
+#ifndef U_HIDE_INTERNAL_API
     /**
      * Register a new external break engine. The external break engine will be adopted.
      * Because ICU may choose to cache break engine internally, this must
@@ -811,7 +809,7 @@ private:
      */
     static void U_EXPORT2 registerExternalBreakEngine(
                   ExternalBreakEngine* toAdopt, UErrorCode& status);
-#endif  /* U_HIDE_DRAFT_API */
+#endif  /* U_HIDE_INTERNAL_API */
 
 };
 

+ 8 - 17
thirdparty/icu4c/common/unicode/resbund.h

@@ -209,8 +209,7 @@ public:
      * @return number of resources in a given resource.
      * @stable ICU 2.0
      */
-    int32_t
-      getSize(void) const;
+    int32_t getSize() const;
 
     /**
      * returns a string from a string resource type
@@ -289,16 +288,14 @@ public:
      * @return true if there are more elements, false if there are no more elements
      * @stable ICU 2.0
      */
-    UBool
-      hasNext(void) const;
+    UBool hasNext() const;
 
     /**
      * Resets the internal context of a resource so that iteration starts from the first element.
      *
      * @stable ICU 2.0
      */
-    void
-      resetIterator(void);
+    void resetIterator();
 
     /**
      * Returns the key associated with this resource. Not all the resources have a key - only
@@ -307,8 +304,7 @@ public:
      * @return a key associated to this resource, or nullptr if it doesn't have a key
      * @stable ICU 2.0
      */
-    const char*
-      getKey(void) const;
+    const char* getKey() const;
 
     /**
      * Gets the locale ID of the resource bundle as a string.
@@ -317,9 +313,7 @@ public:
      * @return the locale ID of the resource bundle as a string
      * @stable ICU 2.0
      */
-    const char*
-      getName(void) const;
-
+    const char* getName() const;
 
     /**
      * Returns the type of a resource. Available types are defined in enum UResType
@@ -327,8 +321,7 @@ public:
      * @return type of the given resource.
      * @stable ICU 2.0
      */
-    UResType
-      getType(void) const;
+    UResType getType() const;
 
     /**
      * Returns the next resource in a given resource or nullptr if there are no more resources
@@ -424,8 +417,7 @@ public:
      * @see getVersion
      * @deprecated ICU 2.8 Use getVersion instead.
      */
-    const char*
-      getVersionNumber(void) const;
+    const char* getVersionNumber() const;
 #endif  /* U_HIDE_DEPRECATED_API */
 
     /**
@@ -445,8 +437,7 @@ public:
      * @return a Locale object
      * @deprecated ICU 2.8 Use getLocale(ULocDataLocaleType type, UErrorCode &status) overload instead.
      */
-    const Locale&
-      getLocale(void) const;
+    const Locale& getLocale() const;
 #endif  /* U_HIDE_DEPRECATED_API */
 
     /**

+ 2 - 2
thirdparty/icu4c/common/unicode/schriter.h

@@ -156,14 +156,14 @@ public:
    * @return a class ID for this object.
    * @stable ICU 2.0
    */
-  virtual UClassID         getDynamicClassID(void) const override;
+  virtual UClassID getDynamicClassID() const override;
 
   /**
    * Return a class ID for this class (not really public)
    * @return a class ID for this class
    * @stable ICU 2.0
    */
-  static UClassID   U_EXPORT2 getStaticClassID(void);
+  static UClassID U_EXPORT2 getStaticClassID();
 
 protected:
   /**

+ 1 - 3
thirdparty/icu4c/common/unicode/simpleformatter.h

@@ -25,11 +25,9 @@
 U_NAMESPACE_BEGIN
 
 // Forward declaration:
-namespace number {
-namespace impl {
+namespace number::impl {
 class SimpleModifier;
 }
-}
 
 /**
  * Formats simple patterns like "{1} was born in {0}".

+ 4 - 4
thirdparty/icu4c/common/unicode/stringpiece.h

@@ -130,13 +130,13 @@ class U_COMMON_API StringPiece : public UMemory {
    * @stable ICU 65
    */
   template <typename T,
-            typename = typename std::enable_if<
-                (std::is_same<decltype(T().data()), const char*>::value
+            typename = std::enable_if_t<
+                (std::is_same_v<decltype(T().data()), const char*>
 #if defined(__cpp_char8_t)
-                    || std::is_same<decltype(T().data()), const char8_t*>::value
+                    || std::is_same_v<decltype(T().data()), const char8_t*>
 #endif
                 ) &&
-                std::is_same<decltype(T().size()), size_t>::value>::type>
+                std::is_same_v<decltype(T().size()), size_t>>>
   StringPiece(T str)
       : ptr_(reinterpret_cast<const char*>(str.data())),
         length_(static_cast<int32_t>(str.size())) {}

+ 143 - 2
thirdparty/icu4c/common/unicode/uchar.h

@@ -25,6 +25,7 @@
 #ifndef UCHAR_H
 #define UCHAR_H
 
+#include <stdbool.h>
 #include "unicode/utypes.h"
 #include "unicode/stringoptions.h"
 #include "unicode/ucpmap.h"
@@ -668,12 +669,21 @@ typedef enum UProperty {
      * @stable ICU 63
      */
     UCHAR_VERTICAL_ORIENTATION=0x1018,
+#ifndef U_HIDE_DRAFT_API
+    /**
+     * Enumerated property Identifier_Status.
+     * Used for UTS #39 General Security Profile for Identifiers
+     * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
+     * @draft ICU 75
+     */
+    UCHAR_IDENTIFIER_STATUS=0x1019,
+#endif  // U_HIDE_DRAFT_API
 #ifndef U_HIDE_DEPRECATED_API
     /**
      * One more than the last constant for enumerated/integer Unicode properties.
      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
      */
-    UCHAR_INT_LIMIT=0x1019,
+    UCHAR_INT_LIMIT=0x101A,
 #endif  // U_HIDE_DEPRECATED_API
 
     /** Bitmask property General_Category_Mask.
@@ -774,12 +784,28 @@ typedef enum UProperty {
     UCHAR_SCRIPT_EXTENSIONS=0x7000,
     /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */
     UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,
+#ifndef U_HIDE_DRAFT_API
+    /**
+     * Miscellaneous property Identifier_Type.
+     * Used for UTS #39 General Security Profile for Identifiers
+     * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
+     *
+     * Corresponds to u_hasIDType() and u_getIDTypes().
+     *
+     * Each code point maps to a <i>set</i> of UIdentifierType values.
+     *
+     * @see u_hasIDType
+     * @see u_getIDTypes
+     * @draft ICU 75
+     */
+    UCHAR_IDENTIFIER_TYPE=0x7001,
+#endif  // U_HIDE_DRAFT_API
 #ifndef U_HIDE_DEPRECATED_API
     /**
      * One more than the last constant for Unicode properties with unusual value types.
      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
      */
-    UCHAR_OTHER_PROPERTY_LIMIT=0x7001,
+    UCHAR_OTHER_PROPERTY_LIMIT=0x7002,
 #endif  // U_HIDE_DEPRECATED_API
 
     /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
@@ -2701,6 +2727,68 @@ typedef enum UVerticalOrientation {
     U_VO_UPRIGHT,
 } UVerticalOrientation;
 
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Identifier Status constants.
+ * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
+ *
+ * @see UCHAR_IDENTIFIER_STATUS
+ * @draft ICU 75
+ */
+typedef enum UIdentifierStatus {
+    /*
+     * Note: UIdentifierStatus constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     U_ID_STATUS_<Unicode Identifier_Status value name>
+     */
+
+    /** @draft ICU 75 */
+    U_ID_STATUS_RESTRICTED,
+    /** @draft ICU 75 */
+    U_ID_STATUS_ALLOWED,
+} UIdentifierStatus;
+
+/**
+ * Identifier Type constants.
+ * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
+ *
+ * @see UCHAR_IDENTIFIER_TYPE
+ * @draft ICU 75
+ */
+typedef enum UIdentifierType {
+    /*
+     * Note: UIdentifierType constants are parsed by preparseucd.py.
+     * It matches lines like
+     *     U_ID_TYPE_<Unicode Identifier_Type value name>
+     */
+
+    /** @draft ICU 75 */
+    U_ID_TYPE_NOT_CHARACTER,
+    /** @draft ICU 75 */
+    U_ID_TYPE_DEPRECATED,
+    /** @draft ICU 75 */
+    U_ID_TYPE_DEFAULT_IGNORABLE,
+    /** @draft ICU 75 */
+    U_ID_TYPE_NOT_NFKC,
+    /** @draft ICU 75 */
+    U_ID_TYPE_NOT_XID,
+    /** @draft ICU 75 */
+    U_ID_TYPE_EXCLUSION,
+    /** @draft ICU 75 */
+    U_ID_TYPE_OBSOLETE,
+    /** @draft ICU 75 */
+    U_ID_TYPE_TECHNICAL,
+    /** @draft ICU 75 */
+    U_ID_TYPE_UNCOMMON_USE,
+    /** @draft ICU 75 */
+    U_ID_TYPE_LIMITED_USE,
+    /** @draft ICU 75 */
+    U_ID_TYPE_INCLUSION,
+    /** @draft ICU 75 */
+    U_ID_TYPE_RECOMMENDED,
+} UIdentifierType;
+#endif  // U_HIDE_DRAFT_API
+
 /**
  * Check a binary Unicode property for a code point.
  *
@@ -3905,6 +3993,59 @@ u_isIDStart(UChar32 c);
 U_CAPI UBool U_EXPORT2
 u_isIDPart(UChar32 c);
 
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Does the set of Identifier_Type values of code point c contain the given type?
+ *
+ * Used for UTS #39 General Security Profile for Identifiers
+ * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
+ *
+ * Each code point maps to a <i>set</i> of UIdentifierType values.
+ *
+ * @param c code point
+ * @param type Identifier_Type to check
+ * @return true if type is in Identifier_Type(c)
+ * @draft ICU 75
+ */
+U_CAPI bool U_EXPORT2
+u_hasIDType(UChar32 c, UIdentifierType type);
+
+/**
+ * Writes code point c's Identifier_Type as a list of UIdentifierType values
+ * to the output types array and returns the number of types.
+ *
+ * Used for UTS #39 General Security Profile for Identifiers
+ * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
+ *
+ * Each code point maps to a <i>set</i> of UIdentifierType values.
+ * There is always at least one type.
+ * The order of output values is undefined.
+ * Each type is output at most once;
+ * there cannot be more output values than UIdentifierType constants.
+ * In addition, only some of the types can be combined with others,
+ * and usually only a small number of types occur together.
+ * Future versions might add additional types.
+ * See UTS #39 and its data files for details.
+ *
+ * If there are more than capacity types to be written, then
+ * U_BUFFER_OVERFLOW_ERROR is set and the number of types is returned.
+ * (Usual ICU buffer handling behavior.)
+ *
+ * @param c code point
+ * @param types output array
+ * @param capacity capacity of the array
+ * @param pErrorCode Standard ICU error code. Its input value must
+ *                   pass the U_SUCCESS() test, or else the function returns
+ *                   immediately. Check for U_FAILURE() on output or use with
+ *                   function chaining. (See User Guide for details.)
+ * @return number of values in c's Identifier_Type,
+ *         written to types unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
+ * @draft ICU 75
+ */
+U_CAPI int32_t U_EXPORT2
+u_getIDTypes(UChar32 c, UIdentifierType *types, int32_t capacity, UErrorCode *pErrorCode);
+#endif  // U_HIDE_DRAFT_API
+
 /**
  * Determines if the specified character should be regarded
  * as an ignorable character in an identifier,

+ 17 - 17
thirdparty/icu4c/common/unicode/uchriter.h

@@ -126,7 +126,7 @@ public:
    * @return the hash code.
    * @stable ICU 2.0
    */
-  virtual int32_t         hashCode(void) const override;
+  virtual int32_t hashCode() const override;
 
   /**
    * Returns a new UCharCharacterIterator referring to the same
@@ -144,7 +144,7 @@ public:
    * @return the first code unit in its iteration range.
    * @stable ICU 2.0
    */
-  virtual char16_t         first(void) override;
+  virtual char16_t first() override;
 
   /**
    * Sets the iterator to refer to the first code unit in its
@@ -154,7 +154,7 @@ public:
    * @return the first code unit in its iteration range
    * @stable ICU 2.0
    */
-  virtual char16_t         firstPostInc(void) override;
+  virtual char16_t firstPostInc() override;
 
   /**
    * Sets the iterator to refer to the first code point in its
@@ -165,7 +165,7 @@ public:
    * @return the first code point in its iteration range
    * @stable ICU 2.0
    */
-  virtual UChar32       first32(void) override;
+  virtual UChar32 first32() override;
 
   /**
    * Sets the iterator to refer to the first code point in its
@@ -175,7 +175,7 @@ public:
    * @return the first code point in its iteration range.
    * @stable ICU 2.0
    */
-  virtual UChar32       first32PostInc(void) override;
+  virtual UChar32 first32PostInc() override;
 
   /**
    * Sets the iterator to refer to the last code unit in its
@@ -184,7 +184,7 @@ public:
    * @return the last code unit in its iteration range.
    * @stable ICU 2.0
    */
-  virtual char16_t         last(void) override;
+  virtual char16_t last() override;
 
   /**
    * Sets the iterator to refer to the last code point in its
@@ -193,7 +193,7 @@ public:
    * @return the last code point in its iteration range.
    * @stable ICU 2.0
    */
-  virtual UChar32       last32(void) override;
+  virtual UChar32 last32() override;
 
   /**
    * Sets the iterator to refer to the "position"-th code unit
@@ -223,14 +223,14 @@ public:
    * @return the code unit the iterator currently refers to.
    * @stable ICU 2.0
    */
-  virtual char16_t         current(void) const override;
+  virtual char16_t current() const override;
 
   /**
    * Returns the code point the iterator currently refers to.
    * @return the code point the iterator currently refers to.
    * @stable ICU 2.0
    */
-  virtual UChar32       current32(void) const override;
+  virtual UChar32 current32() const override;
 
   /**
    * Advances to the next code unit in the iteration range (toward
@@ -239,7 +239,7 @@ public:
    * @return the next code unit in the iteration range.
    * @stable ICU 2.0
    */
-  virtual char16_t         next(void) override;
+  virtual char16_t next() override;
 
   /**
    * Gets the current code unit for returning and advances to the next code unit
@@ -249,7 +249,7 @@ public:
    * @return the current code unit.
    * @stable ICU 2.0
    */
-  virtual char16_t         nextPostInc(void) override;
+  virtual char16_t nextPostInc() override;
 
   /**
    * Advances to the next code point in the iteration range (toward
@@ -261,7 +261,7 @@ public:
    * @return the next code point in the iteration range.
    * @stable ICU 2.0
    */
-  virtual UChar32       next32(void) override;
+  virtual UChar32 next32() override;
 
   /**
    * Gets the current code point for returning and advances to the next code point
@@ -271,7 +271,7 @@ public:
    * @return the current point.
    * @stable ICU 2.0
    */
-  virtual UChar32       next32PostInc(void) override;
+  virtual UChar32 next32PostInc() override;
 
   /**
    * Returns false if there are no more code units or code points
@@ -291,7 +291,7 @@ public:
    * @return the previous code unit in the iteration range.
    * @stable ICU 2.0
    */
-  virtual char16_t         previous(void) override;
+  virtual char16_t previous() override;
 
   /**
    * Advances to the previous code point in the iteration range (toward
@@ -300,7 +300,7 @@ public:
    * @return the previous code point in the iteration range.
    * @stable ICU 2.0
    */
-  virtual UChar32       previous32(void) override;
+  virtual UChar32 previous32() override;
 
   /**
    * Returns false if there are no more code units or code points
@@ -363,14 +363,14 @@ public:
    * @return a class ID for this class
    * @stable ICU 2.0
    */
-  static UClassID         U_EXPORT2 getStaticClassID(void);
+  static UClassID U_EXPORT2 getStaticClassID();
 
   /**
    * Return a class ID for this object (not really public)
    * @return a class ID for this object.
    * @stable ICU 2.0
    */
-  virtual UClassID        getDynamicClassID(void) const override;
+  virtual UClassID getDynamicClassID() const override;
 
 protected:
   /**

+ 0 - 3
thirdparty/icu4c/common/unicode/ucnv.h

@@ -344,8 +344,6 @@ ucnv_compareNames(const char *name1, const char *name2);
  * other than it's an alias starting with the letters "cp". Please do not
  * associate any meaning to these aliases.</p>
  *
- * \snippet samples/ucnv/convsamp.cpp ucnv_open
- *
  * @param converterName Name of the coded character set table.
  *          This may have options appended to the string.
  *          IANA alias character set names, IBM CCSIDs starting with "ibm-",
@@ -1986,7 +1984,6 @@ ucnv_usesFallback(const UConverter *cnv);
  * instead of the input signature bytes.
  * <p>
  * Usage:
- * \snippet samples/ucnv/convsamp.cpp ucnv_detectUnicodeSignature
  *
  * @param source            The source string in which the signature should be detected.
  * @param sourceLength      Length of the input string, or -1 if terminated with a NUL byte.

+ 11 - 0
thirdparty/icu4c/common/unicode/uconfig.h

@@ -413,6 +413,17 @@
 #   define UCONFIG_NO_FORMATTING 0
 #endif
 
+/**
+ * \def UCONFIG_NO_MF2
+ * This switch turns off the experimental MessageFormat 2.0 API.
+ *
+ * @internal ICU 75 technology preview
+ * @deprecated This API is for technology preview only.
+ */
+#ifndef UCONFIG_NO_MF2
+#   define UCONFIG_NO_MF2 0
+#endif
+
 /**
  * \def UCONFIG_NO_TRANSLITERATION
  * This switch turns off transliteration.

+ 21 - 4
thirdparty/icu4c/common/unicode/uloc.h

@@ -399,6 +399,9 @@ uloc_setDefault(const char* localeID,
 /**
  * Gets the language code for the specified locale.
  *
+ * This function may return with a failure error code for certain kinds of inputs
+ * but does not fully check for well-formed locale IDs / language tags.
+ *
  * @param localeID the locale to get the ISO language code with
  * @param language the language code for localeID
  * @param languageCapacity the size of the language buffer to store the  
@@ -417,6 +420,9 @@ uloc_getLanguage(const char*    localeID,
 /**
  * Gets the script code for the specified locale.
  *
+ * This function may return with a failure error code for certain kinds of inputs
+ * but does not fully check for well-formed locale IDs / language tags.
+ *
  * @param localeID the locale to get the ISO script code with
  * @param script the script code for localeID
  * @param scriptCapacity the size of the language buffer to store the  
@@ -435,6 +441,9 @@ uloc_getScript(const char*    localeID,
 /**
  * Gets the  country code for the specified locale.
  *
+ * This function may return with a failure error code for certain kinds of inputs
+ * but does not fully check for well-formed locale IDs / language tags.
+ *
  * @param localeID the locale to get the country code with
  * @param country the country code for localeID
  * @param countryCapacity the size of the country buffer to store the  
@@ -453,6 +462,9 @@ uloc_getCountry(const char*    localeID,
 /**
  * Gets the variant code for the specified locale.
  *
+ * This function may return with a failure error code for certain kinds of inputs
+ * but does not fully check for well-formed locale IDs / language tags.
+ *
  * @param localeID the locale to get the variant code with
  * @param variant the variant code for localeID
  * @param variantCapacity the size of the variant buffer to store the 
@@ -471,6 +483,10 @@ uloc_getVariant(const char*    localeID,
 
 /**
  * Gets the full name for the specified locale.
+ *
+ * This function may return with a failure error code for certain kinds of inputs
+ * but does not fully check for well-formed locale IDs / language tags.
+ *
  * Note: This has the effect of 'canonicalizing' the ICU locale ID to
  * a certain extent. Upper and lower case are set as needed.
  * It does NOT map aliased names in any way.
@@ -1158,19 +1174,20 @@ uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity,
  *
  * If localeID is already in the maximal form, or there is no data available
  * for maximization, it will be copied to the output buffer.  For example,
- * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
+ * "sh" cannot be maximized, since there is no reasonable maximization.
  *
  * Examples:
  *
+ * "und_Zzzz" maximizes to "en_Latn_US"
+ *
  * "en" maximizes to "en_Latn_US"
  *
- * "de" maximizes to "de_Latn_US"
+ * "de" maximizes to "de_Latn_DE"
  *
  * "sr" maximizes to "sr_Cyrl_RS"
  *
- * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
+ * "zh_Hani" maximizes to "zh_Hani_CN"
  *
- * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
  *
  * @param localeID The locale to maximize
  * @param maximizedLocaleID The maximized locale

+ 1 - 1
thirdparty/icu4c/common/unicode/ulocbuilder.h

@@ -74,7 +74,7 @@ typedef struct ULocaleBuilder ULocaleBuilder;
  * @draft ICU 74
  */
 U_CAPI ULocaleBuilder* U_EXPORT2
-ulocbld_open();
+ulocbld_open(void);
 
 /**
  * Close the builder and destroy its internal states.

+ 3 - 11
thirdparty/icu4c/common/unicode/umachine.h

@@ -370,22 +370,14 @@ typedef int8_t UBool;
 #if 1
     // #if 1 is normal. UChar defaults to char16_t in C++.
     // For configuration testing of UChar=uint16_t temporarily change this to #if 0.
-    // The intltest Makefile #defines UCHAR_TYPE=char16_t,
-    // so we only #define it to uint16_t if it is undefined so far.
-#elif !defined(UCHAR_TYPE)
+#else
 #   define UCHAR_TYPE uint16_t
 #endif
 
-#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
-        defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
-    // Inside the ICU library code, never configurable.
+#if defined(U_ALL_IMPLEMENTATION) || !defined(UCHAR_TYPE)
     typedef char16_t UChar;
-#elif defined(UCHAR_TYPE)
-    typedef UCHAR_TYPE UChar;
-#elif U_CPLUSPLUS_VERSION != 0
-    typedef char16_t UChar;  // C++
 #else
-    typedef uint16_t UChar;  // C
+    typedef UCHAR_TYPE UChar;
 #endif
 
 /**

+ 2 - 2
thirdparty/icu4c/common/unicode/unifunct.h

@@ -83,7 +83,7 @@ public:
      * @return          The class ID for all objects of this class.
      * @stable ICU 2.0
      */
-    static UClassID U_EXPORT2 getStaticClassID(void);
+    static UClassID U_EXPORT2 getStaticClassID();
 
     /**
      * Returns a unique class ID <b>polymorphically</b>.  This method
@@ -100,7 +100,7 @@ public:
      * different class IDs.
      * @stable ICU 2.4
      */
-    virtual UClassID getDynamicClassID(void) const override = 0;
+    virtual UClassID getDynamicClassID() const override = 0;
 
     /**
      * Set the data object associated with this functor.  The data

+ 10 - 10
thirdparty/icu4c/common/unicode/uniset.h

@@ -333,7 +333,7 @@ public:
      * @see setToBogus()
      * @stable ICU 4.0
      */
-    inline UBool isBogus(void) const;
+    inline UBool isBogus() const;
 
     /**
      * Make this UnicodeSet object invalid.
@@ -522,7 +522,7 @@ public:
      * @see Object#hashCode()
      * @stable ICU 2.0
      */
-    virtual int32_t hashCode(void) const;
+    virtual int32_t hashCode() const;
 
     /**
      * Get a UnicodeSet pointer from a USet
@@ -792,7 +792,7 @@ public:
      * @stable ICU 2.0
      * @see getRangeCount
      */
-    virtual int32_t size(void) const;
+    virtual int32_t size() const;
 
     /**
      * Returns <tt>true</tt> if this set contains no elements.
@@ -800,7 +800,7 @@ public:
      * @return <tt>true</tt> if this set contains no elements.
      * @stable ICU 2.0
      */
-    virtual UBool isEmpty(void) const;
+    virtual UBool isEmpty() const;
 
     /**
      * @return true if this set contains multi-character strings or the empty string.
@@ -1394,7 +1394,7 @@ public:
      * A frozen set will not be modified.
      * @stable ICU 2.0
      */
-    virtual UnicodeSet& clear(void);
+    virtual UnicodeSet& clear();
 
     /**
      * Close this set over the given attribute.  For the attribute
@@ -1440,7 +1440,7 @@ public:
      * @see #getRangeEnd
      * @stable ICU 2.4
      */
-    virtual int32_t getRangeCount(void) const;
+    virtual int32_t getRangeCount() const;
 
     /**
      * Iteration method that returns the first character in the
@@ -1529,7 +1529,7 @@ public:
      * @return          The class ID for all objects of this class.
      * @stable ICU 2.0
      */
-    static UClassID U_EXPORT2 getStaticClassID(void);
+    static UClassID U_EXPORT2 getStaticClassID();
 
     /**
      * Implement UnicodeFunctor API.
@@ -1539,9 +1539,9 @@ public:
      * different class IDs.
      * @stable ICU 2.4
      */
-    virtual UClassID getDynamicClassID(void) const override;
+    virtual UClassID getDynamicClassID() const override;
 
-private:
+  private:
 
     // Private API for the USet API
 
@@ -1602,7 +1602,7 @@ private:
 
     bool ensureBufferCapacity(int32_t newLen);
 
-    void swapBuffers(void);
+    void swapBuffers();
 
     UBool allocateStrings(UErrorCode &status);
     int32_t stringsSize() const;

+ 24 - 26
thirdparty/icu4c/common/unicode/unistr.h

@@ -1611,9 +1611,9 @@ public:
    * @stable ICU 2.0
    */
   inline int32_t extract(int32_t start,
-                 int32_t startLength,
-                 char *target,
-                 const char *codepage = 0) const;
+                         int32_t startLength,
+                         char* target,
+                         const char* codepage = nullptr) const;
 
   /**
    * Copy the characters in the range
@@ -1759,7 +1759,7 @@ public:
    * @see countChar32
    * @stable ICU 2.0
    */
-  inline int32_t length(void) const;
+  inline int32_t length() const;
 
   /**
    * Count Unicode code points in the length char16_t code units of the string.
@@ -1808,7 +1808,7 @@ public:
    * @return true if this string contains 0 characters, false otherwise.
    * @stable ICU 2.0
    */
-  inline UBool isEmpty(void) const;
+  inline UBool isEmpty() const;
 
   /**
    * Return the capacity of the internal buffer of the UnicodeString object.
@@ -1819,7 +1819,7 @@ public:
    * @see getBuffer
    * @stable ICU 2.0
    */
-  inline int32_t getCapacity(void) const;
+  inline int32_t getCapacity() const;
 
   /* Other operations */
 
@@ -1828,7 +1828,7 @@ public:
    * @return The hash code of this UnicodeString.
    * @stable ICU 2.0
    */
-  inline int32_t hashCode(void) const;
+  inline int32_t hashCode() const;
 
   /**
    * Determine if this object contains a valid string.
@@ -1842,8 +1842,7 @@ public:
    * @see setToBogus()
    * @stable ICU 2.0
    */
-  inline UBool isBogus(void) const;
-
+  inline UBool isBogus() const;
 
   //========================================
   // Write operations
@@ -2624,8 +2623,7 @@ public:
    * @return a reference to this
    * @stable ICU 2.0
    */
-  UnicodeString& trim(void);
-
+  UnicodeString& trim();
 
   /* Miscellaneous operations */
 
@@ -2634,7 +2632,7 @@ public:
    * @return a reference to this
    * @stable ICU 2.0
    */
-  inline UnicodeString& reverse(void);
+  inline UnicodeString& reverse();
 
   /**
    * Reverse the range [`start`, `start + length`) in
@@ -2653,7 +2651,7 @@ public:
    * @return A reference to this.
    * @stable ICU 2.0
    */
-  UnicodeString& toUpper(void);
+  UnicodeString& toUpper();
 
   /**
    * Convert the characters in this to UPPER CASE following the conventions of
@@ -2670,7 +2668,7 @@ public:
    * @return A reference to this.
    * @stable ICU 2.0
    */
-  UnicodeString& toLower(void);
+  UnicodeString& toLower();
 
   /**
    * Convert the characters in this to lower case following the conventions of
@@ -3590,12 +3588,12 @@ private:
                int32_t length);
 
   // calculate hash code
-  int32_t doHashCode(void) const;
+  int32_t doHashCode() const;
 
   // get pointer to start of array
   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
-  inline char16_t* getArrayStart(void);
-  inline const char16_t* getArrayStart(void) const;
+  inline char16_t* getArrayStart();
+  inline const char16_t* getArrayStart() const;
 
   inline UBool hasShortLength() const;
   inline int32_t getShortLength() const;
@@ -3622,7 +3620,7 @@ private:
   UBool allocate(int32_t capacity);
 
   // release the array if owned
-  void releaseArray(void);
+  void releaseArray();
 
   // turn a bogus string into an empty one
   void unBogus();
@@ -3684,10 +3682,10 @@ private:
    * Return false if memory could not be allocated.
    */
   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
-                            int32_t growCapacity = -1,
-                            UBool doCopyArray = true,
-                            int32_t **pBufferToDelete = 0,
-                            UBool forceClone = false);
+                           int32_t growCapacity = -1,
+                           UBool doCopyArray = true,
+                           int32_t** pBufferToDelete = nullptr,
+                           UBool forceClone = false);
 
   /**
    * Common function for UnicodeString case mappings.
@@ -3702,9 +3700,9 @@ private:
           UStringCaseMapper *stringCaseMapper);
 
   // ref counting
-  void addRef(void);
-  int32_t removeRef(void);
-  int32_t refCount(void) const;
+  void addRef();
+  int32_t removeRef();
+  int32_t refCount() const;
 
   // constants
   enum {
@@ -4510,7 +4508,7 @@ UnicodeString::extract(int32_t start,
 
 {
   // This dstSize value will be checked explicitly
-  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
+  return extract(start, _length, dst, dst != nullptr ? 0xffffffff : 0, codepage);
 }
 
 #endif

+ 8 - 1
thirdparty/icu4c/common/unicode/urename.h

@@ -272,6 +272,7 @@
 #define u_getDataVersion U_ICU_ENTRY_POINT_RENAME(u_getDataVersion)
 #define u_getDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_getDefaultConverter)
 #define u_getFC_NFKC_Closure U_ICU_ENTRY_POINT_RENAME(u_getFC_NFKC_Closure)
+#define u_getIDTypes U_ICU_ENTRY_POINT_RENAME(u_getIDTypes)
 #define u_getISOComment U_ICU_ENTRY_POINT_RENAME(u_getISOComment)
 #define u_getIntPropertyMap U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMap)
 #define u_getIntPropertyMaxValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMaxValue)
@@ -289,6 +290,7 @@
 #define u_getVersion U_ICU_ENTRY_POINT_RENAME(u_getVersion)
 #define u_get_stdout U_ICU_ENTRY_POINT_RENAME(u_get_stdout)
 #define u_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(u_hasBinaryProperty)
+#define u_hasIDType U_ICU_ENTRY_POINT_RENAME(u_hasIDType)
 #define u_init U_ICU_ENTRY_POINT_RENAME(u_init)
 #define u_isIDIgnorable U_ICU_ENTRY_POINT_RENAME(u_isIDIgnorable)
 #define u_isIDPart U_ICU_ENTRY_POINT_RENAME(u_isIDPart)
@@ -1192,16 +1194,20 @@
 #define ulocimp_canonicalize U_ICU_ENTRY_POINT_RENAME(ulocimp_canonicalize)
 #define ulocimp_forLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_forLanguageTag)
 #define ulocimp_getBaseName U_ICU_ENTRY_POINT_RENAME(ulocimp_getBaseName)
-#define ulocimp_getCountry U_ICU_ENTRY_POINT_RENAME(ulocimp_getCountry)
 #define ulocimp_getKeywordValue U_ICU_ENTRY_POINT_RENAME(ulocimp_getKeywordValue)
 #define ulocimp_getKeywords U_ICU_ENTRY_POINT_RENAME(ulocimp_getKeywords)
 #define ulocimp_getKnownCanonicalizedLocaleForTest U_ICU_ENTRY_POINT_RENAME(ulocimp_getKnownCanonicalizedLocaleForTest)
 #define ulocimp_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocimp_getLanguage)
 #define ulocimp_getName U_ICU_ENTRY_POINT_RENAME(ulocimp_getName)
+#define ulocimp_getParent U_ICU_ENTRY_POINT_RENAME(ulocimp_getParent)
+#define ulocimp_getRegion U_ICU_ENTRY_POINT_RENAME(ulocimp_getRegion)
 #define ulocimp_getRegionForSupplementalData U_ICU_ENTRY_POINT_RENAME(ulocimp_getRegionForSupplementalData)
 #define ulocimp_getScript U_ICU_ENTRY_POINT_RENAME(ulocimp_getScript)
+#define ulocimp_getSubtags U_ICU_ENTRY_POINT_RENAME(ulocimp_getSubtags)
+#define ulocimp_getVariant U_ICU_ENTRY_POINT_RENAME(ulocimp_getVariant)
 #define ulocimp_isCanonicalizedLocaleForTest U_ICU_ENTRY_POINT_RENAME(ulocimp_isCanonicalizedLocaleForTest)
 #define ulocimp_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(ulocimp_minimizeSubtags)
+#define ulocimp_setKeywordValue U_ICU_ENTRY_POINT_RENAME(ulocimp_setKeywordValue)
 #define ulocimp_toBcpKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpKey)
 #define ulocimp_toBcpType U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpType)
 #define ulocimp_toLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_toLanguageTag)
@@ -1800,6 +1806,7 @@
 #define usnum_multiplyByPowerOfTen U_ICU_ENTRY_POINT_RENAME(usnum_multiplyByPowerOfTen)
 #define usnum_openForInt64 U_ICU_ENTRY_POINT_RENAME(usnum_openForInt64)
 #define usnum_roundTo U_ICU_ENTRY_POINT_RENAME(usnum_roundTo)
+#define usnum_setMaximumIntegerDigits U_ICU_ENTRY_POINT_RENAME(usnum_setMaximumIntegerDigits)
 #define usnum_setMinimumFractionDigits U_ICU_ENTRY_POINT_RENAME(usnum_setMinimumFractionDigits)
 #define usnum_setMinimumIntegerDigits U_ICU_ENTRY_POINT_RENAME(usnum_setMinimumIntegerDigits)
 #define usnum_setSign U_ICU_ENTRY_POINT_RENAME(usnum_setSign)

+ 4 - 1
thirdparty/icu4c/common/unicode/uscript.h

@@ -500,6 +500,9 @@ typedef enum UScriptCode {
       /** @stable ICU 72 */
       USCRIPT_NAG_MUNDARI                   = 199,/* Nagm */
 
+      /** @stable ICU 75 */
+      USCRIPT_ARABIC_NASTALIQ               = 200, /* Aran */
+
 #ifndef U_HIDE_DEPRECATED_API
     /**
      * One more than the highest normal UScriptCode value.
@@ -507,7 +510,7 @@ typedef enum UScriptCode {
      *
      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
      */
-    USCRIPT_CODE_LIMIT    = 200
+    USCRIPT_CODE_LIMIT    = 201
 #endif  // U_HIDE_DEPRECATED_API
 } UScriptCode;
 

+ 1 - 3
thirdparty/icu4c/common/unicode/uset.h

@@ -109,7 +109,6 @@ enum {
      */
     USET_ADD_CASE_MAPPINGS = 4,
 
-#ifndef U_HIDE_DRAFT_API
     /**
      * Enable case insensitive matching.
      * Same as USET_CASE_INSENSITIVE but using only Simple_Case_Folding (scf) mappings,
@@ -120,10 +119,9 @@ enum {
      * regular expression implementations where only Simple_Case_Folding mappings are used,
      * such as in ECMAScript (JavaScript) regular expressions.
      *
-     * @draft ICU 73
+     * @stable ICU 73
      */
     USET_SIMPLE_CASE_INSENSITIVE = 6
-#endif  // U_HIDE_DRAFT_API
 };
 
 /**

Some files were not shown because too many files changed in this diff