2
0

script_iterator.cpp 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. /**************************************************************************/
  2. /* script_iterator.cpp */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #include "script_iterator.h"
  31. // This implementation is derived from ICU: icu4c/source/extra/scrptrun/scrptrun.cpp
  32. inline constexpr UChar32 ZERO_WIDTH_JOINER = 0x200d;
  33. inline constexpr UChar32 VARIATION_SELECTOR_15 = 0xfe0e;
  34. inline constexpr UChar32 VARIATION_SELECTOR_16 = 0xfe0f;
  35. inline constexpr UChar32 COMBINING_ENCLOSING_KEYCAP = 0x20e3;
  36. inline bool ScriptIterator::same_script(int32_t p_script_one, int32_t p_script_two) {
  37. return p_script_one <= USCRIPT_INHERITED || p_script_two <= USCRIPT_INHERITED || p_script_one == p_script_two;
  38. }
  39. inline bool ScriptIterator::is_emoji(UChar32 p_c, UChar32 p_next) {
  40. if (p_next == VARIATION_SELECTOR_15 && (u_hasBinaryProperty(p_c, UCHAR_EMOJI) || u_hasBinaryProperty(p_c, UCHAR_EXTENDED_PICTOGRAPHIC))) {
  41. return false;
  42. } else if (p_next == VARIATION_SELECTOR_16 && (u_hasBinaryProperty(p_c, UCHAR_EMOJI) || u_hasBinaryProperty(p_c, UCHAR_EXTENDED_PICTOGRAPHIC))) {
  43. return true;
  44. } else {
  45. return u_hasBinaryProperty(p_c, UCHAR_EMOJI_PRESENTATION) || u_hasBinaryProperty(p_c, UCHAR_EMOJI_MODIFIER) || u_hasBinaryProperty(p_c, UCHAR_REGIONAL_INDICATOR);
  46. }
  47. }
  48. ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length) {
  49. struct ParenStackEntry {
  50. int pair_index;
  51. UScriptCode script_code;
  52. };
  53. struct EmojiSubrunEntry {
  54. int start;
  55. int end;
  56. };
  57. if (p_start >= p_length) {
  58. p_start = p_length - 1;
  59. }
  60. if (p_start < 0) {
  61. p_start = 0;
  62. }
  63. int paren_size = PAREN_STACK_DEPTH;
  64. ParenStackEntry starter_paren_stack[PAREN_STACK_DEPTH];
  65. ParenStackEntry *paren_stack = starter_paren_stack;
  66. int emoji_size = EMOJI_STACK_DEPTH;
  67. EmojiSubrunEntry starter_emoji_stack[EMOJI_STACK_DEPTH];
  68. EmojiSubrunEntry *emoji_stack = starter_emoji_stack;
  69. int script_start;
  70. int script_end = p_start;
  71. UScriptCode script_code;
  72. int paren_sp = -1;
  73. int start_sp = paren_sp;
  74. UErrorCode err = U_ZERO_ERROR;
  75. const char32_t *str = p_string.ptr();
  76. do {
  77. script_code = USCRIPT_COMMON;
  78. int emoji_sp = -1;
  79. bool emoji_run = false;
  80. for (script_start = script_end; script_end < p_length; script_end++) {
  81. UChar32 ch = str[script_end];
  82. UChar32 n = (script_end + 1 < p_length) ? str[script_end + 1] : 0;
  83. if (is_emoji(ch, n)) {
  84. if (!emoji_run) {
  85. emoji_run = true;
  86. emoji_sp++;
  87. if (unlikely(emoji_sp >= emoji_size)) {
  88. emoji_size += EMOJI_STACK_DEPTH;
  89. if (emoji_stack == starter_emoji_stack) {
  90. emoji_stack = static_cast<EmojiSubrunEntry *>(memalloc(emoji_size * sizeof(EmojiSubrunEntry)));
  91. } else {
  92. emoji_stack = static_cast<EmojiSubrunEntry *>(memrealloc(emoji_stack, emoji_size * sizeof(EmojiSubrunEntry)));
  93. }
  94. }
  95. emoji_stack[emoji_sp].start = script_end;
  96. emoji_stack[emoji_sp].end = script_end;
  97. }
  98. } else if (emoji_run && ch != ZERO_WIDTH_JOINER && ch != VARIATION_SELECTOR_16 && ch != COMBINING_ENCLOSING_KEYCAP && !(u_hasBinaryProperty(ch, UCHAR_EXTENDED_PICTOGRAPHIC) && n != VARIATION_SELECTOR_15)) {
  99. emoji_run = false;
  100. emoji_stack[emoji_sp].end = script_end;
  101. }
  102. UScriptCode sc = uscript_getScript(ch, &err);
  103. if (U_FAILURE(err)) {
  104. if (paren_stack != starter_paren_stack) {
  105. memfree(paren_stack);
  106. }
  107. ERR_FAIL_MSG(u_errorName(err));
  108. }
  109. if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) != U_BPT_NONE) {
  110. if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_OPEN) {
  111. // If it's an open character, push it onto the stack.
  112. paren_sp++;
  113. if (unlikely(paren_sp >= paren_size)) {
  114. // If the stack is full, allocate more space to handle deeply nested parentheses. This is unlikely to happen with any real text.
  115. paren_size += PAREN_STACK_DEPTH;
  116. if (paren_stack == starter_paren_stack) {
  117. paren_stack = static_cast<ParenStackEntry *>(memalloc(paren_size * sizeof(ParenStackEntry)));
  118. } else {
  119. paren_stack = static_cast<ParenStackEntry *>(memrealloc(paren_stack, paren_size * sizeof(ParenStackEntry)));
  120. }
  121. }
  122. paren_stack[paren_sp].pair_index = ch;
  123. paren_stack[paren_sp].script_code = script_code;
  124. } else if (paren_sp >= 0) {
  125. // If it's a close character, find the matching open on the stack, and use that script code. Any non-matching open characters above it on the stack will be popped.
  126. UChar32 paired_ch = u_getBidiPairedBracket(ch);
  127. while (paren_sp >= 0 && paren_stack[paren_sp].pair_index != paired_ch) {
  128. paren_sp -= 1;
  129. }
  130. if (paren_sp < start_sp) {
  131. start_sp = paren_sp;
  132. }
  133. if (paren_sp >= 0) {
  134. sc = paren_stack[paren_sp].script_code;
  135. }
  136. }
  137. }
  138. if (same_script(script_code, sc)) {
  139. if (script_code <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
  140. script_code = sc;
  141. // Now that we have a final script code, fix any open characters we pushed before we knew the script code.
  142. while (start_sp < paren_sp) {
  143. paren_stack[++start_sp].script_code = script_code;
  144. }
  145. }
  146. if ((u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_CLOSE) && paren_sp >= 0) {
  147. // If this character is a close paired character pop the matching open character from the stack.
  148. paren_sp -= 1;
  149. if (start_sp >= 0) {
  150. start_sp -= 1;
  151. }
  152. }
  153. } else {
  154. break;
  155. }
  156. }
  157. if (emoji_run) {
  158. emoji_stack[emoji_sp].end = script_end;
  159. }
  160. for (int sub = 0; sub <= emoji_sp; sub++) {
  161. if (emoji_stack[sub].start > script_start) {
  162. ScriptRange rng;
  163. rng.script = hb_icu_script_to_script(script_code);
  164. rng.start = script_start;
  165. rng.end = emoji_stack[sub].start;
  166. script_ranges.push_back(rng);
  167. }
  168. ScriptRange rng;
  169. rng.script = (hb_script_t)HB_TAG('Z', 's', 'y', 'e');
  170. rng.start = emoji_stack[sub].start;
  171. rng.end = emoji_stack[sub].end;
  172. script_ranges.push_back(rng);
  173. script_start = emoji_stack[sub].end;
  174. }
  175. if (script_start != script_end) {
  176. ScriptRange rng;
  177. rng.script = hb_icu_script_to_script(script_code);
  178. rng.start = script_start;
  179. rng.end = script_end;
  180. script_ranges.push_back(rng);
  181. }
  182. if (emoji_stack != starter_emoji_stack) {
  183. memfree(emoji_stack);
  184. }
  185. } while (script_end < p_length);
  186. if (paren_stack != starter_paren_stack) {
  187. memfree(paren_stack);
  188. }
  189. }