2
0

script_iterator.cpp 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. /**************************************************************************/
  2. /* script_iterator.cpp */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #include "script_iterator.h"
  31. // This implementation is derived from ICU: icu4c/source/extra/scrptrun/scrptrun.cpp
  32. inline constexpr UChar32 ZERO_WIDTH_JOINER = 0x200d;
  33. inline constexpr UChar32 VARIATION_SELECTOR_15 = 0xfe0e;
  34. inline constexpr UChar32 VARIATION_SELECTOR_16 = 0xfe0f;
  35. inline bool ScriptIterator::same_script(int32_t p_script_one, int32_t p_script_two) {
  36. return p_script_one <= USCRIPT_INHERITED || p_script_two <= USCRIPT_INHERITED || p_script_one == p_script_two;
  37. }
  38. inline bool ScriptIterator::is_emoji(UChar32 p_c, UChar32 p_next) {
  39. if (p_next == VARIATION_SELECTOR_15 && (u_hasBinaryProperty(p_c, UCHAR_EMOJI) || u_hasBinaryProperty(p_c, UCHAR_EXTENDED_PICTOGRAPHIC))) {
  40. return false;
  41. } else if (p_next == VARIATION_SELECTOR_16 && (u_hasBinaryProperty(p_c, UCHAR_EMOJI) || u_hasBinaryProperty(p_c, UCHAR_EXTENDED_PICTOGRAPHIC))) {
  42. return true;
  43. } else {
  44. return u_hasBinaryProperty(p_c, UCHAR_EMOJI_PRESENTATION) || u_hasBinaryProperty(p_c, UCHAR_EMOJI_MODIFIER) || u_hasBinaryProperty(p_c, UCHAR_REGIONAL_INDICATOR);
  45. }
  46. }
  47. ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length) {
  48. struct ParenStackEntry {
  49. int pair_index;
  50. UScriptCode script_code;
  51. };
  52. struct EmojiSubrunEntry {
  53. int start;
  54. int end;
  55. };
  56. if (p_start >= p_length) {
  57. p_start = p_length - 1;
  58. }
  59. if (p_start < 0) {
  60. p_start = 0;
  61. }
  62. int paren_size = PAREN_STACK_DEPTH;
  63. ParenStackEntry starter_paren_stack[PAREN_STACK_DEPTH];
  64. ParenStackEntry *paren_stack = starter_paren_stack;
  65. int emoji_size = EMOJI_STACK_DEPTH;
  66. EmojiSubrunEntry starter_emoji_stack[EMOJI_STACK_DEPTH];
  67. EmojiSubrunEntry *emoji_stack = starter_emoji_stack;
  68. int script_start;
  69. int script_end = p_start;
  70. UScriptCode script_code;
  71. int paren_sp = -1;
  72. int start_sp = paren_sp;
  73. UErrorCode err = U_ZERO_ERROR;
  74. const char32_t *str = p_string.ptr();
  75. do {
  76. script_code = USCRIPT_COMMON;
  77. int emoji_sp = -1;
  78. bool emoji_run = false;
  79. for (script_start = script_end; script_end < p_length; script_end++) {
  80. UChar32 ch = str[script_end];
  81. UChar32 n = (script_end + 1 < p_length) ? str[script_end + 1] : 0;
  82. if (is_emoji(ch, n)) {
  83. if (!emoji_run) {
  84. emoji_run = true;
  85. emoji_sp++;
  86. if (unlikely(emoji_sp >= emoji_size)) {
  87. emoji_size += EMOJI_STACK_DEPTH;
  88. if (emoji_stack == starter_emoji_stack) {
  89. emoji_stack = static_cast<EmojiSubrunEntry *>(memalloc(emoji_size * sizeof(EmojiSubrunEntry)));
  90. } else {
  91. emoji_stack = static_cast<EmojiSubrunEntry *>(memrealloc(emoji_stack, emoji_size * sizeof(EmojiSubrunEntry)));
  92. }
  93. }
  94. emoji_stack[emoji_sp].start = script_end;
  95. emoji_stack[emoji_sp].end = script_end;
  96. }
  97. } else if (emoji_run && ch != ZERO_WIDTH_JOINER && ch != VARIATION_SELECTOR_16 && !(u_hasBinaryProperty(ch, UCHAR_EXTENDED_PICTOGRAPHIC) && n != VARIATION_SELECTOR_15)) {
  98. emoji_run = false;
  99. emoji_stack[emoji_sp].end = script_end;
  100. }
  101. UScriptCode sc = uscript_getScript(ch, &err);
  102. if (U_FAILURE(err)) {
  103. if (paren_stack != starter_paren_stack) {
  104. memfree(paren_stack);
  105. }
  106. ERR_FAIL_MSG(u_errorName(err));
  107. }
  108. if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) != U_BPT_NONE) {
  109. if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_OPEN) {
  110. // If it's an open character, push it onto the stack.
  111. paren_sp++;
  112. if (unlikely(paren_sp >= paren_size)) {
  113. // If the stack is full, allocate more space to handle deeply nested parentheses. This is unlikely to happen with any real text.
  114. paren_size += PAREN_STACK_DEPTH;
  115. if (paren_stack == starter_paren_stack) {
  116. paren_stack = static_cast<ParenStackEntry *>(memalloc(paren_size * sizeof(ParenStackEntry)));
  117. } else {
  118. paren_stack = static_cast<ParenStackEntry *>(memrealloc(paren_stack, paren_size * sizeof(ParenStackEntry)));
  119. }
  120. }
  121. paren_stack[paren_sp].pair_index = ch;
  122. paren_stack[paren_sp].script_code = script_code;
  123. } else if (paren_sp >= 0) {
  124. // If it's a close character, find the matching open on the stack, and use that script code. Any non-matching open characters above it on the stack will be popped.
  125. UChar32 paired_ch = u_getBidiPairedBracket(ch);
  126. while (paren_sp >= 0 && paren_stack[paren_sp].pair_index != paired_ch) {
  127. paren_sp -= 1;
  128. }
  129. if (paren_sp < start_sp) {
  130. start_sp = paren_sp;
  131. }
  132. if (paren_sp >= 0) {
  133. sc = paren_stack[paren_sp].script_code;
  134. }
  135. }
  136. }
  137. if (same_script(script_code, sc)) {
  138. if (script_code <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
  139. script_code = sc;
  140. // Now that we have a final script code, fix any open characters we pushed before we knew the script code.
  141. while (start_sp < paren_sp) {
  142. paren_stack[++start_sp].script_code = script_code;
  143. }
  144. }
  145. if ((u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_CLOSE) && paren_sp >= 0) {
  146. // If this character is a close paired character pop the matching open character from the stack.
  147. paren_sp -= 1;
  148. if (start_sp >= 0) {
  149. start_sp -= 1;
  150. }
  151. }
  152. } else {
  153. break;
  154. }
  155. }
  156. if (emoji_run) {
  157. emoji_stack[emoji_sp].end = script_end;
  158. }
  159. for (int sub = 0; sub <= emoji_sp; sub++) {
  160. if (emoji_stack[sub].start > script_start) {
  161. ScriptRange rng;
  162. rng.script = hb_icu_script_to_script(script_code);
  163. rng.start = script_start;
  164. rng.end = emoji_stack[sub].start;
  165. script_ranges.push_back(rng);
  166. }
  167. ScriptRange rng;
  168. rng.script = (hb_script_t)HB_TAG('Z', 's', 'y', 'e');
  169. rng.start = emoji_stack[sub].start;
  170. rng.end = emoji_stack[sub].end;
  171. script_ranges.push_back(rng);
  172. script_start = emoji_stack[sub].end;
  173. }
  174. if (script_start != script_end) {
  175. ScriptRange rng;
  176. rng.script = hb_icu_script_to_script(script_code);
  177. rng.start = script_start;
  178. rng.end = script_end;
  179. script_ranges.push_back(rng);
  180. }
  181. if (emoji_stack != starter_emoji_stack) {
  182. memfree(emoji_stack);
  183. }
  184. } while (script_end < p_length);
  185. if (paren_stack != starter_paren_stack) {
  186. memfree(paren_stack);
  187. }
  188. }