script_iterator.cpp 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. /**************************************************************************/
  2. /* script_iterator.cpp */
  3. /**************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* https://godotengine.org */
  7. /**************************************************************************/
  8. /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
  9. /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
  10. /* */
  11. /* Permission is hereby granted, free of charge, to any person obtaining */
  12. /* a copy of this software and associated documentation files (the */
  13. /* "Software"), to deal in the Software without restriction, including */
  14. /* without limitation the rights to use, copy, modify, merge, publish, */
  15. /* distribute, sublicense, and/or sell copies of the Software, and to */
  16. /* permit persons to whom the Software is furnished to do so, subject to */
  17. /* the following conditions: */
  18. /* */
  19. /* The above copyright notice and this permission notice shall be */
  20. /* included in all copies or substantial portions of the Software. */
  21. /* */
  22. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  23. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  24. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
  25. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  26. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  27. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  28. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  29. /**************************************************************************/
  30. #include "script_iterator.h"
  31. // This implementation is derived from ICU: icu4c/source/extra/scrptrun/scrptrun.cpp
  32. inline bool ScriptIterator::same_script(int32_t p_script_one, int32_t p_script_two) {
  33. return p_script_one <= USCRIPT_INHERITED || p_script_two <= USCRIPT_INHERITED || p_script_one == p_script_two;
  34. }
  35. inline bool ScriptIterator::is_emoji(UChar32 p_c, UChar32 p_next) {
  36. if (p_next == 0xFE0E) { // Variation Selector-15
  37. return false;
  38. } else if (p_next == 0xFE0F) { // Variation Selector-16
  39. return true;
  40. } else {
  41. return u_hasBinaryProperty(p_c, UCHAR_EMOJI) || u_hasBinaryProperty(p_c, UCHAR_EMOJI_PRESENTATION) || u_hasBinaryProperty(p_c, UCHAR_EMOJI_MODIFIER) || u_hasBinaryProperty(p_c, UCHAR_REGIONAL_INDICATOR) || u_hasBinaryProperty(p_c, UCHAR_EXTENDED_PICTOGRAPHIC);
  42. }
  43. }
  44. ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length) {
  45. struct ParenStackEntry {
  46. int pair_index;
  47. UScriptCode script_code;
  48. };
  49. if (p_start >= p_length) {
  50. p_start = p_length - 1;
  51. }
  52. if (p_start < 0) {
  53. p_start = 0;
  54. }
  55. int paren_size = PAREN_STACK_DEPTH;
  56. ParenStackEntry *paren_stack = static_cast<ParenStackEntry *>(memalloc(paren_size * sizeof(ParenStackEntry)));
  57. int script_start;
  58. int script_end = p_start;
  59. UScriptCode script_code;
  60. int paren_sp = -1;
  61. int start_sp = paren_sp;
  62. UErrorCode err = U_ZERO_ERROR;
  63. const char32_t *str = p_string.ptr();
  64. do {
  65. script_code = USCRIPT_COMMON;
  66. for (script_start = script_end; script_end < p_length; script_end++) {
  67. UChar32 ch = str[script_end];
  68. UChar32 n = (script_end + 1 < p_length) ? str[script_end + 1] : 0;
  69. UScriptCode sc = uscript_getScript(ch, &err);
  70. if (U_FAILURE(err)) {
  71. memfree(paren_stack);
  72. ERR_FAIL_MSG(u_errorName(err));
  73. }
  74. if (is_emoji(ch, n)) {
  75. sc = USCRIPT_SYMBOLS_EMOJI;
  76. }
  77. if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) != U_BPT_NONE) {
  78. if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_OPEN) {
  79. // If it's an open character, push it onto the stack.
  80. paren_sp++;
  81. if (unlikely(paren_sp >= paren_size)) {
  82. // If the stack is full, allocate more space to handle deeply nested parentheses. This is unlikely to happen with any real text.
  83. paren_size += PAREN_STACK_DEPTH;
  84. paren_stack = static_cast<ParenStackEntry *>(memrealloc(paren_stack, paren_size * sizeof(ParenStackEntry)));
  85. }
  86. paren_stack[paren_sp].pair_index = ch;
  87. paren_stack[paren_sp].script_code = script_code;
  88. } else if (paren_sp >= 0) {
  89. // If it's a close character, find the matching open on the stack, and use that script code. Any non-matching open characters above it on the stack will be popped.
  90. UChar32 paired_ch = u_getBidiPairedBracket(ch);
  91. while (paren_sp >= 0 && paren_stack[paren_sp].pair_index != paired_ch) {
  92. paren_sp -= 1;
  93. }
  94. if (paren_sp < start_sp) {
  95. start_sp = paren_sp;
  96. }
  97. if (paren_sp >= 0) {
  98. sc = paren_stack[paren_sp].script_code;
  99. }
  100. }
  101. }
  102. if (script_code == USCRIPT_SYMBOLS_EMOJI && script_code != sc) {
  103. UCharCategory cat = (UCharCategory)u_charType(ch);
  104. if ((cat >= U_SPACE_SEPARATOR && cat <= U_CONTROL_CHAR) || (cat >= U_DASH_PUNCTUATION && cat <= U_OTHER_PUNCTUATION) || (cat >= U_INITIAL_PUNCTUATION && cat <= U_FINAL_PUNCTUATION)) {
  105. break;
  106. }
  107. } else if (same_script(script_code, sc)) {
  108. if (script_code <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
  109. script_code = sc;
  110. // Now that we have a final script code, fix any open characters we pushed before we knew the script code.
  111. while (start_sp < paren_sp) {
  112. paren_stack[++start_sp].script_code = script_code;
  113. }
  114. }
  115. if ((u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_CLOSE) && paren_sp >= 0) {
  116. // If this character is a close paired character pop the matching open character from the stack.
  117. paren_sp -= 1;
  118. if (start_sp >= 0) {
  119. start_sp -= 1;
  120. }
  121. }
  122. } else {
  123. break;
  124. }
  125. }
  126. ScriptRange rng;
  127. rng.script = hb_icu_script_to_script(script_code);
  128. rng.start = script_start;
  129. rng.end = script_end;
  130. script_ranges.push_back(rng);
  131. } while (script_end < p_length);
  132. memfree(paren_stack);
  133. }