Browse Source

Merge pull request #56192 from bruvzg/tts2.0

Rémi Verschelde 3 years ago
parent
commit
d25c3aaaa7
54 changed files with 3962 additions and 2 deletions
  1. 1 1
      .github/workflows/linux_builds.yml
  2. 102 0
      doc/classes/DisplayServer.xml
  3. 8 0
      doc/classes/TextServer.xml
  4. 8 0
      doc/classes/TextServerExtension.xml
  5. 60 0
      modules/text_server_adv/text_server_adv.cpp
  6. 4 0
      modules/text_server_adv/text_server_adv.h
  7. 24 1
      modules/text_server_fb/text_server_fb.cpp
  8. 2 0
      modules/text_server_fb/text_server_fb.h
  9. 1 0
      platform/android/SCsub
  10. 30 0
      platform/android/display_server_android.cpp
  11. 9 0
      platform/android/display_server_android.h
  12. 3 0
      platform/android/java/lib/src/org/godotengine/godot/Godot.java
  13. 5 0
      platform/android/java/lib/src/org/godotengine/godot/GodotLib.java
  14. 298 0
      platform/android/java/lib/src/org/godotengine/godot/tts/GodotTTS.java
  15. 55 0
      platform/android/java/lib/src/org/godotengine/godot/tts/GodotUtterance.java
  16. 6 0
      platform/android/java_godot_lib_jni.cpp
  17. 1 0
      platform/android/java_godot_lib_jni.h
  18. 189 0
      platform/android/tts_android.cpp
  19. 67 0
      platform/android/tts_android.h
  20. 1 0
      platform/iphone/SCsub
  21. 11 0
      platform/iphone/display_server_iphone.h
  22. 40 0
      platform/iphone/display_server_iphone.mm
  23. 59 0
      platform/iphone/tts_ios.h
  24. 164 0
      platform/iphone/tts_ios.mm
  25. 86 0
      platform/javascript/display_server_javascript.cpp
  26. 16 0
      platform/javascript/display_server_javascript.h
  27. 10 0
      platform/javascript/godot_js.h
  28. 90 0
      platform/javascript/js/libs/library_godot_display.js
  29. 3 0
      platform/linuxbsd/SCsub
  30. 8 0
      platform/linuxbsd/detect.py
  31. 49 0
      platform/linuxbsd/display_server_x11.cpp
  32. 19 0
      platform/linuxbsd/display_server_x11.h
  33. 881 0
      platform/linuxbsd/speechd-so_wrap.c
  34. 330 0
      platform/linuxbsd/speechd-so_wrap.h
  35. 261 0
      platform/linuxbsd/tts_linux.cpp
  36. 78 0
      platform/linuxbsd/tts_linux.h
  37. 1 0
      platform/osx/SCsub
  38. 11 0
      platform/osx/display_server_osx.h
  39. 41 0
      platform/osx/display_server_osx.mm
  40. 66 0
      platform/osx/tts_osx.h
  41. 266 0
      platform/osx/tts_osx.mm
  42. 1 0
      platform/windows/SCsub
  43. 2 0
      platform/windows/detect.py
  44. 43 0
      platform/windows/display_server_windows.cpp
  45. 12 0
      platform/windows/display_server_windows.h
  46. 269 0
      platform/windows/tts_windows.cpp
  47. 80 0
      platform/windows/tts_windows.h
  48. 94 0
      servers/display_server.cpp
  49. 36 0
      servers/display_server.h
  50. 10 0
      servers/text/text_server_extension.cpp
  51. 3 0
      servers/text/text_server_extension.h
  52. 2 0
      servers/text_server.cpp
  53. 3 0
      servers/text_server.h
  54. 43 0
      tests/servers/test_text_server.h

+ 1 - 1
.github/workflows/linux_builds.yml

@@ -87,7 +87,7 @@ jobs:
           sudo apt-get install build-essential pkg-config libx11-dev libxcursor-dev \
               libxinerama-dev libgl1-mesa-dev libglu-dev libasound2-dev libpulse-dev \
               libdbus-1-dev libudev-dev libxi-dev libxrandr-dev yasm xvfb wget unzip \
-              llvm
+              llvm libspeechd-dev speech-dispatcher
 
       - name: Setup Godot build cache
         uses: ./.github/actions/godot-cache

+ 102 - 0
doc/classes/DisplayServer.xml

@@ -814,6 +814,93 @@
 				[b]Note:[/b] This method is implemented on Windows.
 			</description>
 		</method>
+		<method name="tts_get_voices" qualifiers="const">
+			<return type="Array" />
+			<description>
+				Returns an [Array] of voice information dictionaries.
+				Each [Dictionary] contains three [String] entries:
+				- [code]name[/code] is the voice name.
+				- [code]id[/code] is the voice identifier.
+				- [code]language[/code] is the language code in [code]lang_Variant[/code] format. The [code]lang[/code] part is a 2 or 3-letter code based on the ISO-639 standard, in lowercase. The [code]Variant[/code] part is an engine-dependent string describing country, region, and/or dialect.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_get_voices_for_language" qualifiers="const">
+			<return type="PackedStringArray" />
+			<argument index="0" name="language" type="String" />
+			<description>
+				Returns a [PackedStringArray] of voice identifiers for the [code]language[/code].
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_is_paused" qualifiers="const">
+			<return type="bool" />
+			<description>
+				Returns [code]true[/code] if the synthesizer is in a paused state.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_is_speaking" qualifiers="const">
+			<return type="bool" />
+			<description>
+				Returns [code]true[/code] if the synthesizer is generating speech, or has utterances waiting in the queue.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_pause">
+			<return type="void" />
+			<description>
+				Puts the synthesizer into a paused state.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_resume">
+			<return type="void" />
+			<description>
+				Resumes the synthesizer if it was paused.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_set_utterance_callback">
+			<return type="void" />
+			<argument index="0" name="event" type="int" enum="DisplayServer.TTSUtteranceEvent" />
+			<argument index="1" name="callable" type="Callable" />
+			<description>
+				Adds a callback, which is called when the utterance has started, finished, been canceled, or reached a text boundary.
+				- [code]TTS_UTTERANCE_STARTED[/code], [code]TTS_UTTERANCE_ENDED[/code], and [code]TTS_UTTERANCE_CANCELED[/code] callable's method should take one [int] parameter, the utterance id.
+				- [code]TTS_UTTERANCE_BOUNDARY[/code] callable's method should take two [int] parameters, the index of the character and the utterance id.
+				[b]Note:[/b] The granularity of the boundary callbacks is engine dependent.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_speak">
+			<return type="void" />
+			<argument index="0" name="text" type="String" />
+			<argument index="1" name="voice" type="String" />
+			<argument index="2" name="volume" type="int" default="50" />
+			<argument index="3" name="pitch" type="float" default="1.0" />
+			<argument index="4" name="rate" type="float" default="1.0" />
+			<argument index="5" name="utterance_id" type="int" default="0" />
+			<argument index="6" name="interrupt" type="bool" default="false" />
+			<description>
+				Adds an utterance to the queue. If [code]interrupt[/code] is [code]true[/code], the queue is cleared first.
+				- [code]voice[/code] identifier is one of the [code]"id"[/code] values returned by [method tts_get_voices] or one of the values returned by [method tts_get_voices_for_language].
+				- [code]volume[/code] ranges from [code]0[/code] (lowest) to [code]100[/code] (highest).
+				- [code]pitch[/code] ranges from [code]0.0[/code] (lowest) to [code]2.0[/code] (highest), [code]1.0[/code] is default pitch for the current voice.
+				- [code]rate[/code] ranges from [code]0.1[/code] (lowest) to [code]10.0[/code] (highest), [code]1.0[/code] is a normal speaking rate. Other values act as a percentage relative to the normal rate.
+				- [code]utterance_id[/code] is passed as a parameter to the callback functions.
+				[b]Note:[/b] On Windows and Linux, utterance [code]text[/code] can use SSML markup. SSML support is engine and voice dependent. If the engine does not support SSML, you should strip out all XML markup before calling [method tts_speak].
+				[b]Note:[/b] The granularity of pitch, rate, and volume is engine and voice dependent. Values may be truncated.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_stop">
+			<return type="void" />
+			<description>
+				Stops synthesis in progress and removes all utterances from the queue.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
 		<method name="virtual_keyboard_get_height" qualifiers="const">
 			<return type="int" />
 			<description>
@@ -1184,6 +1271,9 @@
 		</constant>
 		<constant name="FEATURE_CLIPBOARD_PRIMARY" value="18" enum="Feature">
 		</constant>
+		<constant name="FEATURE_TEXT_TO_SPEECH" value="19" enum="Feature">
+			Display server supports text-to-speech. See [code]tts_*[/code] methods.
+		</constant>
 		<constant name="MOUSE_MODE_VISIBLE" value="0" enum="MouseMode">
 			Makes the mouse cursor visible if it is hidden.
 		</constant>
@@ -1335,5 +1425,17 @@
 			- MacOS: [code]NSView*[/code] for the window main view.
 			- iOS: [code]UIView*[/code] for the window main view.
 		</constant>
+		<constant name="TTS_UTTERANCE_STARTED" value="0" enum="TTSUtteranceEvent">
+			Utterance has begun to be spoken.
+		</constant>
+		<constant name="TTS_UTTERANCE_ENDED" value="1" enum="TTSUtteranceEvent">
+			Utterance was successfully finished.
+		</constant>
+		<constant name="TTS_UTTERANCE_CANCELED" value="2" enum="TTSUtteranceEvent">
+			Utterance was canceled, or TTS service was unable to process it.
+		</constant>
+		<constant name="TTS_UTTERANCE_BOUNDARY" value="3" enum="TTSUtteranceEvent">
+			Utterance reached a word or sentence boundary.
+		</constant>
 	</constants>
 </class>

+ 8 - 0
doc/classes/TextServer.xml

@@ -1441,6 +1441,14 @@
 				Aligns shaped text to the given tab-stops.
 			</description>
 		</method>
+		<method name="string_get_word_breaks" qualifiers="const">
+			<return type="PackedInt32Array" />
+			<argument index="0" name="string" type="String" />
+			<argument index="1" name="language" type="String" default="&quot;&quot;" />
+			<description>
+				Returns an array of the word break character offsets.
+			</description>
+		</method>
 		<method name="string_to_lower" qualifiers="const">
 			<return type="String" />
 			<argument index="0" name="string" type="String" />

+ 8 - 0
doc/classes/TextServerExtension.xml

@@ -1461,6 +1461,14 @@
 				[b]Note:[/b] This method is used by default line/word breaking methods, and its implementation might be omitted if custom line breaking in implemented.
 			</description>
 		</method>
+		<method name="string_get_word_breaks" qualifiers="virtual const">
+			<return type="PackedInt32Array" />
+			<argument index="0" name="string" type="String" />
+			<argument index="1" name="language" type="String" />
+			<description>
+				Returns an array of the word break character offsets.
+			</description>
+		</method>
 		<method name="string_to_lower" qualifiers="virtual const">
 			<return type="String" />
 			<argument index="0" name="string" type="String" />

+ 60 - 0
modules/text_server_adv/text_server_adv.cpp

@@ -3255,6 +3255,19 @@ void TextServerAdvanced::font_set_global_oversampling(double p_oversampling) {
 /* Shaped text buffer interface                                          */
 /*************************************************************************/
 
+int64_t TextServerAdvanced::_convert_pos(const String &p_utf32, const Char16String &p_utf16, int64_t p_pos) const {
+	// Maps an offset into p_utf16 to the matching offset into p_utf32.
+	// When both strings report the same length the offset is returned unchanged.
+	if (p_utf32.length() == p_utf16.length()) {
+		return p_pos;
+	}
+	// Otherwise subtract one for every lead surrogate found before p_pos.
+	const UChar *units = p_utf16.ptr();
+	int64_t utf32_pos = p_pos;
+	for (int idx = 0; idx < p_pos; idx++) {
+		if (U16_IS_LEAD(units[idx])) {
+			utf32_pos--;
+		}
+	}
+	return utf32_pos;
+}
+
 int64_t TextServerAdvanced::_convert_pos(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const {
 	int64_t limit = p_pos;
 	if (p_sd->text.length() != p_sd->utf16.length()) {
@@ -5555,6 +5568,53 @@ String TextServerAdvanced::string_to_lower(const String &p_string, const String
 	return String::utf16(lower.ptr(), len);
 }
 
+PackedInt32Array TextServerAdvanced::string_get_word_breaks(const String &p_string, const String &p_language) const {
+	// The ICU break iterator is fed UTF-16 text; the returned offsets index p_string.
+	Char16String text16 = p_string.utf16();
+	const int char_count = p_string.length();
+
+	Set<int> break_offsets;
+	UErrorCode status = U_ZERO_ERROR;
+	UBreakIterator *iter = ubrk_open(UBRK_LINE, p_language.ascii().get_data(), (const UChar *)text16.ptr(), text16.length(), &status);
+	if (!U_FAILURE(status)) {
+		// Walk the ICU boundaries, mapping each one back to the index of the character just before it.
+		while (ubrk_next(iter) != UBRK_DONE) {
+			const int offset = _convert_pos(p_string, text16, ubrk_current(iter)) - 1;
+			// The boundary that falls on the last character is not recorded.
+			if (offset != char_count - 1) {
+				break_offsets.insert(offset);
+			}
+		}
+	} else {
+		// Iterator could not be opened - fall back to whitespace and line break characters.
+		for (int idx = 0; idx < char_count; idx++) {
+			const char32_t ch = p_string[idx];
+			if (is_whitespace(ch) || is_linebreak(ch)) {
+				break_offsets.insert(idx);
+			}
+		}
+	}
+	ubrk_close(iter);
+
+	// Collect, in ascending order, every index that is punctuation, an underscore, or a recorded break.
+	PackedInt32Array result;
+	for (int idx = 0; idx < char_count; idx++) {
+		const char32_t ch = p_string[idx];
+		if (ch == 0xfffc) {
+			// U+FFFC (object replacement character) never produces an entry.
+			continue;
+		}
+		const bool is_punctuation = u_ispunct(ch) && ch != 0x005F;
+		if (is_punctuation || is_underscore(ch) || break_offsets.has(idx)) {
+			result.push_back(idx);
+		}
+	}
+	return result;
+}
+
 TextServerAdvanced::TextServerAdvanced() {
 	_insert_num_systems_lang();
 	_insert_feature_sets();

+ 4 - 0
modules/text_server_adv/text_server_adv.h

@@ -393,11 +393,13 @@ class TextServerAdvanced : public TextServerExtension {
 	mutable RID_PtrOwner<ShapedTextDataAdvanced> shaped_owner;
 
 	void _realign(ShapedTextDataAdvanced *p_sd) const;
+	int64_t _convert_pos(const String &p_utf32, const Char16String &p_utf16, int64_t p_pos) const;
 	int64_t _convert_pos(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const;
 	int64_t _convert_pos_inv(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const;
 	bool _shape_substr(ShapedTextDataAdvanced *p_new_sd, const ShapedTextDataAdvanced *p_sd, int64_t p_start, int64_t p_length) const;
 	void _shape_run(ShapedTextDataAdvanced *p_sd, int64_t p_start, int64_t p_end, hb_script_t p_script, hb_direction_t p_direction, Array p_fonts, int64_t p_span, int64_t p_fb_index);
 	Glyph _shape_single_glyph(ShapedTextDataAdvanced *p_sd, char32_t p_char, hb_script_t p_script, hb_direction_t p_direction, const RID &p_font, int64_t p_font_size);
+
 	_FORCE_INLINE_ void _add_featuers(const Dictionary &p_source, Vector<hb_feature_t> &r_ftrs);
 
 	// HarfBuzz bitmap font interface.
@@ -686,6 +688,8 @@ public:
 	virtual String parse_number(const String &p_string, const String &p_language = "") const override;
 	virtual String percent_sign(const String &p_language = "") const override;
 
+	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override;
+
 	virtual String strip_diacritics(const String &p_string) const override;
 
 	virtual String string_to_upper(const String &p_string, const String &p_language = "") const override;

+ 24 - 1
modules/text_server_fb/text_server_fb.cpp

@@ -3079,7 +3079,7 @@ bool TextServerFallback::shaped_text_update_breaks(const RID &p_shaped) {
 		if (sd_glyphs[i].count > 0) {
 			char32_t c = sd->text[sd_glyphs[i].start - sd->start];
 			if (c_punct_size == 0) {
-				if (is_punct(c)) {
+				if (is_punct(c) && c != 0x005F) {
 					sd_glyphs[i].flags |= GRAPHEME_IS_PUNCTUATION;
 				}
 			} else {
@@ -3623,6 +3623,29 @@ String TextServerFallback::string_to_lower(const String &p_string, const String
 	return lower;
 }
 
+PackedInt32Array TextServerFallback::string_get_word_breaks(const String &p_string, const String &p_language) const {
+	// p_language is not consulted here; every character is classified on its own.
+	PackedInt32Array offsets;
+	const int char_count = p_string.length();
+	for (int idx = 0; idx < char_count; idx++) {
+		const char32_t ch = p_string[idx];
+		if (ch == 0xfffc) {
+			// U+FFFC (object replacement character) never produces an entry.
+			continue;
+		}
+		// Punctuation, underscores, whitespace and line breaks all count as word breaks.
+		const bool is_break = (is_punct(ch) && ch != 0x005F) || is_underscore(ch) || is_whitespace(ch) || is_linebreak(ch);
+		if (is_break) {
+			offsets.push_back(idx);
+		}
+	}
+	return offsets;
+}
+
 TextServerFallback::TextServerFallback() {
 	_insert_feature_sets();
 };

+ 2 - 0
modules/text_server_fb/text_server_fb.h

@@ -573,6 +573,8 @@ public:
 	virtual double shaped_text_get_underline_position(const RID &p_shaped) const override;
 	virtual double shaped_text_get_underline_thickness(const RID &p_shaped) const override;
 
+	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override;
+
 	virtual String string_to_upper(const String &p_string, const String &p_language = "") const override;
 	virtual String string_to_lower(const String &p_string, const String &p_language = "") const override;
 

+ 1 - 0
platform/android/SCsub

@@ -8,6 +8,7 @@ android_files = [
     "file_access_android.cpp",
     "audio_driver_opensl.cpp",
     "dir_access_jandroid.cpp",
+    "tts_android.cpp",
     "thread_jandroid.cpp",
     "net_socket_android.cpp",
     "java_godot_lib_jni.cpp",

+ 30 - 0
platform/android/display_server_android.cpp

@@ -34,6 +34,7 @@
 #include "java_godot_io_wrapper.h"
 #include "java_godot_wrapper.h"
 #include "os_android.h"
+#include "tts_android.h"
 
 #if defined(VULKAN_ENABLED)
 #include "drivers/vulkan/rendering_device_vulkan.h"
@@ -63,6 +64,7 @@ bool DisplayServerAndroid::has_feature(Feature p_feature) const {
 		case FEATURE_ORIENTATION:
 		case FEATURE_TOUCHSCREEN:
 		case FEATURE_VIRTUAL_KEYBOARD:
+		case FEATURE_TEXT_TO_SPEECH:
 			return true;
 		default:
 			return false;
@@ -73,6 +75,34 @@ String DisplayServerAndroid::get_name() const {
 	return "Android";
 }
 
+bool DisplayServerAndroid::tts_is_speaking() const {
+	return TTS_Android::is_speaking(); // Thin wrapper: the result comes straight from TTS_Android.
+}
+
+bool DisplayServerAndroid::tts_is_paused() const {
+	return TTS_Android::is_paused(); // Thin wrapper: the result comes straight from TTS_Android.
+}
+
+Array DisplayServerAndroid::tts_get_voices() const {
+	return TTS_Android::get_voices(); // Thin wrapper: the voice list is returned exactly as TTS_Android built it.
+}
+
+void DisplayServerAndroid::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	TTS_Android::speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt); // All arguments are passed through unchanged, in the same order.
+}
+
+void DisplayServerAndroid::tts_pause() {
+	TTS_Android::pause(); // Thin wrapper: pausing is handled entirely by TTS_Android.
+}
+
+void DisplayServerAndroid::tts_resume() {
+	TTS_Android::resume(); // Thin wrapper: resuming is handled entirely by TTS_Android.
+}
+
+void DisplayServerAndroid::tts_stop() {
+	TTS_Android::stop(); // Thin wrapper: stopping is handled entirely by TTS_Android.
+}
+
 void DisplayServerAndroid::clipboard_set(const String &p_text) {
 	GodotJavaWrapper *godot_java = OS_Android::get_singleton()->get_godot_java();
 	ERR_FAIL_COND(!godot_java);

+ 9 - 0
platform/android/display_server_android.h

@@ -91,6 +91,15 @@ public:
 	virtual bool has_feature(Feature p_feature) const override;
 	virtual String get_name() const override;
 
+	virtual bool tts_is_speaking() const override;
+	virtual bool tts_is_paused() const override;
+	virtual Array tts_get_voices() const override;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override;
+	virtual void tts_pause() override;
+	virtual void tts_resume() override;
+	virtual void tts_stop() override;
+
 	virtual void clipboard_set(const String &p_text) override;
 	virtual String clipboard_get() const override;
 	virtual bool clipboard_has() const override;

+ 3 - 0
platform/android/java/lib/src/org/godotengine/godot/Godot.java

@@ -36,6 +36,7 @@ import static android.content.Context.WINDOW_SERVICE;
 import org.godotengine.godot.input.GodotEditText;
 import org.godotengine.godot.plugin.GodotPlugin;
 import org.godotengine.godot.plugin.GodotPluginRegistry;
+import org.godotengine.godot.tts.GodotTTS;
 import org.godotengine.godot.utils.GodotNetUtils;
 import org.godotengine.godot.utils.PermissionsUtil;
 import org.godotengine.godot.xr.XRMode;
@@ -165,6 +166,7 @@ public class Godot extends Fragment implements SensorEventListener, IDownloaderC
 
 	public static GodotIO io;
 	public static GodotNetUtils netUtils;
+	public static GodotTTS tts;
 
 	public interface ResultCallback {
 		void callback(int requestCode, int resultCode, Intent data);
@@ -458,6 +460,7 @@ public class Godot extends Fragment implements SensorEventListener, IDownloaderC
 		io = new GodotIO(activity);
 		GodotLib.io = io;
 		netUtils = new GodotNetUtils(activity);
+		tts = new GodotTTS(activity);
 		mSensorManager = (SensorManager)activity.getSystemService(Context.SENSOR_SERVICE);
 		mAccelerometer = mSensorManager.getDefaultSensor(Sensor.TYPE_ACCELEROMETER);
 		mSensorManager.registerListener(this, mAccelerometer, SensorManager.SENSOR_DELAY_GAME);

+ 5 - 0
platform/android/java/lib/src/org/godotengine/godot/GodotLib.java

@@ -91,6 +91,11 @@ public class GodotLib {
 	 */
 	public static native boolean step();
 
+	/**
+	 * TTS callback.
+	 */
+	public static native void ttsCallback(int event, int id, int pos);
+
 	/**
 	 * Forward touch events from the main thread to the GL thread.
 	 */

+ 298 - 0
platform/android/java/lib/src/org/godotengine/godot/tts/GodotTTS.java

@@ -0,0 +1,298 @@
+/*************************************************************************/
+/*  GodotTTS.java                                                        */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+package org.godotengine.godot.tts;
+
+import org.godotengine.godot.GodotLib;
+
+import android.app.Activity;
+import android.os.Bundle;
+import android.speech.tts.TextToSpeech;
+import android.speech.tts.UtteranceProgressListener;
+import android.speech.tts.Voice;
+
+import androidx.annotation.Keep;
+
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Set;
+
+/**
+ * Wrapper for Android Text to Speech API and custom utterance queue implementation.
+ * <p>
+ * A [GodotTTS] provides the following features:
+ * <p>
+ * <ul>
+ * <li>Access to the Android Text to Speech API.
+ * <li>Utterance pause / resume functions, unsupported by Android TTS API.
+ * </ul>
+ * <p>
+ * Only one utterance at a time is handed to the engine; the rest wait in {@code queue}.
+ * State shared with the engine callbacks is modified while holding {@code lock}.
+ */
+@Keep
+public class GodotTTS extends UtteranceProgressListener {
+	// Note: These constants must be in sync with DisplayServer::TTSUtteranceEvent enum from "servers/display_server.h".
+	final private static int EVENT_START = 0;
+	final private static int EVENT_END = 1;
+	final private static int EVENT_CANCEL = 2;
+	final private static int EVENT_BOUNDARY = 3;
+
+	final private TextToSpeech synth;
+	// Utterances that have not been handed to the engine yet.
+	final private LinkedList<GodotUtterance> queue;
+	final private Object lock = new Object();
+	// Utterance most recently handed to the engine; null after stopSpeaking().
+	private GodotUtterance lastUtterance;
+
+	private boolean speaking;
+	private boolean paused;
+
+	public GodotTTS(Activity p_activity) {
+		synth = new TextToSpeech(p_activity, null);
+		queue = new LinkedList<GodotUtterance>();
+
+		synth.setOnUtteranceProgressListener(this);
+	}
+
+	/**
+	 * Selects the engine voice whose name equals {@code voiceName}.
+	 * <p>
+	 * {@code TextToSpeech.getVoices()} may return null on error; in that case, or when no
+	 * voice matches, {@code setVoice} is not called.
+	 */
+	private void selectVoice(String voiceName) {
+		Set<Voice> voices = synth.getVoices();
+		if (voices == null) {
+			return;
+		}
+		for (Voice v : voices) {
+			if (v.getName().equals(voiceName)) {
+				synth.setVoice(v);
+				break;
+			}
+		}
+	}
+
+	/**
+	 * Hands the next queued utterance to the engine if nothing is currently being spoken.
+	 */
+	private void updateTTS() {
+		if (!speaking && queue.size() > 0) {
+			int mode = TextToSpeech.QUEUE_FLUSH;
+			GodotUtterance message = queue.pollFirst();
+
+			selectVoice(message.voice);
+			synth.setPitch(message.pitch);
+			synth.setSpeechRate(message.rate);
+
+			Bundle params = new Bundle();
+			// Utterance volume is 0..100, the engine parameter is a 0..1 float.
+			params.putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, message.volume / 100.f);
+
+			lastUtterance = message;
+			lastUtterance.start = 0;
+			lastUtterance.offset = 0;
+			paused = false;
+
+			synth.speak(message.text, mode, params, String.valueOf(message.id));
+			speaking = true;
+		}
+	}
+
+	/**
+	 * Called by TTS engine when the TTS service is about to speak the specified range.
+	 */
+	@Override
+	public void onRangeStart(String utteranceId, int start, int end, int frame) {
+		synchronized (lock) {
+			if (lastUtterance != null && Integer.parseInt(utteranceId) == lastUtterance.id) {
+				// Remember the position so resumeSpeaking() can restart from it.
+				lastUtterance.offset = start;
+				// "start" is relative to the text passed to the engine; add the resume offset.
+				GodotLib.ttsCallback(EVENT_BOUNDARY, lastUtterance.id, start + lastUtterance.start);
+			}
+		}
+	}
+
+	/**
+	 * Called by TTS engine when an utterance was canceled in progress.
+	 */
+	@Override
+	public void onStop(String utteranceId, boolean interrupted) {
+		synchronized (lock) {
+			// Ignored while paused: pauseSpeaking() stops the engine itself.
+			if (lastUtterance != null && !paused && Integer.parseInt(utteranceId) == lastUtterance.id) {
+				GodotLib.ttsCallback(EVENT_CANCEL, lastUtterance.id, 0);
+				speaking = false;
+				updateTTS();
+			}
+		}
+	}
+
+	/**
+	 * Called by TTS engine when an utterance has begun to be spoken.
+	 */
+	@Override
+	public void onStart(String utteranceId) {
+		synchronized (lock) {
+			// The start event is only reported while the utterance's start offset is 0.
+			if (lastUtterance != null && lastUtterance.start == 0 && Integer.parseInt(utteranceId) == lastUtterance.id) {
+				GodotLib.ttsCallback(EVENT_START, lastUtterance.id, 0);
+			}
+		}
+	}
+
+	/**
+	 * Called by TTS engine when an utterance was successfully finished.
+	 */
+	@Override
+	public void onDone(String utteranceId) {
+		synchronized (lock) {
+			if (lastUtterance != null && !paused && Integer.parseInt(utteranceId) == lastUtterance.id) {
+				GodotLib.ttsCallback(EVENT_END, lastUtterance.id, 0);
+				speaking = false;
+				updateTTS();
+			}
+		}
+	}
+
+	/**
+	 * Called by TTS engine when an error has occurred during processing.
+	 */
+	@Override
+	public void onError(String utteranceId, int errorCode) {
+		synchronized (lock) {
+			if (lastUtterance != null && !paused && Integer.parseInt(utteranceId) == lastUtterance.id) {
+				GodotLib.ttsCallback(EVENT_CANCEL, lastUtterance.id, 0);
+				speaking = false;
+				updateTTS();
+			}
+		}
+	}
+
+	/**
+	 * Called by TTS engine when an error has occurred during processing (pre API level 21 version).
+	 */
+	@Override
+	public void onError(String utteranceId) {
+		synchronized (lock) {
+			if (lastUtterance != null && !paused && Integer.parseInt(utteranceId) == lastUtterance.id) {
+				GodotLib.ttsCallback(EVENT_CANCEL, lastUtterance.id, 0);
+				speaking = false;
+				updateTTS();
+			}
+		}
+	}
+
+	/**
+	 * Adds an utterance to the queue.
+	 * <p>
+	 * If the synthesizer is paused it is resumed, otherwise the queue is processed.
+	 * NOTE(review): {@code interrupt} is not read in this method; presumably the caller
+	 * stops the synthesizer first when it is set - confirm against the native side.
+	 */
+	public void speak(String text, String voice, int volume, float pitch, float rate, int utterance_id, boolean interrupt) {
+		synchronized (lock) {
+			GodotUtterance message = new GodotUtterance(text, voice, volume, pitch, rate, utterance_id);
+			queue.addLast(message);
+
+			if (isPaused()) {
+				resumeSpeaking();
+			} else {
+				updateTTS();
+			}
+		}
+	}
+
+	/**
+	 * Puts the synthesizer into a paused state.
+	 */
+	public void pauseSpeaking() {
+		synchronized (lock) {
+			if (!paused) {
+				// Set the flag first so the callbacks skip the engine stop triggered below.
+				paused = true;
+				synth.stop();
+			}
+		}
+	}
+
+	/**
+	 * Resumes the synthesizer if it was paused.
+	 * <p>
+	 * The last utterance is spoken again starting from the offset recorded by
+	 * {@link #onRangeStart}.
+	 */
+	public void resumeSpeaking() {
+		synchronized (lock) {
+			if (lastUtterance != null && paused) {
+				int mode = TextToSpeech.QUEUE_FLUSH;
+
+				selectVoice(lastUtterance.voice);
+				synth.setPitch(lastUtterance.pitch);
+				synth.setSpeechRate(lastUtterance.rate);
+
+				Bundle params = new Bundle();
+				params.putFloat(TextToSpeech.Engine.KEY_PARAM_VOLUME, lastUtterance.volume / 100.f);
+
+				// Continue from the last reported range start.
+				lastUtterance.start = lastUtterance.offset;
+				lastUtterance.offset = 0;
+				paused = false;
+
+				synth.speak(lastUtterance.text.substring(lastUtterance.start), mode, params, String.valueOf(lastUtterance.id));
+				speaking = true;
+			} else {
+				paused = false;
+			}
+		}
+	}
+
+	/**
+	 * Stops synthesis in progress and removes all utterances from the queue.
+	 * <p>
+	 * A cancel event is reported for every queued utterance and for the last utterance, if any.
+	 */
+	public void stopSpeaking() {
+		synchronized (lock) {
+			for (GodotUtterance u : queue) {
+				GodotLib.ttsCallback(EVENT_CANCEL, u.id, 0);
+			}
+			queue.clear();
+
+			if (lastUtterance != null) {
+				GodotLib.ttsCallback(EVENT_CANCEL, lastUtterance.id, 0);
+			}
+			// Cleared before stopping the engine so the resulting callbacks are ignored.
+			lastUtterance = null;
+
+			paused = false;
+			speaking = false;
+
+			synth.stop();
+		}
+	}
+
+	/**
+	 * Returns voice information.
+	 * <p>
+	 * Each entry has the form {@code "<locale>;<voice name>"}. An empty array is returned
+	 * when the engine does not provide a voice set.
+	 */
+	public String[] getVoices() {
+		Set<Voice> voices = synth.getVoices();
+		if (voices == null) {
+			// getVoices() may return null on error; report "no voices" instead of crashing.
+			return new String[0];
+		}
+		String[] list = new String[voices.size()];
+		int i = 0;
+		for (Voice v : voices) {
+			list[i++] = v.getLocale().toString() + ";" + v.getName();
+		}
+		return list;
+	}
+
+	/**
+	 * Returns true if the synthesizer is generating speech, or has an utterance waiting in the queue.
+	 */
+	public boolean isSpeaking() {
+		return speaking;
+	}
+
+	/**
+	 * Returns true if the synthesizer is in a paused state.
+	 */
+	public boolean isPaused() {
+		return paused;
+	}
+}

+ 55 - 0
platform/android/java/lib/src/org/godotengine/godot/tts/GodotUtterance.java

@@ -0,0 +1,55 @@
+/*************************************************************************/
+/*  GodotUtterance.java                                                  */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+package org.godotengine.godot.tts;
+
+/**
+ * A speech request for GodotTTS.
+ *
+ * Holds the immutable parameters of one request together with the mutable
+ * text positions GodotTTS uses when the request is resumed after a pause.
+ */
+class GodotUtterance {
+	// Full text of the request.
+	final String text;
+	// Voice name; GodotTTS compares it against Voice.getName().
+	final String voice;
+	// Volume; GodotTTS divides it by 100 before handing it to the engine.
+	final int volume;
+	// Value GodotTTS passes to TextToSpeech.setPitch().
+	final float pitch;
+	// Value GodotTTS passes to TextToSpeech.setSpeechRate().
+	final float rate;
+	// Utterance ID, reported back through GodotLib.ttsCallback().
+	final int id;
+
+	// Position in `text` that becomes `start` on resume; -1 until GodotTTS assigns it.
+	int offset = -1;
+	// Index in `text` from which the text is (re)submitted to the engine.
+	int start = 0;
+
+	// Stores the request parameters as given; `offset` and `start` keep their defaults.
+	GodotUtterance(String text, String voice, int volume, float pitch, float rate, int id) {
+		this.text = text;
+		this.voice = voice;
+		this.volume = volume;
+		this.pitch = pitch;
+		this.rate = rate;
+		this.id = id;
+	}
+}

+ 6 - 0
platform/android/java_godot_lib_jni.cpp

@@ -49,6 +49,7 @@
 #include "os_android.h"
 #include "string_android.h"
 #include "thread_jandroid.h"
+#include "tts_android.h"
 
 #include <android/input.h>
 #include <unistd.h>
@@ -96,6 +97,7 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_initialize(JNIEnv *en
 
 	DirAccessJAndroid::setup(godot_io_java->get_instance());
 	NetSocketAndroid::setup(godot_java->get_member_object("netUtils", "Lorg/godotengine/godot/utils/GodotNetUtils;", env));
+	TTS_Android::setup(godot_java->get_member_object("tts", "Lorg/godotengine/godot/tts/GodotTTS;", env));
 
 	os_android = new OS_Android(godot_java, godot_io_java, p_use_apk_expansion);
 
@@ -213,6 +215,10 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_back(JNIEnv *env, jcl
 	}
 }
 
+// JNI entry point for GodotLib.ttsCallback(): forwards the event, utterance ID and position to TTS_Android.
+JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_ttsCallback(JNIEnv *env, jclass clazz, jint event, jint id, jint pos) {
+	TTS_Android::_java_utterance_callback(event, id, pos);
+}
+
 JNIEXPORT jboolean JNICALL Java_org_godotengine_godot_GodotLib_step(JNIEnv *env, jclass clazz) {
 	if (step.get() == -1) {
 		return true;

+ 1 - 0
platform/android/java_godot_lib_jni.h

@@ -43,6 +43,7 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_setup(JNIEnv *env, jc
 JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_resize(JNIEnv *env, jclass clazz, jobject p_surface, jint p_width, jint p_height);
 JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_newcontext(JNIEnv *env, jclass clazz, jobject p_surface);
 JNIEXPORT jboolean JNICALL Java_org_godotengine_godot_GodotLib_step(JNIEnv *env, jclass clazz);
+JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_ttsCallback(JNIEnv *env, jclass clazz, jint event, jint id, jint pos);
 JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_back(JNIEnv *env, jclass clazz);
 void touch_preprocessing(JNIEnv *env, jclass clazz, jint input_device, jint ev, jint pointer, jint pointer_count, jfloatArray positions, jint buttons_mask = 0, jfloat vertical_factor = 0, jfloat horizontal_factor = 0);
 JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_touch__IIII_3F(JNIEnv *env, jclass clazz, jint input_device, jint ev, jint pointer, jint pointer_count, jfloatArray positions);

+ 189 - 0
platform/android/tts_android.cpp

@@ -0,0 +1,189 @@
+/*************************************************************************/
+/*  tts_android.cpp                                                      */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "tts_android.h"
+
+#include "java_godot_wrapper.h"
+#include "os_android.h"
+#include "string_android.h"
+#include "thread_jandroid.h"
+
+// Global references to the Java GodotTTS object and its class; assigned in setup().
+jobject TTS_Android::tts = nullptr;
+jclass TTS_Android::cls = nullptr;
+
+// Method IDs resolved in setup(); a null ID makes the matching wrapper skip its Java call.
+jmethodID TTS_Android::_is_speaking = nullptr;
+jmethodID TTS_Android::_is_paused = nullptr;
+jmethodID TTS_Android::_get_voices = nullptr;
+jmethodID TTS_Android::_speak = nullptr;
+jmethodID TTS_Android::_pause_speaking = nullptr;
+jmethodID TTS_Android::_resume_speaking = nullptr;
+jmethodID TTS_Android::_stop_speaking = nullptr;
+
+// UTF-16 text of every utterance that has been submitted and not yet finished or canceled.
+Map<int, Char16String> TTS_Android::ids;
+
+// Stores global references to the Java GodotTTS object and its class, and resolves the
+// IDs of the Java methods used by the wrappers below. On failure the method IDs stay
+// null, so the wrappers skip their Java calls.
+void TTS_Android::setup(jobject p_tts) {
+	JNIEnv *env = get_jni_env();
+	ERR_FAIL_COND(env == nullptr);
+	ERR_FAIL_COND(p_tts == nullptr);
+
+	tts = env->NewGlobalRef(p_tts);
+
+	jclass c = env->GetObjectClass(tts);
+	cls = (jclass)env->NewGlobalRef(c);
+	// The global reference keeps the class alive; the local one is no longer needed.
+	env->DeleteLocalRef(c);
+
+	_is_speaking = env->GetMethodID(cls, "isSpeaking", "()Z");
+	_is_paused = env->GetMethodID(cls, "isPaused", "()Z");
+	_get_voices = env->GetMethodID(cls, "getVoices", "()[Ljava/lang/String;");
+	_speak = env->GetMethodID(cls, "speak", "(Ljava/lang/String;Ljava/lang/String;IFFIZ)V");
+	_pause_speaking = env->GetMethodID(cls, "pauseSpeaking", "()V");
+	_resume_speaking = env->GetMethodID(cls, "resumeSpeaking", "()V");
+	_stop_speaking = env->GetMethodID(cls, "stopSpeaking", "()V");
+}
+
+// Handles an utterance event reported by the Java side and forwards it to the DisplayServer.
+// Events for utterance IDs that are not registered in `ids` are ignored.
+void TTS_Android::_java_utterance_callback(int p_event, int p_id, int p_pos) {
+	if (!ids.has(p_id)) {
+		return;
+	}
+
+	const DisplayServer::TTSUtteranceEvent event = (DisplayServer::TTSUtteranceEvent)p_event;
+	int pos = 0;
+	if (event == DisplayServer::TTS_UTTERANCE_BOUNDARY) {
+		// Convert position from UTF-16 to UTF-32: a surrogate pair counts as one character.
+		const Char16String &text = ids[p_id];
+		const int limit = MIN(p_pos, text.length());
+		int unit = 0;
+		while (unit < limit) {
+			const char16_t c = text[unit];
+			// A high surrogate also consumes the code unit that follows it.
+			unit += ((c & 0xfffffc00) == 0xd800) ? 2 : 1;
+			pos++;
+		}
+	} else if (event != DisplayServer::TTS_UTTERANCE_STARTED) {
+		// Every event other than "started" and "boundary" retires the utterance.
+		ids.erase(p_id);
+	}
+	DisplayServer::get_singleton()->tts_post_utterance_event(event, p_id, pos);
+}
+
+// Returns the result of the Java isSpeaking() method, or false when its method ID was not resolved.
+bool TTS_Android::is_speaking() {
+	if (!_is_speaking) {
+		return false;
+	}
+
+	JNIEnv *env = get_jni_env();
+	ERR_FAIL_COND_V(env == nullptr, false);
+
+	return env->CallBooleanMethod(tts, _is_speaking);
+}
+
+// Returns the result of the Java isPaused() method, or false when its method ID was not resolved.
+bool TTS_Android::is_paused() {
+	if (!_is_paused) {
+		return false;
+	}
+
+	JNIEnv *env = get_jni_env();
+	ERR_FAIL_COND_V(env == nullptr, false);
+
+	return env->CallBooleanMethod(tts, _is_paused);
+}
+
+// Returns one Dictionary ("name", "id", "language") per voice reported by the Java side.
+// The Java method returns strings of the form "language;name"; entries that do not
+// split into exactly two fields are skipped. Returns an empty Array on failure.
+Array TTS_Android::get_voices() {
+	Array list;
+	if (_get_voices) {
+		JNIEnv *env = get_jni_env();
+		ERR_FAIL_COND_V(env == nullptr, list);
+
+		jobjectArray voices = static_cast<jobjectArray>(env->CallObjectMethod(tts, _get_voices));
+		if (voices == nullptr) {
+			// No array was returned (e.g. the Java call failed): report no voices.
+			return list;
+		}
+
+		jsize len = env->GetArrayLength(voices);
+		for (jsize i = 0; i < len; i++) {
+			jstring jStr = (jstring)env->GetObjectArrayElement(voices, i);
+			String str = jstring_to_string(jStr, env);
+			Vector<String> tokens = str.split(";", true, 2);
+			if (tokens.size() == 2) {
+				Dictionary voice_d;
+				// The voice name doubles as its identifier.
+				voice_d["name"] = tokens[1];
+				voice_d["id"] = tokens[1];
+				voice_d["language"] = tokens[0];
+				list.push_back(voice_d);
+			}
+			env->DeleteLocalRef(jStr);
+		}
+		// Release the array itself as well, not only its elements.
+		env->DeleteLocalRef(voices);
+	}
+	return list;
+}
+
+// Submits an utterance to the Java GodotTTS object.
+// - p_interrupt: stop and cancel everything already submitted first.
+// - Empty text is reported as canceled immediately and never reaches Java.
+// Volume, pitch and rate are clamped to 0..100, 0..2 and 0.1..10 respectively.
+void TTS_Android::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	if (p_interrupt) {
+		stop();
+	}
+
+	if (p_text.is_empty()) {
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, p_utterance_id);
+		return;
+	}
+
+	// Keep the UTF-16 text: boundary events use it to convert positions to UTF-32.
+	const Char16String text_utf16 = p_text.utf16();
+	ids[p_utterance_id] = text_utf16;
+
+	if (_speak) {
+		JNIEnv *env = get_jni_env();
+		ERR_FAIL_COND(env == nullptr);
+
+		// Build the Java strings from UTF-16. NewStringUTF() expects *modified* UTF-8, which
+		// differs from standard UTF-8 for supplementary characters (and embedded NULs).
+		const Char16String voice_utf16 = p_voice.utf16();
+		jstring jStrT = env->NewString(reinterpret_cast<const jchar *>(text_utf16.get_data()), text_utf16.length());
+		jstring jStrV = env->NewString(reinterpret_cast<const jchar *>(voice_utf16.get_data()), voice_utf16.length());
+		env->CallVoidMethod(tts, _speak, jStrT, jStrV, CLAMP(p_volume, 0, 100), CLAMP(p_pitch, 0.f, 2.f), CLAMP(p_rate, 0.1f, 10.f), p_utterance_id, p_interrupt);
+		// Release the temporaries instead of letting them accumulate in the local reference table.
+		env->DeleteLocalRef(jStrT);
+		env->DeleteLocalRef(jStrV);
+	}
+}
+
+// Calls the Java pauseSpeaking() method; does nothing when its method ID was not resolved.
+void TTS_Android::pause() {
+	if (!_pause_speaking) {
+		return;
+	}
+
+	JNIEnv *env = get_jni_env();
+	ERR_FAIL_COND(env == nullptr);
+
+	env->CallVoidMethod(tts, _pause_speaking);
+}
+
+// Calls the Java resumeSpeaking() method; does nothing when its method ID was not resolved.
+void TTS_Android::resume() {
+	if (!_resume_speaking) {
+		return;
+	}
+
+	JNIEnv *env = get_jni_env();
+	ERR_FAIL_COND(env == nullptr);
+
+	env->CallVoidMethod(tts, _resume_speaking);
+}
+
+// Reports every registered utterance as canceled, forgets them all, then calls the
+// Java stopSpeaking() method (when its method ID was resolved).
+void TTS_Android::stop() {
+	Map<int, Char16String>::Element *E = ids.front();
+	while (E) {
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, E->key());
+		E = E->next();
+	}
+	ids.clear();
+
+	if (!_stop_speaking) {
+		return;
+	}
+
+	JNIEnv *env = get_jni_env();
+	ERR_FAIL_COND(env == nullptr);
+
+	env->CallVoidMethod(tts, _stop_speaking);
+}

+ 67 - 0
platform/android/tts_android.h

@@ -0,0 +1,67 @@
+/*************************************************************************/
+/*  tts_android.h                                                        */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef TTS_ANDROID_H
+#define TTS_ANDROID_H
+
+#include "core/string/ustring.h"
+#include "core/variant/array.h"
+#include "servers/display_server.h"
+
+#include <jni.h>
+
+// Static bridge between the engine and the Java GodotTTS object (org.godotengine.godot.tts.GodotTTS).
+class TTS_Android {
+	// Java GodotTTS object and its class.
+	static jobject tts;
+	static jclass cls;
+
+	// IDs of the Java methods called by the wrappers below.
+	static jmethodID _is_speaking;
+	static jmethodID _is_paused;
+	static jmethodID _get_voices;
+	static jmethodID _speak;
+	static jmethodID _pause_speaking;
+	static jmethodID _resume_speaking;
+	static jmethodID _stop_speaking;
+
+	// Utterance ID -> UTF-16 text of the utterance.
+	static Map<int, Char16String> ids;
+
+public:
+	// Binds this class to the given Java GodotTTS object.
+	static void setup(jobject p_tts);
+	// Receives utterance events (event, utterance ID, text position) from the Java side.
+	static void _java_utterance_callback(int p_event, int p_id, int p_pos);
+
+	static bool is_speaking();
+	static bool is_paused();
+	static Array get_voices();
+	static void speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt);
+	static void pause();
+	static void resume();
+	static void stop();
+};
+
+#endif // TTS_ANDROID_H

+ 1 - 0
platform/iphone/SCsub

@@ -13,6 +13,7 @@ iphone_lib = [
     "display_server_iphone.mm",
     "joypad_iphone.mm",
     "godot_view.mm",
+    "tts_ios.mm",
     "display_layer.mm",
     "godot_app_delegate.m",
     "godot_view_renderer.mm",

+ 11 - 0
platform/iphone/display_server_iphone.h

@@ -58,6 +58,8 @@ class DisplayServerIPhone : public DisplayServer {
 	RenderingDeviceVulkan *rendering_device_vulkan = nullptr;
 #endif
 
+	id tts = nullptr;
+
 	DisplayServer::ScreenOrientation screen_orientation;
 
 	ObjectID window_attached_instance_id;
@@ -123,6 +125,15 @@ public:
 	virtual bool has_feature(Feature p_feature) const override;
 	virtual String get_name() const override;
 
+	virtual bool tts_is_speaking() const override;
+	virtual bool tts_is_paused() const override;
+	virtual Array tts_get_voices() const override;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override;
+	virtual void tts_pause() override;
+	virtual void tts_resume() override;
+	virtual void tts_stop() override;
+
 	virtual int get_screen_count() const override;
 	virtual Point2i screen_get_position(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;
 	virtual Size2i screen_get_size(int p_screen = SCREEN_OF_MAIN_WINDOW) const override;

+ 40 - 0
platform/iphone/display_server_iphone.mm

@@ -38,6 +38,7 @@
 #include "ios.h"
 #import "keyboard_input_view.h"
 #include "os_iphone.h"
+#include "tts_ios.h"
 #import "view_controller.h"
 
 #import <Foundation/Foundation.h>
@@ -52,6 +53,9 @@ DisplayServerIPhone *DisplayServerIPhone::get_singleton() {
 DisplayServerIPhone::DisplayServerIPhone(const String &p_rendering_driver, WindowMode p_mode, DisplayServer::VSyncMode p_vsync_mode, uint32_t p_flags, const Vector2i &p_resolution, Error &r_error) {
 	rendering_driver = p_rendering_driver;
 
+	// Init TTS
+	tts = [[TTS_IOS alloc] init];
+
 #if defined(GLES3_ENABLED)
 	// FIXME: Add support for both OpenGL and Vulkan when OpenGL is implemented
 	// again,
@@ -310,6 +314,7 @@ bool DisplayServerIPhone::has_feature(Feature p_feature) const {
 		case FEATURE_ORIENTATION:
 		case FEATURE_TOUCHSCREEN:
 		case FEATURE_VIRTUAL_KEYBOARD:
+		case FEATURE_TEXT_TO_SPEECH:
 			return true;
 		default:
 			return false;
@@ -320,6 +325,41 @@ String DisplayServerIPhone::get_name() const {
 	return "iPhone";
 }
 
+// Forwards to the TTS helper's isSpeaking; fails with false when the helper is not set.
+bool DisplayServerIPhone::tts_is_speaking() const {
+	ERR_FAIL_COND_V(!tts, false);
+	return [tts isSpeaking];
+}
+
+// Forwards to the TTS helper's isPaused; fails with false when the helper is not set.
+bool DisplayServerIPhone::tts_is_paused() const {
+	ERR_FAIL_COND_V(!tts, false);
+	return [tts isPaused];
+}
+
+// Forwards to the TTS helper's getVoices; fails with an empty Array when the helper is not set.
+Array DisplayServerIPhone::tts_get_voices() const {
+	ERR_FAIL_COND_V(!tts, Array());
+	return [tts getVoices];
+}
+
+// Forwards all arguments unchanged to the TTS helper's speak method; fails when the helper is not set.
+void DisplayServerIPhone::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	ERR_FAIL_COND(!tts);
+	[tts speak:p_text voice:p_voice volume:p_volume pitch:p_pitch rate:p_rate utterance_id:p_utterance_id interrupt:p_interrupt];
+}
+
+// Forwards to the TTS helper's pauseSpeaking; fails when the helper is not set.
+void DisplayServerIPhone::tts_pause() {
+	ERR_FAIL_COND(!tts);
+	[tts pauseSpeaking];
+}
+
+// Forwards to the TTS helper's resumeSpeaking; fails when the helper is not set.
+void DisplayServerIPhone::tts_resume() {
+	ERR_FAIL_COND(!tts);
+	[tts resumeSpeaking];
+}
+
+// Forwards to the TTS helper's stopSpeaking; fails when the helper is not set.
+void DisplayServerIPhone::tts_stop() {
+	ERR_FAIL_COND(!tts);
+	[tts stopSpeaking];
+}
+
 // Always reports a single screen.
 int DisplayServerIPhone::get_screen_count() const {
 	return 1;
 }

+ 59 - 0
platform/iphone/tts_ios.h

@@ -0,0 +1,59 @@
+/*************************************************************************/
+/*  tts_ios.h                                                            */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef TTS_IOS_H
+#define TTS_IOS_H
+
+#include <AVFAudio/AVSpeechSynthesis.h>
+
+#include "core/string/ustring.h"
+#include "core/templates/list.h"
+#include "core/templates/map.h"
+#include "core/variant/array.h"
+#include "servers/display_server.h"
+
+// Objective-C wrapper around AVSpeechSynthesizer; it is also the synthesizer's delegate.
+@interface TTS_IOS : NSObject <AVSpeechSynthesizerDelegate> {
+	// True while an utterance handed to av_synth has neither finished nor been canceled.
+	bool speaking;
+	// Maps each submitted AVSpeechUtterance to the utterance ID supplied by the caller.
+	Map<id, int> ids;
+
+	AVSpeechSynthesizer *av_synth;
+	// Requests waiting to be handed to av_synth.
+	List<DisplayServer::TTSUtterance> queue;
+}
+
+- (void)pauseSpeaking;
+- (void)resumeSpeaking;
+- (void)stopSpeaking;
+- (bool)isSpeaking;
+- (bool)isPaused;
+- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt;
+- (Array)getVoices;
+@end
+
+#endif // TTS_IOS_H

+ 164 - 0
platform/iphone/tts_ios.mm

@@ -0,0 +1,164 @@
+/*************************************************************************/
+/*  tts_ios.mm                                                           */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "tts_ios.h"
+
+@implementation TTS_IOS
+
+// Creates the AVSpeechSynthesizer and registers this object as its delegate.
+- (id)init {
+	self = [super init];
+	// [super init] may return nil; instance variables must not be touched in that case.
+	if (self) {
+		self->speaking = false;
+		self->av_synth = [[AVSpeechSynthesizer alloc] init];
+		[self->av_synth setDelegate:self];
+		print_verbose("Text-to-Speech: AVSpeechSynthesizer initialized.");
+	}
+	return self;
+}
+
+// Delegate callback: posts a boundary event carrying the UTF-32 position of the range about to be spoken.
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth willSpeakRangeOfSpeechString:(NSRange)characterRange utterance:(AVSpeechUtterance *)utterance {
+	NSString *text = [utterance speechString];
+
+	// Convert from UTF-16 to UTF-32 position: a surrogate pair counts as one character.
+	const NSUInteger limit = MIN(characterRange.location, text.length);
+	int pos = 0;
+	NSUInteger unit = 0;
+	while (unit < limit) {
+		unichar c = [text characterAtIndex:unit];
+		// A high surrogate also consumes the code unit that follows it.
+		unit += ((c & 0xfffffc00) == 0xd800) ? 2 : 1;
+		pos++;
+	}
+
+	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, ids[utterance], pos);
+}
+
+// Delegate callback: reports the utterance as canceled, forgets it and starts the next queued request.
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didCancelSpeechUtterance:(AVSpeechUtterance *)utterance {
+	const int utterance_id = ids[utterance];
+	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, utterance_id);
+	ids.erase(utterance);
+
+	speaking = false;
+	[self update];
+}
+
+// Delegate callback: reports the utterance as ended, forgets it and starts the next queued request.
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didFinishSpeechUtterance:(AVSpeechUtterance *)utterance {
+	const int utterance_id = ids[utterance];
+	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, utterance_id);
+	ids.erase(utterance);
+
+	speaking = false;
+	[self update];
+}
+
+// Hands the next queued request to the synthesizer, unless an utterance is already in progress.
+- (void)update {
+	if (!speaking && queue.size() > 0) {
+		// Work on a copy: a reference into the list would dangle once the element is
+		// popped below, and the request is still needed after that point.
+		const DisplayServer::TTSUtterance message = queue.front()->get();
+
+		AVSpeechUtterance *new_utterance = [[AVSpeechUtterance alloc] initWithString:[NSString stringWithUTF8String:message.text.utf8().get_data()]];
+		[new_utterance setVoice:[AVSpeechSynthesisVoice voiceWithIdentifier:[NSString stringWithUTF8String:message.voice.utf8().get_data()]]];
+		// Map the 0.1..10 rate (1 = default) onto AVSpeechUtterance's minimum/default/maximum rate range.
+		// A rate of exactly 1 leaves the utterance's rate untouched.
+		if (message.rate > 1.f) {
+			[new_utterance setRate:Math::range_lerp(message.rate, 1.f, 10.f, AVSpeechUtteranceDefaultSpeechRate, AVSpeechUtteranceMaximumSpeechRate)];
+		} else if (message.rate < 1.f) {
+			[new_utterance setRate:Math::range_lerp(message.rate, 0.1f, 1.f, AVSpeechUtteranceMinimumSpeechRate, AVSpeechUtteranceDefaultSpeechRate)];
+		}
+		[new_utterance setPitchMultiplier:message.pitch];
+		// Volume is stored on a 0..100 scale; AVSpeechUtterance expects 0..1.
+		[new_utterance setVolume:(Math::range_lerp(message.volume, 0.f, 100.f, 0.f, 1.f))];
+
+		// Remember which caller-supplied ID belongs to this utterance for the delegate callbacks.
+		ids[new_utterance] = message.id;
+		[av_synth speakUtterance:new_utterance];
+
+		queue.pop_front();
+
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_STARTED, message.id);
+		speaking = true;
+	}
+}
+
+// Asks the synthesizer to pause immediately.
+- (void)pauseSpeaking {
+	[av_synth pauseSpeakingAtBoundary:AVSpeechBoundaryImmediate];
+}
+
+// Asks the synthesizer to continue speaking.
+- (void)resumeSpeaking {
+	[av_synth continueSpeaking];
+}
+
+// Reports every queued request as canceled, empties the queue and stops the synthesizer immediately.
+- (void)stopSpeaking {
+	for (List<DisplayServer::TTSUtterance>::Element *E = queue.front(); E; E = E->next()) {
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, E->get().id);
+	}
+	queue.clear();
+
+	[av_synth stopSpeakingAtBoundary:AVSpeechBoundaryImmediate];
+	speaking = false;
+}
+
+// True while an utterance is in progress or requests are still queued.
+- (bool)isSpeaking {
+	if (speaking) {
+		return true;
+	}
+	return queue.size() > 0;
+}
+
+// Returns the synthesizer's own paused state.
+- (bool)isPaused {
+	return [av_synth isPaused];
+}
+
+// Queues a speech request. With `interrupt` set, everything already submitted is stopped
+// first. Empty text is reported as canceled right away. Volume, pitch and rate are
+// clamped to 0..100, 0..2 and 0.1..10 respectively.
+- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt {
+	if (interrupt) {
+		[self stopSpeaking];
+	}
+
+	if (text.is_empty()) {
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, utterance_id);
+		return;
+	}
+
+	DisplayServer::TTSUtterance request;
+	request.id = utterance_id;
+	request.text = text;
+	request.voice = voice;
+	request.volume = CLAMP(volume, 0, 100);
+	request.pitch = CLAMP(pitch, 0.f, 2.f);
+	request.rate = CLAMP(rate, 0.1f, 10.f);
+	queue.push_back(request);
+
+	// When not paused, try to start the request now; otherwise resume the synthesizer.
+	if (![self isPaused]) {
+		[self update];
+		return;
+	}
+	[self resumeSpeaking];
+}
+
+// Returns one Dictionary ("name", "id", "language") per voice listed by AVSpeechSynthesisVoice.
+- (Array)getVoices {
+	Array list;
+	for (AVSpeechSynthesisVoice *av_voice in [AVSpeechSynthesisVoice speechVoices]) {
+		Dictionary entry;
+		entry["name"] = String::utf8([[av_voice name] UTF8String]);
+		entry["id"] = String::utf8([[av_voice identifier] UTF8String]);
+		entry["language"] = String::utf8([[av_voice language] UTF8String]);
+		list.push_back(entry);
+	}
+	return list;
+}
+
+@end

+ 86 - 0
platform/javascript/display_server_javascript.cpp

@@ -274,6 +274,90 @@ const char *DisplayServerJavaScript::godot2dom_cursor(DisplayServer::CursorShape
 	}
 }
 
+// Returns the result of godot_js_tts_is_speaking().
+bool DisplayServerJavaScript::tts_is_speaking() const {
+	return godot_js_tts_is_speaking();
+}
+
+// Returns the result of godot_js_tts_is_paused().
+bool DisplayServerJavaScript::tts_is_paused() const {
+	return godot_js_tts_is_paused();
+}
+
+// Rebuilds the singleton's `voices` list from `p_size` strings of the form "language;name".
+// Entries that do not split into exactly two fields are skipped.
+void DisplayServerJavaScript::update_voices_callback(int p_size, const char **p_voice) {
+	DisplayServerJavaScript *ds = get_singleton();
+	ds->voices.clear();
+	for (int idx = 0; idx < p_size; idx++) {
+		const Vector<String> parts = String::utf8(p_voice[idx]).split(";", true, 2);
+		if (parts.size() != 2) {
+			continue;
+		}
+		Dictionary entry;
+		// The voice name doubles as its identifier.
+		entry["name"] = parts[1];
+		entry["id"] = parts[1];
+		entry["language"] = parts[0];
+		ds->voices.push_back(entry);
+	}
+}
+
+// Passes update_voices_callback to godot_js_tts_get_voices(), then returns the `voices` member.
+Array DisplayServerJavaScript::tts_get_voices() const {
+	godot_js_tts_get_voices(update_voices_callback);
+	return voices;
+}
+
+// Submits an utterance to the JavaScript side. With `p_interrupt` set, everything already
+// submitted is stopped first. Empty text is reported as canceled right away. Volume,
+// pitch and rate are clamped to 0..100, 0..2 and 0.1..10 respectively.
+void DisplayServerJavaScript::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	if (p_interrupt) {
+		tts_stop();
+	}
+
+	if (p_text.is_empty()) {
+		tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, p_utterance_id);
+		return;
+	}
+
+	// Keep the UTF-8 text: boundary events use it to convert positions to UTF-32.
+	CharString text_utf8 = p_text.utf8();
+	utterance_ids[p_utterance_id] = text_utf8;
+
+	const int volume = CLAMP(p_volume, 0, 100);
+	const float pitch = CLAMP(p_pitch, 0.f, 2.f);
+	const float rate = CLAMP(p_rate, 0.1f, 10.f);
+	godot_js_tts_speak(text_utf8.get_data(), p_voice.utf8().get_data(), volume, pitch, rate, p_utterance_id, DisplayServerJavaScript::_js_utterance_callback);
+}
+
+// Calls godot_js_tts_pause().
+void DisplayServerJavaScript::tts_pause() {
+	godot_js_tts_pause();
+}
+
+// Calls godot_js_tts_resume().
+void DisplayServerJavaScript::tts_resume() {
+	godot_js_tts_resume();
+}
+
+// Reports every registered utterance as canceled, forgets them all, then calls godot_js_tts_stop().
+void DisplayServerJavaScript::tts_stop() {
+	Map<int, CharString>::Element *E = utterance_ids.front();
+	while (E) {
+		tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, E->key());
+		E = E->next();
+	}
+	utterance_ids.clear();
+
+	godot_js_tts_stop();
+}
+
+// Handles an utterance event reported by the JavaScript side and posts it as a DisplayServer event.
+// Events for utterance IDs that are not registered in `utterance_ids` are ignored.
+void DisplayServerJavaScript::_js_utterance_callback(int p_event, int p_id, int p_pos) {
+	DisplayServerJavaScript *ds = (DisplayServerJavaScript *)DisplayServer::get_singleton();
+	if (!ds->utterance_ids.has(p_id)) {
+		return;
+	}
+
+	const TTSUtteranceEvent event = (TTSUtteranceEvent)p_event;
+	int pos = 0;
+	if (event == DisplayServer::TTS_UTTERANCE_BOUNDARY) {
+		// Convert position from UTF-8 to UTF-32: step over whole sequences, counting one character each.
+		const CharString &text = ds->utterance_ids[p_id];
+		const int limit = MIN(p_pos, text.length());
+		int byte = 0;
+		while (byte < limit) {
+			const uint8_t lead = text[byte];
+			if ((lead & 0xe0) == 0xc0) {
+				byte += 2; // Two-byte sequence.
+			} else if ((lead & 0xf0) == 0xe0) {
+				byte += 3; // Three-byte sequence.
+			} else if ((lead & 0xf8) == 0xf0) {
+				byte += 4; // Four-byte sequence.
+			} else {
+				byte += 1;
+			}
+			pos++;
+		}
+	} else if (event != DisplayServer::TTS_UTTERANCE_STARTED) {
+		// Every event other than "started" and "boundary" retires the utterance.
+		ds->utterance_ids.erase(p_id);
+	}
+	ds->tts_post_utterance_event(event, p_id, pos);
+}
+
 void DisplayServerJavaScript::cursor_set_shape(CursorShape p_shape) {
 	ERR_FAIL_INDEX(p_shape, CURSOR_MAX);
 	if (cursor_shape == p_shape) {
@@ -755,6 +839,8 @@ bool DisplayServerJavaScript::has_feature(Feature p_feature) const {
 		//case FEATURE_ORIENTATION:
 		case FEATURE_VIRTUAL_KEYBOARD:
 			return godot_js_display_vk_available() != 0;
+		case FEATURE_TEXT_TO_SPEECH:
+			return godot_js_display_tts_available() != 0;
 		default:
 			return false;
 	}

+ 16 - 0
platform/javascript/display_server_javascript.h

@@ -55,6 +55,8 @@ private:
 	EMSCRIPTEN_WEBGL_CONTEXT_HANDLE webgl_ctx = 0;
 #endif
 
+	Map<int, CharString> utterance_ids;
+
 	WindowMode window_mode = WINDOW_MODE_WINDOWED;
 	ObjectID window_attached_instance_id = {};
 
@@ -66,6 +68,8 @@ private:
 	String clipboard;
 	Point2 touches[32];
 
+	Array voices;
+
 	char canvas_id[256] = { 0 };
 	bool cursor_inside_canvas = true;
 	CursorShape cursor_shape = CURSOR_ARROW;
@@ -89,6 +93,7 @@ private:
 	static void vk_input_text_callback(const char *p_text, int p_cursor);
 	static void gamepad_callback(int p_index, int p_connected, const char *p_id, const char *p_guid);
 	void process_joypads();
+	static void _js_utterance_callback(int p_event, int p_id, int p_pos);
 
 	static Vector<String> get_rendering_drivers_func();
 	static DisplayServer *create_func(const String &p_rendering_driver, WindowMode p_window_mode, VSyncMode p_vsync_mode, uint32_t p_flags, const Vector2i &p_resolution, Error &r_error);
@@ -97,6 +102,7 @@ private:
 
 	static void request_quit_callback();
 	static void window_blur_callback();
+	static void update_voices_callback(int p_size, const char **p_voice);
 	static void update_clipboard_callback(const char *p_text);
 	static void send_window_event_callback(int p_notification);
 	static void drop_files_js_callback(char **p_filev, int p_filec);
@@ -115,6 +121,16 @@ public:
 	virtual bool has_feature(Feature p_feature) const override;
 	virtual String get_name() const override;
 
+	// tts
+	virtual bool tts_is_speaking() const override;
+	virtual bool tts_is_paused() const override;
+	virtual Array tts_get_voices() const override;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override;
+	virtual void tts_pause() override;
+	virtual void tts_resume() override;
+	virtual void tts_stop() override;
+
 	// cursor
 	virtual void cursor_set_shape(CursorShape p_shape) override;
 	virtual CursorShape cursor_get_shape() const override;

+ 10 - 0
platform/javascript/godot_js.h

@@ -67,6 +67,15 @@ extern int godot_js_input_gamepad_sample_get(int p_idx, float r_btns[16], int32_
 extern void godot_js_input_paste_cb(void (*p_callback)(const char *p_text));
 extern void godot_js_input_drop_files_cb(void (*p_callback)(char **p_filev, int p_filec));
 
+// TTS (implemented in platform/javascript/js/libs/library_godot_display.js)
+extern int godot_js_tts_is_speaking();
+extern int godot_js_tts_is_paused();
+extern int godot_js_tts_get_voices(void (*p_callback)(int p_size, const char **p_voices)); // NOTE(review): the JS side is declared with sig 'vi' and returns nothing, so this `int` result looks unreliable - confirm.
+extern void godot_js_tts_speak(const char *p_text, const char *p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, void (*p_callback)(int p_event, int p_id, int p_pos));
+extern void godot_js_tts_pause();
+extern void godot_js_tts_resume();
+extern void godot_js_tts_stop();
+
 // Display
 extern int godot_js_display_screen_dpi_get();
 extern double godot_js_display_pixel_ratio_get();
@@ -109,6 +118,7 @@ extern void godot_js_display_notification_cb(void (*p_callback)(int p_notificati
 
 // Display Virtual Keyboard
 extern int godot_js_display_vk_available();
+extern int godot_js_display_tts_available();
 extern void godot_js_display_vk_cb(void (*p_input)(const char *p_text, int p_cursor));
 extern void godot_js_display_vk_show(const char *p_text, int p_multiline, int p_start, int p_end);
 extern void godot_js_display_vk_hide();

+ 90 - 0
platform/javascript/js/libs/library_godot_display.js

@@ -330,6 +330,91 @@ const GodotDisplay = {
 		return 0;
 	},
 
+	godot_js_tts_is_speaking__sig: 'i',
+	godot_js_tts_is_speaking: function () { // Exposes the browser's `speechSynthesis.speaking` flag.
+		return window.speechSynthesis.speaking;
+	},
+
+	godot_js_tts_is_paused__sig: 'i',
+	godot_js_tts_is_paused: function () { // Exposes the browser's `speechSynthesis.paused` flag.
+		return window.speechSynthesis.paused;
+	},
+
+	godot_js_tts_get_voices__sig: 'vi',
+	godot_js_tts_get_voices: function (p_callback) {
+		const func = GodotRuntime.get_func(p_callback);
+		try {
+			const arr = [];
+			const voices = window.speechSynthesis.getVoices();
+			for (let i = 0; i < voices.length; i++) {
+				arr.push(`${voices[i].lang};${voices[i].name}`);
+			}
+			const c_ptr = GodotRuntime.allocStringArray(arr);
+			func(arr.length, c_ptr);
+			GodotRuntime.freeStringArray(c_ptr, arr.length);
+		} catch (e) {
+			// Fail graciously.
+		}
+	},
+
+	godot_js_tts_speak__sig: 'viiiffii',
+	godot_js_tts_speak: function (p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_callback) {
+		const func = GodotRuntime.get_func(p_callback);
+
+		function listener_end(evt) {
+			evt.currentTarget.cb(1 /*TTS_UTTERANCE_ENDED*/, evt.currentTarget.id, 0);
+		}
+
+		function listener_start(evt) {
+			evt.currentTarget.cb(0 /*TTS_UTTERANCE_STARTED*/, evt.currentTarget.id, 0);
+		}
+
+		function listener_error(evt) {
+			evt.currentTarget.cb(2 /*TTS_UTTERANCE_CANCELED*/, evt.currentTarget.id, 0);
+		}
+
+		function listener_bound(evt) {
+			evt.currentTarget.cb(3 /*TTS_UTTERANCE_BOUNDARY*/, evt.currentTarget.id, evt.charIndex);
+		}
+
+		const utterance = new SpeechSynthesisUtterance(GodotRuntime.parseString(p_text));
+		utterance.rate = p_rate;
+		utterance.pitch = p_pitch;
+		utterance.volume = p_volume / 100.0;
+		utterance.addEventListener('end', listener_end);
+		utterance.addEventListener('start', listener_start);
+		utterance.addEventListener('error', listener_error);
+		utterance.addEventListener('boundary', listener_bound);
+		utterance.id = p_utterance_id;
+		utterance.cb = func;
+		const voice = GodotRuntime.parseString(p_voice);
+		const voices = window.speechSynthesis.getVoices();
+		for (let i = 0; i < voices.length; i++) {
+			if (voices[i].name === voice) {
+				utterance.voice = voices[i];
+				break;
+			}
+		}
+		window.speechSynthesis.resume();
+		window.speechSynthesis.speak(utterance);
+	},
+
+	godot_js_tts_pause__sig: 'v',
+	godot_js_tts_pause: function () { // Thin wrapper over `speechSynthesis.pause()`.
+		window.speechSynthesis.pause();
+	},
+
+	godot_js_tts_resume__sig: 'v',
+	godot_js_tts_resume: function () { // Thin wrapper over `speechSynthesis.resume()`.
+		window.speechSynthesis.resume();
+	},
+
+	godot_js_tts_stop__sig: 'v',
+	godot_js_tts_stop: function () { // Cancels speech synthesis, then calls resume().
+		window.speechSynthesis.cancel();
+		window.speechSynthesis.resume(); // NOTE(review): presumably clears a lingering paused state after cancel - confirm.
+	},
+
 	godot_js_display_alert__sig: 'vi',
 	godot_js_display_alert: function (p_text) {
 		window.alert(GodotRuntime.parseString(p_text)); // eslint-disable-line no-alert
@@ -625,6 +710,11 @@ const GodotDisplay = {
 		return GodotDisplayVK.available();
 	},
 
+	godot_js_display_tts_available__sig: 'i',
+	godot_js_display_tts_available: function () { // Feature check: true when `window` has a `speechSynthesis` property.
+		return 'speechSynthesis' in window;
+	},
+
 	godot_js_display_vk_cb__sig: 'vi',
 	godot_js_display_vk_cb: function (p_input_cb) {
 		const input_cb = GodotRuntime.get_func(p_input_cb);

+ 3 - 0
platform/linuxbsd/SCsub

@@ -20,6 +20,9 @@ if "x11" in env and env["x11"]:
         "key_mapping_x11.cpp",
     ]
 
+if "speechd" in env and env["speechd"]:  # Add the generated speechd dlopen wrapper and tts_linux.cpp when the speechd option is on.
+    common_linuxbsd.append(["speechd-so_wrap.c", "tts_linux.cpp"])
+
 if "vulkan" in env and env["vulkan"]:
     common_linuxbsd.append("vulkan_context_x11.cpp")
 

+ 8 - 0
platform/linuxbsd/detect.py

@@ -75,6 +75,7 @@ def get_opts():
         BoolVariable("use_msan", "Use LLVM compiler memory sanitizer (MSAN)", False),
         BoolVariable("pulseaudio", "Detect and use PulseAudio", True),
         BoolVariable("dbus", "Detect and use D-Bus to handle screensaver", True),
+        BoolVariable("speechd", "Detect and use Speech Dispatcher for Text-to-Speech support", True),
         BoolVariable("udev", "Use udev for gamepad connection callbacks", True),
         BoolVariable("x11", "Enable X11 display", True),
         BoolVariable("debug_symbols", "Add debugging symbols to release/release_debug builds", True),
@@ -337,6 +338,13 @@ def configure(env):
         else:
             print("Warning: D-Bus development libraries not found. Disabling screensaver prevention.")
 
+    if env["speechd"]:
+        if os.system("pkg-config --exists speech-dispatcher") == 0:  # 0 means found
+            env.Append(CPPDEFINES=["SPEECHD_ENABLED"])
+            env.ParseConfig("pkg-config speech-dispatcher --cflags")  # Only cflags, we dlopen the library.
+        else:
+            print("Warning: Speech Dispatcher development libraries not found. Disabling Text-to-Speech support.")
+
     if platform.system() == "Linux":
         env.Append(CPPDEFINES=["JOYDEV_ENABLED"])
         if env["udev"]:

+ 49 - 0
platform/linuxbsd/display_server_x11.cpp

@@ -139,6 +139,7 @@ bool DisplayServerX11::has_feature(Feature p_feature) const {
 		case FEATURE_KEEP_SCREEN_ON:
 #endif
 		case FEATURE_CLIPBOARD_PRIMARY:
+		case FEATURE_TEXT_TO_SPEECH:
 			return true;
 		default: {
 		}
@@ -307,6 +308,45 @@ void DisplayServerX11::_flush_mouse_motion() {
 	xi.relative_motion.y = 0;
 }
 
+#ifdef SPEECHD_ENABLED
+
+bool DisplayServerX11::tts_is_speaking() const { // Forwards to the TTS_Linux backend's is_speaking().
+	ERR_FAIL_COND_V(!tts, false); // No backend instance: fail with `false`.
+	return tts->is_speaking();
+}
+
+bool DisplayServerX11::tts_is_paused() const { // Forwards to the TTS_Linux backend's is_paused().
+	ERR_FAIL_COND_V(!tts, false); // No backend instance: fail with `false`.
+	return tts->is_paused();
+}
+
+Array DisplayServerX11::tts_get_voices() const { // Forwards to the TTS_Linux backend's get_voices().
+	ERR_FAIL_COND_V(!tts, Array()); // No backend instance: fail with an empty Array.
+	return tts->get_voices();
+}
+
+void DisplayServerX11::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { // Passes all arguments unchanged to the TTS_Linux backend's speak().
+	ERR_FAIL_COND(!tts); // No backend instance: fail without speaking.
+	tts->speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt);
+}
+
+void DisplayServerX11::tts_pause() { // Forwards to the TTS_Linux backend's pause().
+	ERR_FAIL_COND(!tts); // No backend instance: fail without doing anything.
+	tts->pause();
+}
+
+void DisplayServerX11::tts_resume() { // Forwards to the TTS_Linux backend's resume().
+	ERR_FAIL_COND(!tts); // No backend instance: fail without doing anything.
+	tts->resume();
+}
+
+void DisplayServerX11::tts_stop() { // Forwards to the TTS_Linux backend's stop().
+	ERR_FAIL_COND(!tts); // No backend instance: fail without doing anything.
+	tts->stop();
+}
+
+#endif
+
 void DisplayServerX11::mouse_set_mode(MouseMode p_mode) {
 	_THREAD_SAFE_METHOD_
 
@@ -4633,6 +4673,11 @@ DisplayServerX11::DisplayServerX11(const String &p_rendering_driver, WindowMode
 	xdnd_finished = XInternAtom(x11_display, "XdndFinished", False);
 	xdnd_selection = XInternAtom(x11_display, "XdndSelection", False);
 
+#ifdef SPEECHD_ENABLED
+	// Init TTS
+	tts = memnew(TTS_Linux);
+#endif
+
 	//!!!!!!!!!!!!!!!!!!!!!!!!!!
 	//TODO - do Vulkan and OpenGL support checks, driver selection and fallback
 	rendering_driver = p_rendering_driver;
@@ -4985,6 +5030,10 @@ DisplayServerX11::~DisplayServerX11() {
 		memfree(xmbstring);
 	}
 
+#ifdef SPEECHD_ENABLED
+	memdelete(tts);
+#endif
+
 #ifdef DBUS_ENABLED
 	memdelete(screensaver);
 #endif

+ 19 - 0
platform/linuxbsd/display_server_x11.h

@@ -46,6 +46,10 @@
 #include "servers/rendering/renderer_compositor.h"
 #include "servers/rendering_server.h"
 
+#if defined(SPEECHD_ENABLED)
+#include "tts_linux.h"
+#endif
+
 #if defined(GLES3_ENABLED)
 #include "gl_manager_x11.h"
 #endif
@@ -112,6 +116,10 @@ class DisplayServerX11 : public DisplayServer {
 	bool keep_screen_on = false;
 #endif
 
+#ifdef SPEECHD_ENABLED
+	TTS_Linux *tts = nullptr;
+#endif
+
 	struct WindowData {
 		Window x11_window;
 		::XIC xic;
@@ -298,6 +306,17 @@ public:
 	virtual bool has_feature(Feature p_feature) const override;
 	virtual String get_name() const override;
 
+#ifdef SPEECHD_ENABLED
+	virtual bool tts_is_speaking() const override;
+	virtual bool tts_is_paused() const override;
+	virtual Array tts_get_voices() const override;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override;
+	virtual void tts_pause() override;
+	virtual void tts_resume() override;
+	virtual void tts_stop() override;
+#endif
+
 	virtual void mouse_set_mode(MouseMode p_mode) override;
 	virtual MouseMode mouse_get_mode() const override;
 

+ 881 - 0
platform/linuxbsd/speechd-so_wrap.c

@@ -0,0 +1,881 @@
+// This file is generated. Do not edit!
+// see https://github.com/hpvb/dynload-wrapper for details
+// generated by ./dynload-wrapper/generate-wrapper.py 0.3 on 2022-04-28 14:34:21
+// flags: ./dynload-wrapper/generate-wrapper.py --sys-include <libspeechd.h> --include /usr/include/speech-dispatcher/libspeechd.h --soname libspeechd.so.2 --init-name speechd --omit-prefix spd_get_client_list --output-header speechd-so_wrap.h --output-implementation speechd-so_wrap.c
+//
+#include <stdint.h>
+
+#define SPDConnectionAddress__free SPDConnectionAddress__free_dylibloader_orig_speechd
+#define spd_get_default_address spd_get_default_address_dylibloader_orig_speechd
+#define spd_open spd_open_dylibloader_orig_speechd
+#define spd_open2 spd_open2_dylibloader_orig_speechd
+#define spd_close spd_close_dylibloader_orig_speechd
+#define spd_say spd_say_dylibloader_orig_speechd
+#define spd_sayf spd_sayf_dylibloader_orig_speechd
+#define spd_stop spd_stop_dylibloader_orig_speechd
+#define spd_stop_all spd_stop_all_dylibloader_orig_speechd
+#define spd_stop_uid spd_stop_uid_dylibloader_orig_speechd
+#define spd_cancel spd_cancel_dylibloader_orig_speechd
+#define spd_cancel_all spd_cancel_all_dylibloader_orig_speechd
+#define spd_cancel_uid spd_cancel_uid_dylibloader_orig_speechd
+#define spd_pause spd_pause_dylibloader_orig_speechd
+#define spd_pause_all spd_pause_all_dylibloader_orig_speechd
+#define spd_pause_uid spd_pause_uid_dylibloader_orig_speechd
+#define spd_resume spd_resume_dylibloader_orig_speechd
+#define spd_resume_all spd_resume_all_dylibloader_orig_speechd
+#define spd_resume_uid spd_resume_uid_dylibloader_orig_speechd
+#define spd_key spd_key_dylibloader_orig_speechd
+#define spd_char spd_char_dylibloader_orig_speechd
+#define spd_wchar spd_wchar_dylibloader_orig_speechd
+#define spd_sound_icon spd_sound_icon_dylibloader_orig_speechd
+#define spd_set_voice_type spd_set_voice_type_dylibloader_orig_speechd
+#define spd_set_voice_type_all spd_set_voice_type_all_dylibloader_orig_speechd
+#define spd_set_voice_type_uid spd_set_voice_type_uid_dylibloader_orig_speechd
+#define spd_get_voice_type spd_get_voice_type_dylibloader_orig_speechd
+#define spd_set_synthesis_voice spd_set_synthesis_voice_dylibloader_orig_speechd
+#define spd_set_synthesis_voice_all spd_set_synthesis_voice_all_dylibloader_orig_speechd
+#define spd_set_synthesis_voice_uid spd_set_synthesis_voice_uid_dylibloader_orig_speechd
+#define spd_set_data_mode spd_set_data_mode_dylibloader_orig_speechd
+#define spd_set_notification_on spd_set_notification_on_dylibloader_orig_speechd
+#define spd_set_notification_off spd_set_notification_off_dylibloader_orig_speechd
+#define spd_set_notification spd_set_notification_dylibloader_orig_speechd
+#define spd_set_voice_rate spd_set_voice_rate_dylibloader_orig_speechd
+#define spd_set_voice_rate_all spd_set_voice_rate_all_dylibloader_orig_speechd
+#define spd_set_voice_rate_uid spd_set_voice_rate_uid_dylibloader_orig_speechd
+#define spd_get_voice_rate spd_get_voice_rate_dylibloader_orig_speechd
+#define spd_set_voice_pitch spd_set_voice_pitch_dylibloader_orig_speechd
+#define spd_set_voice_pitch_all spd_set_voice_pitch_all_dylibloader_orig_speechd
+#define spd_set_voice_pitch_uid spd_set_voice_pitch_uid_dylibloader_orig_speechd
+#define spd_get_voice_pitch spd_get_voice_pitch_dylibloader_orig_speechd
+#define spd_set_voice_pitch_range spd_set_voice_pitch_range_dylibloader_orig_speechd
+#define spd_set_voice_pitch_range_all spd_set_voice_pitch_range_all_dylibloader_orig_speechd
+#define spd_set_voice_pitch_range_uid spd_set_voice_pitch_range_uid_dylibloader_orig_speechd
+#define spd_set_volume spd_set_volume_dylibloader_orig_speechd
+#define spd_set_volume_all spd_set_volume_all_dylibloader_orig_speechd
+#define spd_set_volume_uid spd_set_volume_uid_dylibloader_orig_speechd
+#define spd_get_volume spd_get_volume_dylibloader_orig_speechd
+#define spd_set_punctuation spd_set_punctuation_dylibloader_orig_speechd
+#define spd_set_punctuation_all spd_set_punctuation_all_dylibloader_orig_speechd
+#define spd_set_punctuation_uid spd_set_punctuation_uid_dylibloader_orig_speechd
+#define spd_set_capital_letters spd_set_capital_letters_dylibloader_orig_speechd
+#define spd_set_capital_letters_all spd_set_capital_letters_all_dylibloader_orig_speechd
+#define spd_set_capital_letters_uid spd_set_capital_letters_uid_dylibloader_orig_speechd
+#define spd_set_spelling spd_set_spelling_dylibloader_orig_speechd
+#define spd_set_spelling_all spd_set_spelling_all_dylibloader_orig_speechd
+#define spd_set_spelling_uid spd_set_spelling_uid_dylibloader_orig_speechd
+#define spd_set_language spd_set_language_dylibloader_orig_speechd
+#define spd_set_language_all spd_set_language_all_dylibloader_orig_speechd
+#define spd_set_language_uid spd_set_language_uid_dylibloader_orig_speechd
+#define spd_get_language spd_get_language_dylibloader_orig_speechd
+#define spd_set_output_module spd_set_output_module_dylibloader_orig_speechd
+#define spd_set_output_module_all spd_set_output_module_all_dylibloader_orig_speechd
+#define spd_set_output_module_uid spd_set_output_module_uid_dylibloader_orig_speechd
+#define spd_get_message_list_fd spd_get_message_list_fd_dylibloader_orig_speechd
+#define spd_list_modules spd_list_modules_dylibloader_orig_speechd
+#define free_spd_modules free_spd_modules_dylibloader_orig_speechd
+#define spd_get_output_module spd_get_output_module_dylibloader_orig_speechd
+#define spd_list_voices spd_list_voices_dylibloader_orig_speechd
+#define spd_list_synthesis_voices spd_list_synthesis_voices_dylibloader_orig_speechd
+#define free_spd_voices free_spd_voices_dylibloader_orig_speechd
+#define spd_execute_command_with_list_reply spd_execute_command_with_list_reply_dylibloader_orig_speechd
+#define spd_execute_command spd_execute_command_dylibloader_orig_speechd
+#define spd_execute_command_with_reply spd_execute_command_with_reply_dylibloader_orig_speechd
+#define spd_execute_command_wo_mutex spd_execute_command_wo_mutex_dylibloader_orig_speechd
+#define spd_send_data spd_send_data_dylibloader_orig_speechd
+#define spd_send_data_wo_mutex spd_send_data_wo_mutex_dylibloader_orig_speechd
+#include <libspeechd.h>
+#undef SPDConnectionAddress__free
+#undef spd_get_default_address
+#undef spd_open
+#undef spd_open2
+#undef spd_close
+#undef spd_say
+#undef spd_sayf
+#undef spd_stop
+#undef spd_stop_all
+#undef spd_stop_uid
+#undef spd_cancel
+#undef spd_cancel_all
+#undef spd_cancel_uid
+#undef spd_pause
+#undef spd_pause_all
+#undef spd_pause_uid
+#undef spd_resume
+#undef spd_resume_all
+#undef spd_resume_uid
+#undef spd_key
+#undef spd_char
+#undef spd_wchar
+#undef spd_sound_icon
+#undef spd_set_voice_type
+#undef spd_set_voice_type_all
+#undef spd_set_voice_type_uid
+#undef spd_get_voice_type
+#undef spd_set_synthesis_voice
+#undef spd_set_synthesis_voice_all
+#undef spd_set_synthesis_voice_uid
+#undef spd_set_data_mode
+#undef spd_set_notification_on
+#undef spd_set_notification_off
+#undef spd_set_notification
+#undef spd_set_voice_rate
+#undef spd_set_voice_rate_all
+#undef spd_set_voice_rate_uid
+#undef spd_get_voice_rate
+#undef spd_set_voice_pitch
+#undef spd_set_voice_pitch_all
+#undef spd_set_voice_pitch_uid
+#undef spd_get_voice_pitch
+#undef spd_set_voice_pitch_range
+#undef spd_set_voice_pitch_range_all
+#undef spd_set_voice_pitch_range_uid
+#undef spd_set_volume
+#undef spd_set_volume_all
+#undef spd_set_volume_uid
+#undef spd_get_volume
+#undef spd_set_punctuation
+#undef spd_set_punctuation_all
+#undef spd_set_punctuation_uid
+#undef spd_set_capital_letters
+#undef spd_set_capital_letters_all
+#undef spd_set_capital_letters_uid
+#undef spd_set_spelling
+#undef spd_set_spelling_all
+#undef spd_set_spelling_uid
+#undef spd_set_language
+#undef spd_set_language_all
+#undef spd_set_language_uid
+#undef spd_get_language
+#undef spd_set_output_module
+#undef spd_set_output_module_all
+#undef spd_set_output_module_uid
+#undef spd_get_message_list_fd
+#undef spd_list_modules
+#undef free_spd_modules
+#undef spd_get_output_module
+#undef spd_list_voices
+#undef spd_list_synthesis_voices
+#undef free_spd_voices
+#undef spd_execute_command_with_list_reply
+#undef spd_execute_command
+#undef spd_execute_command_with_reply
+#undef spd_execute_command_wo_mutex
+#undef spd_send_data
+#undef spd_send_data_wo_mutex
+#include <dlfcn.h>
+#include <stdio.h>
+void (*SPDConnectionAddress__free_dylibloader_wrapper_speechd)( SPDConnectionAddress*);
+SPDConnectionAddress* (*spd_get_default_address_dylibloader_wrapper_speechd)( char**);
+SPDConnection* (*spd_open_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode);
+SPDConnection* (*spd_open2_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode, SPDConnectionAddress*, int, char**);
+void (*spd_close_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_say_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+int (*spd_sayf_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*,...);
+int (*spd_stop_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_stop_all_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_stop_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+int (*spd_cancel_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_cancel_all_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_cancel_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+int (*spd_pause_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_pause_all_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_pause_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+int (*spd_resume_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_resume_all_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_resume_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+int (*spd_key_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+int (*spd_char_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+int (*spd_wchar_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority, wchar_t);
+int (*spd_sound_icon_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+int (*spd_set_voice_type_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType);
+int (*spd_set_voice_type_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType);
+int (*spd_set_voice_type_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType, unsigned int);
+SPDVoiceType (*spd_get_voice_type_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_set_synthesis_voice_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+int (*spd_set_synthesis_voice_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+int (*spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int);
+int (*spd_set_data_mode_dylibloader_wrapper_speechd)( SPDConnection*, SPDDataMode);
+int (*spd_set_notification_on_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification);
+int (*spd_set_notification_off_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification);
+int (*spd_set_notification_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification,const char*);
+int (*spd_set_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_voice_rate_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_voice_rate_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+int (*spd_get_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_set_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_voice_pitch_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_voice_pitch_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+int (*spd_get_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_set_voice_pitch_range_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+int (*spd_set_volume_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_volume_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+int (*spd_set_volume_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+int (*spd_get_volume_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_set_punctuation_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation);
+int (*spd_set_punctuation_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation);
+int (*spd_set_punctuation_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation, unsigned int);
+int (*spd_set_capital_letters_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters);
+int (*spd_set_capital_letters_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters);
+int (*spd_set_capital_letters_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters, unsigned int);
+int (*spd_set_spelling_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling);
+int (*spd_set_spelling_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling);
+int (*spd_set_spelling_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling, unsigned int);
+int (*spd_set_language_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+int (*spd_set_language_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+int (*spd_set_language_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int);
+char* (*spd_get_language_dylibloader_wrapper_speechd)( SPDConnection*);
+int (*spd_set_output_module_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+int (*spd_set_output_module_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+int (*spd_set_output_module_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int);
+int (*spd_get_message_list_fd_dylibloader_wrapper_speechd)( SPDConnection*, int, int*, char**);
+char** (*spd_list_modules_dylibloader_wrapper_speechd)( SPDConnection*);
+void (*free_spd_modules_dylibloader_wrapper_speechd)( char**);
+char* (*spd_get_output_module_dylibloader_wrapper_speechd)( SPDConnection*);
+char** (*spd_list_voices_dylibloader_wrapper_speechd)( SPDConnection*);
+SPDVoice** (*spd_list_synthesis_voices_dylibloader_wrapper_speechd)( SPDConnection*);
+void (*free_spd_voices_dylibloader_wrapper_speechd)( SPDVoice**);
+char** (*spd_execute_command_with_list_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*);
+int (*spd_execute_command_dylibloader_wrapper_speechd)( SPDConnection*, char*);
+int (*spd_execute_command_with_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*, char**);
+int (*spd_execute_command_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*, char*);
+char* (*spd_send_data_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int);
+char* (*spd_send_data_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int);
+int initialize_speechd(int verbose) {
+  void *handle;
+  char *error;
+  handle = dlopen("libspeechd.so.2", RTLD_LAZY);
+  if (!handle) {
+    if (verbose) {
+      fprintf(stderr, "%s\n", dlerror());
+    }
+    return(1);
+  }
+  dlerror();
+// SPDConnectionAddress__free
+  *(void **) (&SPDConnectionAddress__free_dylibloader_wrapper_speechd) = dlsym(handle, "SPDConnectionAddress__free");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_default_address
+  *(void **) (&spd_get_default_address_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_default_address");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_open
+  *(void **) (&spd_open_dylibloader_wrapper_speechd) = dlsym(handle, "spd_open");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_open2
+  *(void **) (&spd_open2_dylibloader_wrapper_speechd) = dlsym(handle, "spd_open2");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_close
+  *(void **) (&spd_close_dylibloader_wrapper_speechd) = dlsym(handle, "spd_close");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_say
+  *(void **) (&spd_say_dylibloader_wrapper_speechd) = dlsym(handle, "spd_say");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_sayf
+  *(void **) (&spd_sayf_dylibloader_wrapper_speechd) = dlsym(handle, "spd_sayf");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_stop
+  *(void **) (&spd_stop_dylibloader_wrapper_speechd) = dlsym(handle, "spd_stop");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_stop_all
+  *(void **) (&spd_stop_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_stop_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_stop_uid
+  *(void **) (&spd_stop_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_stop_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_cancel
+  *(void **) (&spd_cancel_dylibloader_wrapper_speechd) = dlsym(handle, "spd_cancel");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_cancel_all
+  *(void **) (&spd_cancel_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_cancel_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_cancel_uid
+  *(void **) (&spd_cancel_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_cancel_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_pause
+  *(void **) (&spd_pause_dylibloader_wrapper_speechd) = dlsym(handle, "spd_pause");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_pause_all
+  *(void **) (&spd_pause_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_pause_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_pause_uid
+  *(void **) (&spd_pause_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_pause_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_resume
+  *(void **) (&spd_resume_dylibloader_wrapper_speechd) = dlsym(handle, "spd_resume");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_resume_all
+  *(void **) (&spd_resume_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_resume_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_resume_uid
+  *(void **) (&spd_resume_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_resume_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_key
+  *(void **) (&spd_key_dylibloader_wrapper_speechd) = dlsym(handle, "spd_key");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_char
+  *(void **) (&spd_char_dylibloader_wrapper_speechd) = dlsym(handle, "spd_char");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_wchar
+  *(void **) (&spd_wchar_dylibloader_wrapper_speechd) = dlsym(handle, "spd_wchar");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_sound_icon
+  *(void **) (&spd_sound_icon_dylibloader_wrapper_speechd) = dlsym(handle, "spd_sound_icon");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_type
+  *(void **) (&spd_set_voice_type_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_type");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_type_all
+  *(void **) (&spd_set_voice_type_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_type_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_type_uid
+  *(void **) (&spd_set_voice_type_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_type_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_voice_type
+  *(void **) (&spd_get_voice_type_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_voice_type");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_synthesis_voice
+  *(void **) (&spd_set_synthesis_voice_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_synthesis_voice");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_synthesis_voice_all
+  *(void **) (&spd_set_synthesis_voice_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_synthesis_voice_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_synthesis_voice_uid
+  *(void **) (&spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_synthesis_voice_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_data_mode
+  *(void **) (&spd_set_data_mode_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_data_mode");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_notification_on
+  *(void **) (&spd_set_notification_on_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_notification_on");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_notification_off
+  *(void **) (&spd_set_notification_off_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_notification_off");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_notification
+  *(void **) (&spd_set_notification_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_notification");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_rate
+  *(void **) (&spd_set_voice_rate_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_rate");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_rate_all
+  *(void **) (&spd_set_voice_rate_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_rate_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_rate_uid
+  *(void **) (&spd_set_voice_rate_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_rate_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_voice_rate
+  *(void **) (&spd_get_voice_rate_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_voice_rate");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_pitch
+  *(void **) (&spd_set_voice_pitch_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_pitch_all
+  *(void **) (&spd_set_voice_pitch_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_pitch_uid
+  *(void **) (&spd_set_voice_pitch_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_voice_pitch
+  *(void **) (&spd_get_voice_pitch_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_voice_pitch");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_pitch_range
+  *(void **) (&spd_set_voice_pitch_range_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_range");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_pitch_range_all
+  *(void **) (&spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_range_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_voice_pitch_range_uid
+  *(void **) (&spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_voice_pitch_range_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_volume
+  *(void **) (&spd_set_volume_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_volume");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_volume_all
+  *(void **) (&spd_set_volume_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_volume_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_volume_uid
+  *(void **) (&spd_set_volume_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_volume_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_volume
+  *(void **) (&spd_get_volume_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_volume");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_punctuation
+  *(void **) (&spd_set_punctuation_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_punctuation");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_punctuation_all
+  *(void **) (&spd_set_punctuation_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_punctuation_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_punctuation_uid
+  *(void **) (&spd_set_punctuation_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_punctuation_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_capital_letters
+  *(void **) (&spd_set_capital_letters_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_capital_letters");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_capital_letters_all
+  *(void **) (&spd_set_capital_letters_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_capital_letters_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_capital_letters_uid
+  *(void **) (&spd_set_capital_letters_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_capital_letters_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_spelling
+  *(void **) (&spd_set_spelling_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_spelling");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_spelling_all
+  *(void **) (&spd_set_spelling_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_spelling_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_spelling_uid
+  *(void **) (&spd_set_spelling_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_spelling_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_language
+  *(void **) (&spd_set_language_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_language");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_language_all
+  *(void **) (&spd_set_language_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_language_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_language_uid
+  *(void **) (&spd_set_language_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_language_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_language
+  *(void **) (&spd_get_language_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_language");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_output_module
+  *(void **) (&spd_set_output_module_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_output_module");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_output_module_all
+  *(void **) (&spd_set_output_module_all_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_output_module_all");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_set_output_module_uid
+  *(void **) (&spd_set_output_module_uid_dylibloader_wrapper_speechd) = dlsym(handle, "spd_set_output_module_uid");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_message_list_fd
+  *(void **) (&spd_get_message_list_fd_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_message_list_fd");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_list_modules
+  *(void **) (&spd_list_modules_dylibloader_wrapper_speechd) = dlsym(handle, "spd_list_modules");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// free_spd_modules
+  *(void **) (&free_spd_modules_dylibloader_wrapper_speechd) = dlsym(handle, "free_spd_modules");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_get_output_module
+  *(void **) (&spd_get_output_module_dylibloader_wrapper_speechd) = dlsym(handle, "spd_get_output_module");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_list_voices
+  *(void **) (&spd_list_voices_dylibloader_wrapper_speechd) = dlsym(handle, "spd_list_voices");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_list_synthesis_voices
+  *(void **) (&spd_list_synthesis_voices_dylibloader_wrapper_speechd) = dlsym(handle, "spd_list_synthesis_voices");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// free_spd_voices
+  *(void **) (&free_spd_voices_dylibloader_wrapper_speechd) = dlsym(handle, "free_spd_voices");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_execute_command_with_list_reply
+  *(void **) (&spd_execute_command_with_list_reply_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command_with_list_reply");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_execute_command
+  *(void **) (&spd_execute_command_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_execute_command_with_reply
+  *(void **) (&spd_execute_command_with_reply_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command_with_reply");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_execute_command_wo_mutex
+  *(void **) (&spd_execute_command_wo_mutex_dylibloader_wrapper_speechd) = dlsym(handle, "spd_execute_command_wo_mutex");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_send_data
+  *(void **) (&spd_send_data_dylibloader_wrapper_speechd) = dlsym(handle, "spd_send_data");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+// spd_send_data_wo_mutex
+  *(void **) (&spd_send_data_wo_mutex_dylibloader_wrapper_speechd) = dlsym(handle, "spd_send_data_wo_mutex");
+  if (verbose) {
+    error = dlerror();
+    if (error != NULL) {
+      fprintf(stderr, "%s\n", error);
+    }
+  }
+return 0;
+}

+ 330 - 0
platform/linuxbsd/speechd-so_wrap.h

@@ -0,0 +1,330 @@
+#ifndef DYLIBLOAD_WRAPPER_SPEECHD
+#define DYLIBLOAD_WRAPPER_SPEECHD
+// This file is generated. Do not edit!
+// see https://github.com/hpvb/dynload-wrapper for details
+// generated by ./dynload-wrapper/generate-wrapper.py 0.3 on 2022-04-28 14:34:21
+// flags: ./dynload-wrapper/generate-wrapper.py --sys-include <libspeechd.h> --include /usr/include/speech-dispatcher/libspeechd.h --soname libspeechd.so.2 --init-name speechd --omit-prefix spd_get_client_list --output-header speechd-so_wrap.h --output-implementation speechd-so_wrap.c
+// Layout: each wrapped name below is first #defined to a *_dylibloader_orig_speechd alias while <libspeechd.h> is included, then #undef'd and re-#defined to the *_dylibloader_wrapper_speechd function pointer that speechd-so_wrap.c assigns from dlsym().
+#include <stdint.h>
+
+#define SPDConnectionAddress__free SPDConnectionAddress__free_dylibloader_orig_speechd // First pass: alias every wrapped name to *_dylibloader_orig_speechd for the duration of the #include further down.
+#define spd_get_default_address spd_get_default_address_dylibloader_orig_speechd
+#define spd_open spd_open_dylibloader_orig_speechd
+#define spd_open2 spd_open2_dylibloader_orig_speechd
+#define spd_close spd_close_dylibloader_orig_speechd
+#define spd_say spd_say_dylibloader_orig_speechd
+#define spd_sayf spd_sayf_dylibloader_orig_speechd
+#define spd_stop spd_stop_dylibloader_orig_speechd
+#define spd_stop_all spd_stop_all_dylibloader_orig_speechd
+#define spd_stop_uid spd_stop_uid_dylibloader_orig_speechd
+#define spd_cancel spd_cancel_dylibloader_orig_speechd
+#define spd_cancel_all spd_cancel_all_dylibloader_orig_speechd
+#define spd_cancel_uid spd_cancel_uid_dylibloader_orig_speechd
+#define spd_pause spd_pause_dylibloader_orig_speechd
+#define spd_pause_all spd_pause_all_dylibloader_orig_speechd
+#define spd_pause_uid spd_pause_uid_dylibloader_orig_speechd
+#define spd_resume spd_resume_dylibloader_orig_speechd
+#define spd_resume_all spd_resume_all_dylibloader_orig_speechd
+#define spd_resume_uid spd_resume_uid_dylibloader_orig_speechd
+#define spd_key spd_key_dylibloader_orig_speechd
+#define spd_char spd_char_dylibloader_orig_speechd
+#define spd_wchar spd_wchar_dylibloader_orig_speechd
+#define spd_sound_icon spd_sound_icon_dylibloader_orig_speechd
+#define spd_set_voice_type spd_set_voice_type_dylibloader_orig_speechd
+#define spd_set_voice_type_all spd_set_voice_type_all_dylibloader_orig_speechd
+#define spd_set_voice_type_uid spd_set_voice_type_uid_dylibloader_orig_speechd
+#define spd_get_voice_type spd_get_voice_type_dylibloader_orig_speechd
+#define spd_set_synthesis_voice spd_set_synthesis_voice_dylibloader_orig_speechd
+#define spd_set_synthesis_voice_all spd_set_synthesis_voice_all_dylibloader_orig_speechd
+#define spd_set_synthesis_voice_uid spd_set_synthesis_voice_uid_dylibloader_orig_speechd
+#define spd_set_data_mode spd_set_data_mode_dylibloader_orig_speechd
+#define spd_set_notification_on spd_set_notification_on_dylibloader_orig_speechd
+#define spd_set_notification_off spd_set_notification_off_dylibloader_orig_speechd
+#define spd_set_notification spd_set_notification_dylibloader_orig_speechd
+#define spd_set_voice_rate spd_set_voice_rate_dylibloader_orig_speechd
+#define spd_set_voice_rate_all spd_set_voice_rate_all_dylibloader_orig_speechd
+#define spd_set_voice_rate_uid spd_set_voice_rate_uid_dylibloader_orig_speechd
+#define spd_get_voice_rate spd_get_voice_rate_dylibloader_orig_speechd
+#define spd_set_voice_pitch spd_set_voice_pitch_dylibloader_orig_speechd
+#define spd_set_voice_pitch_all spd_set_voice_pitch_all_dylibloader_orig_speechd
+#define spd_set_voice_pitch_uid spd_set_voice_pitch_uid_dylibloader_orig_speechd
+#define spd_get_voice_pitch spd_get_voice_pitch_dylibloader_orig_speechd
+#define spd_set_voice_pitch_range spd_set_voice_pitch_range_dylibloader_orig_speechd
+#define spd_set_voice_pitch_range_all spd_set_voice_pitch_range_all_dylibloader_orig_speechd
+#define spd_set_voice_pitch_range_uid spd_set_voice_pitch_range_uid_dylibloader_orig_speechd
+#define spd_set_volume spd_set_volume_dylibloader_orig_speechd
+#define spd_set_volume_all spd_set_volume_all_dylibloader_orig_speechd
+#define spd_set_volume_uid spd_set_volume_uid_dylibloader_orig_speechd
+#define spd_get_volume spd_get_volume_dylibloader_orig_speechd
+#define spd_set_punctuation spd_set_punctuation_dylibloader_orig_speechd
+#define spd_set_punctuation_all spd_set_punctuation_all_dylibloader_orig_speechd
+#define spd_set_punctuation_uid spd_set_punctuation_uid_dylibloader_orig_speechd
+#define spd_set_capital_letters spd_set_capital_letters_dylibloader_orig_speechd
+#define spd_set_capital_letters_all spd_set_capital_letters_all_dylibloader_orig_speechd
+#define spd_set_capital_letters_uid spd_set_capital_letters_uid_dylibloader_orig_speechd
+#define spd_set_spelling spd_set_spelling_dylibloader_orig_speechd
+#define spd_set_spelling_all spd_set_spelling_all_dylibloader_orig_speechd
+#define spd_set_spelling_uid spd_set_spelling_uid_dylibloader_orig_speechd
+#define spd_set_language spd_set_language_dylibloader_orig_speechd
+#define spd_set_language_all spd_set_language_all_dylibloader_orig_speechd
+#define spd_set_language_uid spd_set_language_uid_dylibloader_orig_speechd
+#define spd_get_language spd_get_language_dylibloader_orig_speechd
+#define spd_set_output_module spd_set_output_module_dylibloader_orig_speechd
+#define spd_set_output_module_all spd_set_output_module_all_dylibloader_orig_speechd
+#define spd_set_output_module_uid spd_set_output_module_uid_dylibloader_orig_speechd
+#define spd_get_message_list_fd spd_get_message_list_fd_dylibloader_orig_speechd
+#define spd_list_modules spd_list_modules_dylibloader_orig_speechd
+#define free_spd_modules free_spd_modules_dylibloader_orig_speechd
+#define spd_get_output_module spd_get_output_module_dylibloader_orig_speechd
+#define spd_list_voices spd_list_voices_dylibloader_orig_speechd
+#define spd_list_synthesis_voices spd_list_synthesis_voices_dylibloader_orig_speechd
+#define free_spd_voices free_spd_voices_dylibloader_orig_speechd
+#define spd_execute_command_with_list_reply spd_execute_command_with_list_reply_dylibloader_orig_speechd
+#define spd_execute_command spd_execute_command_dylibloader_orig_speechd
+#define spd_execute_command_with_reply spd_execute_command_with_reply_dylibloader_orig_speechd
+#define spd_execute_command_wo_mutex spd_execute_command_wo_mutex_dylibloader_orig_speechd
+#define spd_send_data spd_send_data_dylibloader_orig_speechd
+#define spd_send_data_wo_mutex spd_send_data_wo_mutex_dylibloader_orig_speechd
+#include <libspeechd.h> // Included while the aliases above are active, so any occurrence of those names inside this header is renamed to its *_dylibloader_orig_speechd alias.
+#undef SPDConnectionAddress__free // Second pass: drop every alias so the plain names are free to be re-#defined to the *_dylibloader_wrapper_speechd function pointers later in this header.
+#undef spd_get_default_address
+#undef spd_open
+#undef spd_open2
+#undef spd_close
+#undef spd_say
+#undef spd_sayf
+#undef spd_stop
+#undef spd_stop_all
+#undef spd_stop_uid
+#undef spd_cancel
+#undef spd_cancel_all
+#undef spd_cancel_uid
+#undef spd_pause
+#undef spd_pause_all
+#undef spd_pause_uid
+#undef spd_resume
+#undef spd_resume_all
+#undef spd_resume_uid
+#undef spd_key
+#undef spd_char
+#undef spd_wchar
+#undef spd_sound_icon
+#undef spd_set_voice_type
+#undef spd_set_voice_type_all
+#undef spd_set_voice_type_uid
+#undef spd_get_voice_type
+#undef spd_set_synthesis_voice
+#undef spd_set_synthesis_voice_all
+#undef spd_set_synthesis_voice_uid
+#undef spd_set_data_mode
+#undef spd_set_notification_on
+#undef spd_set_notification_off
+#undef spd_set_notification
+#undef spd_set_voice_rate
+#undef spd_set_voice_rate_all
+#undef spd_set_voice_rate_uid
+#undef spd_get_voice_rate
+#undef spd_set_voice_pitch
+#undef spd_set_voice_pitch_all
+#undef spd_set_voice_pitch_uid
+#undef spd_get_voice_pitch
+#undef spd_set_voice_pitch_range
+#undef spd_set_voice_pitch_range_all
+#undef spd_set_voice_pitch_range_uid
+#undef spd_set_volume
+#undef spd_set_volume_all
+#undef spd_set_volume_uid
+#undef spd_get_volume
+#undef spd_set_punctuation
+#undef spd_set_punctuation_all
+#undef spd_set_punctuation_uid
+#undef spd_set_capital_letters
+#undef spd_set_capital_letters_all
+#undef spd_set_capital_letters_uid
+#undef spd_set_spelling
+#undef spd_set_spelling_all
+#undef spd_set_spelling_uid
+#undef spd_set_language
+#undef spd_set_language_all
+#undef spd_set_language_uid
+#undef spd_get_language
+#undef spd_set_output_module
+#undef spd_set_output_module_all
+#undef spd_set_output_module_uid
+#undef spd_get_message_list_fd
+#undef spd_list_modules
+#undef free_spd_modules
+#undef spd_get_output_module
+#undef spd_list_voices
+#undef spd_list_synthesis_voices
+#undef free_spd_voices
+#undef spd_execute_command_with_list_reply
+#undef spd_execute_command
+#undef spd_execute_command_with_reply
+#undef spd_execute_command_wo_mutex
+#undef spd_send_data
+#undef spd_send_data_wo_mutex
+#ifdef __cplusplus
+extern "C" {
+#endif
+#define SPDConnectionAddress__free SPDConnectionAddress__free_dylibloader_wrapper_speechd
+#define spd_get_default_address spd_get_default_address_dylibloader_wrapper_speechd
+#define spd_open spd_open_dylibloader_wrapper_speechd
+#define spd_open2 spd_open2_dylibloader_wrapper_speechd
+#define spd_close spd_close_dylibloader_wrapper_speechd
+#define spd_say spd_say_dylibloader_wrapper_speechd
+#define spd_sayf spd_sayf_dylibloader_wrapper_speechd
+#define spd_stop spd_stop_dylibloader_wrapper_speechd
+#define spd_stop_all spd_stop_all_dylibloader_wrapper_speechd
+#define spd_stop_uid spd_stop_uid_dylibloader_wrapper_speechd
+#define spd_cancel spd_cancel_dylibloader_wrapper_speechd
+#define spd_cancel_all spd_cancel_all_dylibloader_wrapper_speechd
+#define spd_cancel_uid spd_cancel_uid_dylibloader_wrapper_speechd
+#define spd_pause spd_pause_dylibloader_wrapper_speechd
+#define spd_pause_all spd_pause_all_dylibloader_wrapper_speechd
+#define spd_pause_uid spd_pause_uid_dylibloader_wrapper_speechd
+#define spd_resume spd_resume_dylibloader_wrapper_speechd
+#define spd_resume_all spd_resume_all_dylibloader_wrapper_speechd
+#define spd_resume_uid spd_resume_uid_dylibloader_wrapper_speechd
+#define spd_key spd_key_dylibloader_wrapper_speechd
+#define spd_char spd_char_dylibloader_wrapper_speechd
+#define spd_wchar spd_wchar_dylibloader_wrapper_speechd
+#define spd_sound_icon spd_sound_icon_dylibloader_wrapper_speechd
+#define spd_set_voice_type spd_set_voice_type_dylibloader_wrapper_speechd
+#define spd_set_voice_type_all spd_set_voice_type_all_dylibloader_wrapper_speechd
+#define spd_set_voice_type_uid spd_set_voice_type_uid_dylibloader_wrapper_speechd
+#define spd_get_voice_type spd_get_voice_type_dylibloader_wrapper_speechd
+#define spd_set_synthesis_voice spd_set_synthesis_voice_dylibloader_wrapper_speechd
+#define spd_set_synthesis_voice_all spd_set_synthesis_voice_all_dylibloader_wrapper_speechd
+#define spd_set_synthesis_voice_uid spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd
+#define spd_set_data_mode spd_set_data_mode_dylibloader_wrapper_speechd
+#define spd_set_notification_on spd_set_notification_on_dylibloader_wrapper_speechd
+#define spd_set_notification_off spd_set_notification_off_dylibloader_wrapper_speechd
+#define spd_set_notification spd_set_notification_dylibloader_wrapper_speechd
+#define spd_set_voice_rate spd_set_voice_rate_dylibloader_wrapper_speechd
+#define spd_set_voice_rate_all spd_set_voice_rate_all_dylibloader_wrapper_speechd
+#define spd_set_voice_rate_uid spd_set_voice_rate_uid_dylibloader_wrapper_speechd
+#define spd_get_voice_rate spd_get_voice_rate_dylibloader_wrapper_speechd
+#define spd_set_voice_pitch spd_set_voice_pitch_dylibloader_wrapper_speechd
+#define spd_set_voice_pitch_all spd_set_voice_pitch_all_dylibloader_wrapper_speechd
+#define spd_set_voice_pitch_uid spd_set_voice_pitch_uid_dylibloader_wrapper_speechd
+#define spd_get_voice_pitch spd_get_voice_pitch_dylibloader_wrapper_speechd
+#define spd_set_voice_pitch_range spd_set_voice_pitch_range_dylibloader_wrapper_speechd
+#define spd_set_voice_pitch_range_all spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd
+#define spd_set_voice_pitch_range_uid spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd
+#define spd_set_volume spd_set_volume_dylibloader_wrapper_speechd
+#define spd_set_volume_all spd_set_volume_all_dylibloader_wrapper_speechd
+#define spd_set_volume_uid spd_set_volume_uid_dylibloader_wrapper_speechd
+#define spd_get_volume spd_get_volume_dylibloader_wrapper_speechd
+#define spd_set_punctuation spd_set_punctuation_dylibloader_wrapper_speechd
+#define spd_set_punctuation_all spd_set_punctuation_all_dylibloader_wrapper_speechd
+#define spd_set_punctuation_uid spd_set_punctuation_uid_dylibloader_wrapper_speechd
+#define spd_set_capital_letters spd_set_capital_letters_dylibloader_wrapper_speechd
+#define spd_set_capital_letters_all spd_set_capital_letters_all_dylibloader_wrapper_speechd
+#define spd_set_capital_letters_uid spd_set_capital_letters_uid_dylibloader_wrapper_speechd
+#define spd_set_spelling spd_set_spelling_dylibloader_wrapper_speechd
+#define spd_set_spelling_all spd_set_spelling_all_dylibloader_wrapper_speechd
+#define spd_set_spelling_uid spd_set_spelling_uid_dylibloader_wrapper_speechd
+#define spd_set_language spd_set_language_dylibloader_wrapper_speechd
+#define spd_set_language_all spd_set_language_all_dylibloader_wrapper_speechd
+#define spd_set_language_uid spd_set_language_uid_dylibloader_wrapper_speechd
+#define spd_get_language spd_get_language_dylibloader_wrapper_speechd
+#define spd_set_output_module spd_set_output_module_dylibloader_wrapper_speechd
+#define spd_set_output_module_all spd_set_output_module_all_dylibloader_wrapper_speechd
+#define spd_set_output_module_uid spd_set_output_module_uid_dylibloader_wrapper_speechd
+#define spd_get_message_list_fd spd_get_message_list_fd_dylibloader_wrapper_speechd
+#define spd_list_modules spd_list_modules_dylibloader_wrapper_speechd
+#define free_spd_modules free_spd_modules_dylibloader_wrapper_speechd
+#define spd_get_output_module spd_get_output_module_dylibloader_wrapper_speechd
+#define spd_list_voices spd_list_voices_dylibloader_wrapper_speechd
+#define spd_list_synthesis_voices spd_list_synthesis_voices_dylibloader_wrapper_speechd
+#define free_spd_voices free_spd_voices_dylibloader_wrapper_speechd
+#define spd_execute_command_with_list_reply spd_execute_command_with_list_reply_dylibloader_wrapper_speechd
+#define spd_execute_command spd_execute_command_dylibloader_wrapper_speechd
+#define spd_execute_command_with_reply spd_execute_command_with_reply_dylibloader_wrapper_speechd
+#define spd_execute_command_wo_mutex spd_execute_command_wo_mutex_dylibloader_wrapper_speechd
+#define spd_send_data spd_send_data_dylibloader_wrapper_speechd
+#define spd_send_data_wo_mutex spd_send_data_wo_mutex_dylibloader_wrapper_speechd
+extern void (*SPDConnectionAddress__free_dylibloader_wrapper_speechd)( SPDConnectionAddress*);
+extern SPDConnectionAddress* (*spd_get_default_address_dylibloader_wrapper_speechd)( char**);
+extern SPDConnection* (*spd_open_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode);
+extern SPDConnection* (*spd_open2_dylibloader_wrapper_speechd)(const char*,const char*,const char*, SPDConnectionMode, SPDConnectionAddress*, int, char**);
+extern void (*spd_close_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_say_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+extern int (*spd_sayf_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*,...);
+extern int (*spd_stop_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_stop_all_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_stop_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+extern int (*spd_cancel_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_cancel_all_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_cancel_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+extern int (*spd_pause_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_pause_all_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_pause_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+extern int (*spd_resume_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_resume_all_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_resume_uid_dylibloader_wrapper_speechd)( SPDConnection*, int);
+extern int (*spd_key_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+extern int (*spd_char_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+extern int (*spd_wchar_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority, wchar_t);
+extern int (*spd_sound_icon_dylibloader_wrapper_speechd)( SPDConnection*, SPDPriority,const char*);
+extern int (*spd_set_voice_type_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType);
+extern int (*spd_set_voice_type_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType);
+extern int (*spd_set_voice_type_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDVoiceType, unsigned int);
+extern SPDVoiceType (*spd_get_voice_type_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_set_synthesis_voice_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+extern int (*spd_set_synthesis_voice_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+extern int (*spd_set_synthesis_voice_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int);
+extern int (*spd_set_data_mode_dylibloader_wrapper_speechd)( SPDConnection*, SPDDataMode);
+extern int (*spd_set_notification_on_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification);
+extern int (*spd_set_notification_off_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification);
+extern int (*spd_set_notification_dylibloader_wrapper_speechd)( SPDConnection*, SPDNotification,const char*);
+extern int (*spd_set_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_voice_rate_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_voice_rate_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+extern int (*spd_get_voice_rate_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_set_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_voice_pitch_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_voice_pitch_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+extern int (*spd_get_voice_pitch_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_set_voice_pitch_range_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_voice_pitch_range_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_voice_pitch_range_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+extern int (*spd_set_volume_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_volume_all_dylibloader_wrapper_speechd)( SPDConnection*, signed int);
+extern int (*spd_set_volume_uid_dylibloader_wrapper_speechd)( SPDConnection*, signed int, unsigned int);
+extern int (*spd_get_volume_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_set_punctuation_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation);
+extern int (*spd_set_punctuation_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation);
+extern int (*spd_set_punctuation_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDPunctuation, unsigned int);
+extern int (*spd_set_capital_letters_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters);
+extern int (*spd_set_capital_letters_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters);
+extern int (*spd_set_capital_letters_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDCapitalLetters, unsigned int);
+extern int (*spd_set_spelling_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling);
+extern int (*spd_set_spelling_all_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling);
+extern int (*spd_set_spelling_uid_dylibloader_wrapper_speechd)( SPDConnection*, SPDSpelling, unsigned int);
+extern int (*spd_set_language_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+extern int (*spd_set_language_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+extern int (*spd_set_language_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int);
+extern char* (*spd_get_language_dylibloader_wrapper_speechd)( SPDConnection*);
+extern int (*spd_set_output_module_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+extern int (*spd_set_output_module_all_dylibloader_wrapper_speechd)( SPDConnection*,const char*);
+extern int (*spd_set_output_module_uid_dylibloader_wrapper_speechd)( SPDConnection*,const char*, unsigned int);
+extern int (*spd_get_message_list_fd_dylibloader_wrapper_speechd)( SPDConnection*, int, int*, char**);
+extern char** (*spd_list_modules_dylibloader_wrapper_speechd)( SPDConnection*);
+extern void (*free_spd_modules_dylibloader_wrapper_speechd)( char**);
+extern char* (*spd_get_output_module_dylibloader_wrapper_speechd)( SPDConnection*);
+extern char** (*spd_list_voices_dylibloader_wrapper_speechd)( SPDConnection*);
+extern SPDVoice** (*spd_list_synthesis_voices_dylibloader_wrapper_speechd)( SPDConnection*);
+extern void (*free_spd_voices_dylibloader_wrapper_speechd)( SPDVoice**);
+extern char** (*spd_execute_command_with_list_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*);
+extern int (*spd_execute_command_dylibloader_wrapper_speechd)( SPDConnection*, char*);
+extern int (*spd_execute_command_with_reply_dylibloader_wrapper_speechd)( SPDConnection*, char*, char**);
+extern int (*spd_execute_command_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*, char*);
+extern char* (*spd_send_data_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int);
+extern char* (*spd_send_data_wo_mutex_dylibloader_wrapper_speechd)( SPDConnection*,const char*, int);
+int initialize_speechd(int verbose);
+#ifdef __cplusplus
+}
+#endif
+#endif

+ 261 - 0
platform/linuxbsd/tts_linux.cpp

@@ -0,0 +1,261 @@
+/*************************************************************************/
+/*  tts_linux.cpp                                                        */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "tts_linux.h"
+
+#include "core/config/project_settings.h"
+#include "servers/text_server.h"
+
+TTS_Linux *TTS_Linux::singleton = nullptr;
+
+// Thread entry point started by the TTS_Linux constructor.
+// Loads the Speech Dispatcher library, opens the connection stored in `synth`
+// and registers the event callbacks. `p_userdata` is the owning TTS_Linux.
+// The instance mutex is held for the whole initialization.
+void TTS_Linux::speech_init_thread_func(void *p_userdata) {
+	TTS_Linux *tts = (TTS_Linux *)p_userdata;
+	if (tts) {
+		MutexLock thread_safe_method(tts->_thread_safe_);
+#ifdef DEBUG_ENABLED
+		int dylibloader_verbose = 1;
+#else
+		int dylibloader_verbose = 0;
+#endif
+		if (initialize_speechd(dylibloader_verbose) == 0) {
+			// Use the project name as the client name, falling back to a fixed one.
+			CharString class_str;
+			String config_name = GLOBAL_GET("application/config/name");
+			if (config_name.length() == 0) {
+				class_str = "Godot_Engine";
+			} else {
+				class_str = config_name.utf8();
+			}
+			tts->synth = spd_open(class_str, "Godot_Engine_Speech_API", "Godot_Engine", SPD_MODE_THREADED);
+			if (tts->synth) {
+				tts->synth->callback_end = &speech_event_callback;
+				tts->synth->callback_cancel = &speech_event_callback;
+				tts->synth->callback_im = &speech_event_index_mark;
+				spd_set_notification_on(tts->synth, SPD_END);
+				spd_set_notification_on(tts->synth, SPD_CANCEL);
+				// Each notification type has to be switched on explicitly; without
+				// this the index-mark callback installed above is never invoked and
+				// no TTS_UTTERANCE_BOUNDARY events are produced.
+				spd_set_notification_on(tts->synth, SPD_INDEX_MARKS);
+
+				print_verbose("Text-to-Speech: Speech Dispatcher initialized.");
+			} else {
+				print_verbose("Text-to-Speech: Cannot initialize Speech Dispatcher synthesizer!");
+			}
+		} else {
+			print_verbose("Text-to-Speech: Cannot load Speech Dispatcher library!");
+		}
+	}
+}
+
+// Speech Dispatcher index-mark callback.
+// `p_index_mark` carries the name of a `<mark>` injected by
+// speech_event_callback(), which is the decimal word offset in the original
+// text. It is forwarded as a TTS_UTTERANCE_BOUNDARY event for the utterance
+// registered under `p_msg_id`; unknown message ids are ignored.
+void TTS_Linux::speech_event_index_mark(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type, char *p_index_mark) {
+	TTS_Linux *tts = TTS_Linux::get_singleton();
+	if (!tts) {
+		return;
+	}
+	// Lock before reading `ids`: the map is modified under this mutex by
+	// speak(), stop() and speech_event_callback().
+	MutexLock thread_safe_method(tts->_thread_safe_);
+	if (!tts->ids.has(p_msg_id)) {
+		return;
+	}
+	// Get word offset from the index mark injected to the text stream.
+	String mark = String::utf8(p_index_mark);
+	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, tts->ids[p_msg_id], mark.to_int());
+}
+
+// Speech Dispatcher end/cancel callback, also called directly by speak() with
+// a dummy message id to start the queue.
+// 1. If `p_msg_id` is a tracked message and playback is not paused, posts the
+//    matching ENDED/CANCELED event and marks the backend idle.
+// 2. If the backend is idle, submits queued utterances until one is accepted.
+void TTS_Linux::speech_event_callback(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type) {
+	TTS_Linux *tts = TTS_Linux::get_singleton();
+	if (tts) {
+		MutexLock thread_safe_method(tts->_thread_safe_);
+		List<DisplayServer::TTSUtterance> &queue = tts->queue;
+		if (!tts->paused && tts->ids.has(p_msg_id)) {
+			if (p_type == SPD_EVENT_END) {
+				DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, tts->ids[p_msg_id]);
+				tts->ids.erase(p_msg_id);
+				tts->last_msg_id = -1;
+				tts->speaking = false;
+			} else if (p_type == SPD_EVENT_CANCEL) {
+				DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, tts->ids[p_msg_id]);
+				tts->ids.erase(p_msg_id);
+				tts->last_msg_id = -1;
+				tts->speaking = false;
+			}
+		}
+		// Loop so that an utterance rejected by spd_say() does not block the
+		// ones queued behind it.
+		while (!tts->speaking && queue.size() > 0) {
+			DisplayServer::TTSUtterance &message = queue.front()->get();
+
+			// Inject index mark after each word.
+			String text;
+			String language;
+			// Look up the language of the requested voice for word breaking.
+			SPDVoice **voices = spd_list_synthesis_voices(tts->synth);
+			if (voices != nullptr) {
+				SPDVoice **voices_ptr = voices;
+				while (*voices_ptr != nullptr) {
+					if (String::utf8((*voices_ptr)->name) == message.voice) {
+						language = String::utf8((*voices_ptr)->language);
+						break;
+					}
+					voices_ptr++;
+				}
+				free_spd_voices(voices);
+			}
+			PackedInt32Array breaks = TS->string_get_word_breaks(message.text, language);
+			int prev = 0;
+			for (int i = 0; i < breaks.size(); i++) {
+				text += message.text.substr(prev, breaks[i] - prev);
+				text += "<mark name=\"" + String::num_int64(breaks[i], 10) + "\"/>";
+				prev = breaks[i];
+			}
+			text += message.text.substr(prev, -1);
+
+			spd_set_synthesis_voice(tts->synth, message.voice.utf8().get_data());
+			// Map volume 0..100 to -100..100 and pitch 0..2 to -100..100.
+			spd_set_volume(tts->synth, message.volume * 2 - 100);
+			spd_set_voice_pitch(tts->synth, (message.pitch - 1) * 100);
+			// Logarithmic mapping of the rate multiplier (clamped to 0.5..2.5) to -100..100.
+			float rate = 0;
+			if (message.rate > 1.f) {
+				rate = log10(MIN(message.rate, 2.5f)) / log10(2.5f) * 100;
+			} else if (message.rate < 1.f) {
+				rate = log10(MAX(message.rate, 0.5f)) / log10(0.5f) * -100;
+			}
+			spd_set_voice_rate(tts->synth, rate);
+			spd_set_data_mode(tts->synth, SPD_DATA_SSML);
+
+			const int msg_id = spd_say(tts->synth, SPD_TEXT, text.utf8().get_data());
+			if (msg_id < 0) {
+				// spd_say() failed: no end/cancel event will ever arrive for this
+				// utterance, so report it as canceled and try the next one instead
+				// of registering id -1 and staying in the speaking state.
+				tts->last_msg_id = -1;
+				DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, message.id);
+				queue.pop_front();
+				continue;
+			}
+			tts->last_msg_id = msg_id;
+			tts->ids[msg_id] = message.id;
+			DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_STARTED, message.id);
+
+			// `message` refers to the front element; pop only after its last use.
+			queue.pop_front();
+			tts->speaking = true;
+		}
+	}
+}
+
+// Returns the `speaking` flag: set when an utterance is handed to spd_say(),
+// cleared on its end/cancel event and by stop(). Read without taking the mutex.
+bool TTS_Linux::is_speaking() const {
+	return speaking;
+}
+
+// Returns the `paused` flag maintained by pause(), resume() and stop().
+// Read without taking the mutex.
+bool TTS_Linux::is_paused() const {
+	return paused;
+}
+
+// Lists the synthesis voices reported by Speech Dispatcher.
+// Each entry is a Dictionary with "name", "id" (both the voice name) and
+// "language" ("<language>_<variant>"). Returns an empty Array when the
+// connection is not open or no voice list is available.
+Array TTS_Linux::get_voices() const {
+	_THREAD_SAFE_METHOD_
+
+	ERR_FAIL_COND_V(!synth, Array());
+	Array result;
+	SPDVoice **voice_list = spd_list_synthesis_voices(synth);
+	if (voice_list == nullptr) {
+		return result;
+	}
+	// The list is terminated by a null entry.
+	for (SPDVoice **it = voice_list; *it != nullptr; it++) {
+		const SPDVoice *entry = *it;
+		const String voice_name = String::utf8(entry->name);
+
+		Dictionary voice_d;
+		voice_d["name"] = voice_name;
+		voice_d["id"] = voice_name;
+		voice_d["language"] = String::utf8(entry->language) + "_" + String::utf8(entry->variant);
+		result.push_back(voice_d);
+	}
+	free_spd_voices(voice_list);
+	return result;
+}
+
+// Queues `p_text` for speaking with the given voice and parameters.
+// - `p_interrupt`: stop() is called first, canceling current and queued utterances.
+// - Empty text is reported immediately as TTS_UTTERANCE_CANCELED.
+// - Volume is clamped to 0..100, pitch to 0..2 and rate to 0.1..10.
+// Fails with an error if the Speech Dispatcher connection is not open.
+void TTS_Linux::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	_THREAD_SAFE_METHOD_
+
+	ERR_FAIL_COND(!synth);
+	if (p_interrupt) {
+		stop();
+	}
+
+	if (p_text.is_empty()) {
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, p_utterance_id);
+		return;
+	}
+
+	DisplayServer::TTSUtterance message;
+	message.text = p_text;
+	message.voice = p_voice;
+	message.volume = CLAMP(p_volume, 0, 100);
+	message.pitch = CLAMP(p_pitch, 0.f, 2.f);
+	message.rate = CLAMP(p_rate, 0.1f, 10.f);
+	message.id = p_utterance_id;
+	queue.push_back(message);
+
+	if (is_paused()) {
+		resume();
+	}
+	// Always kick the queue. If pause() was called while nothing was being
+	// spoken, resume() alone produces no end/cancel event, so the utterance
+	// would otherwise wait until the next speak() call. The callback does
+	// nothing when an utterance is already in flight (`speaking` is true).
+	speech_event_callback(0, 0, SPD_EVENT_BEGIN);
+}
+
+// Asks Speech Dispatcher to pause; `paused` is set only when spd_pause()
+// returns 0. Fails with an error if the connection is not open.
+void TTS_Linux::pause() {
+	_THREAD_SAFE_METHOD_
+
+	ERR_FAIL_COND(!synth);
+	if (spd_pause(synth) == 0) {
+		paused = true;
+	}
+}
+
+// Asks Speech Dispatcher to resume and clears `paused` regardless of the
+// spd_resume() result. Fails with an error if the connection is not open.
+void TTS_Linux::resume() {
+	_THREAD_SAFE_METHOD_
+
+	ERR_FAIL_COND(!synth);
+	spd_resume(synth);
+	paused = false;
+}
+
+// Cancels everything: posts TTS_UTTERANCE_CANCELED for each queued utterance
+// and for the one currently tracked by `last_msg_id`, clears all bookkeeping,
+// then cancels and resumes the Speech Dispatcher connection.
+// Fails with an error if the connection is not open.
+void TTS_Linux::stop() {
+	_THREAD_SAFE_METHOD_
+
+	ERR_FAIL_COND(!synth);
+	DisplayServer *display_server = DisplayServer::get_singleton();
+
+	// Utterances that were never submitted.
+	for (DisplayServer::TTSUtterance &pending : queue) {
+		display_server->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, pending.id);
+	}
+	// The utterance in flight, if any.
+	const bool has_active = (last_msg_id != -1) && ids.has(last_msg_id);
+	if (has_active) {
+		display_server->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, ids[last_msg_id]);
+	}
+
+	// `ids` is emptied before spd_cancel(), so the cancel event from the
+	// server is ignored by speech_event_callback() and not reported twice.
+	queue.clear();
+	ids.clear();
+	last_msg_id = -1;
+
+	spd_cancel(synth);
+	spd_resume(synth);
+
+	speaking = false;
+	paused = false;
+}
+
+// Returns the instance registered by the constructor, or nullptr if none exists.
+TTS_Linux *TTS_Linux::get_singleton() {
+	return singleton;
+}
+
+// Registers this instance as the singleton and starts the initialization
+// thread; `synth` remains null until that thread has opened the connection.
+TTS_Linux::TTS_Linux() {
+	singleton = this;
+	// Speech Dispatcher init can be slow, it might wait for helper process to start on background, so run it in the thread.
+	init_thread.start(speech_init_thread_func, this);
+}
+
+// Waits for the initialization thread, closes the connection if it was
+// opened, and unregisters the singleton.
+TTS_Linux::~TTS_Linux() {
+	// The init thread writes `synth`; it must be finished before `synth` is read.
+	init_thread.wait_to_finish();
+	if (synth) {
+		spd_close(synth);
+	}
+
+	singleton = nullptr;
+}

+ 78 - 0
platform/linuxbsd/tts_linux.h

@@ -0,0 +1,78 @@
+/*************************************************************************/
+/*  tts_linux.h                                                          */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef TTS_LINUX_H
+#define TTS_LINUX_H
+
+#include "core/os/thread.h"
+#include "core/os/thread_safe.h"
+#include "core/string/ustring.h"
+#include "core/templates/list.h"
+#include "core/templates/map.h"
+#include "core/variant/array.h"
+#include "servers/display_server.h"
+
+#include "speechd-so_wrap.h"
+
+// Text-to-speech backend using the Speech Dispatcher client library, accessed
+// through the dynamically loaded wrapper in speechd-so_wrap.h.
+class TTS_Linux {
+	_THREAD_SAFE_CLASS_
+
+	// Utterances accepted by speak() and not yet submitted to Speech Dispatcher.
+	List<DisplayServer::TTSUtterance> queue;
+	// Speech Dispatcher connection; null until the init thread opens it.
+	SPDConnection *synth = nullptr;
+	// True while a submitted utterance has not ended or been canceled.
+	bool speaking = false;
+	bool paused = false;
+	// Speech Dispatcher message id of the utterance in flight, -1 if none.
+	int last_msg_id = -1;
+	// Maps Speech Dispatcher message ids to engine utterance ids.
+	Map<int, int> ids;
+
+	// Runs speech_init_thread_func(); joined in the destructor.
+	Thread init_thread;
+
+	static void speech_init_thread_func(void *p_userdata);
+	// Installed as the end/cancel callback of the connection.
+	static void speech_event_callback(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type);
+	// Installed as the index-mark callback of the connection.
+	static void speech_event_index_mark(size_t p_msg_id, size_t p_client_id, SPDNotificationType p_type, char *p_index_mark);
+
+	static TTS_Linux *singleton;
+
+public:
+	static TTS_Linux *get_singleton();
+
+	bool is_speaking() const;
+	bool is_paused() const;
+	// Returns an Array of Dictionaries with "name", "id" and "language" keys.
+	Array get_voices() const;
+
+	// Queues an utterance; with `p_interrupt` the current and queued ones are canceled first.
+	void speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	void pause();
+	void resume();
+	// Cancels the current and all queued utterances.
+	void stop();
+
+	TTS_Linux();
+	~TTS_Linux();
+};
+
+#endif // TTS_LINUX_H

+ 1 - 0
platform/osx/SCsub

@@ -18,6 +18,7 @@ files = [
     "key_mapping_osx.mm",
     "godot_main_osx.mm",
     "dir_access_osx.mm",
+    "tts_osx.mm",
     "joypad_osx.cpp",
     "vulkan_context_osx.mm",
     "gl_manager_osx_legacy.mm",

+ 11 - 0
platform/osx/display_server_osx.h

@@ -137,6 +137,8 @@ private:
 	Vector<KeyEvent> key_event_buffer;
 	int key_event_pos = 0;
 
+	id tts = nullptr;
+
 	Point2i im_selection;
 	String im_text;
 
@@ -264,6 +266,15 @@ public:
 	virtual void global_menu_remove_item(const String &p_menu_root, int p_idx) override;
 	virtual void global_menu_clear(const String &p_menu_root) override;
 
+	virtual bool tts_is_speaking() const override;
+	virtual bool tts_is_paused() const override;
+	virtual Array tts_get_voices() const override;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override;
+	virtual void tts_pause() override;
+	virtual void tts_resume() override;
+	virtual void tts_stop() override;
+
 	virtual Error dialog_show(String p_title, String p_description, Vector<String> p_buttons, const Callable &p_callback) override;
 	virtual Error dialog_input_text(String p_title, String p_description, String p_partial, const Callable &p_callback) override;
 

+ 41 - 0
platform/osx/display_server_osx.mm

@@ -37,6 +37,8 @@
 #include "key_mapping_osx.h"
 #include "os_osx.h"
 
+#include "tts_osx.h"
+
 #include "core/io/marshalls.h"
 #include "core/math/geometry_2d.h"
 #include "core/os/keyboard.h"
@@ -702,6 +704,7 @@ bool DisplayServerOSX::has_feature(Feature p_feature) const {
 		case FEATURE_NATIVE_ICON:
 		//case FEATURE_KEEP_SCREEN_ON:
 		case FEATURE_SWAP_BUFFERS:
+		case FEATURE_TEXT_TO_SPEECH:
 			return true;
 		default: {
 		}
@@ -1458,6 +1461,41 @@ void DisplayServerOSX::global_menu_clear(const String &p_menu_root) {
 	}
 }
 
+// Forwards to the TTS object's isSpeaking; yields false via ERR_FAIL_COND_V when `tts` is not set.
+bool DisplayServerOSX::tts_is_speaking() const {
+	ERR_FAIL_COND_V(!tts, false);
+	return [tts isSpeaking];
+}
+
+// Forwards to the TTS object's isPaused; yields false via ERR_FAIL_COND_V when `tts` is not set.
+bool DisplayServerOSX::tts_is_paused() const {
+	ERR_FAIL_COND_V(!tts, false);
+	return [tts isPaused];
+}
+
+// Forwards to the TTS object's getVoices; yields an empty Array via ERR_FAIL_COND_V when `tts` is not set.
+Array DisplayServerOSX::tts_get_voices() const {
+	ERR_FAIL_COND_V(!tts, Array());
+	return [tts getVoices];
+}
+
+// Passes all arguments unchanged to the TTS object's speak: method; bails out via ERR_FAIL_COND when `tts` is not set.
+void DisplayServerOSX::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	ERR_FAIL_COND(!tts);
+	[tts speak:p_text voice:p_voice volume:p_volume pitch:p_pitch rate:p_rate utterance_id:p_utterance_id interrupt:p_interrupt];
+}
+
+// Forwards to the TTS object's pauseSpeaking; bails out via ERR_FAIL_COND when `tts` is not set.
+void DisplayServerOSX::tts_pause() {
+	ERR_FAIL_COND(!tts);
+	[tts pauseSpeaking];
+}
+
+// Forwards to the TTS object's resumeSpeaking; bails out via ERR_FAIL_COND when `tts` is not set.
+void DisplayServerOSX::tts_resume() {
+	ERR_FAIL_COND(!tts);
+	[tts resumeSpeaking];
+}
+
+// Forwards to the TTS object's stopSpeaking; bails out via ERR_FAIL_COND when `tts` is not set.
+void DisplayServerOSX::tts_stop() {
+	ERR_FAIL_COND(!tts);
+	[tts stopSpeaking];
+}
+
 Error DisplayServerOSX::dialog_show(String p_title, String p_description, Vector<String> p_buttons, const Callable &p_callback) {
 	_THREAD_SAFE_METHOD_
 
@@ -3121,6 +3159,9 @@ DisplayServerOSX::DisplayServerOSX(const String &p_rendering_driver, WindowMode
 	// Register to be notified on displays arrangement changes.
 	CGDisplayRegisterReconfigurationCallback(_displays_arrangement_changed, nullptr);
 
+	// Init TTS
+	tts = [[TTS_OSX alloc] init];
+
 	NSMenuItem *menu_item;
 	NSString *title;
 

+ 66 - 0
platform/osx/tts_osx.h

@@ -0,0 +1,66 @@
+/*************************************************************************/
+/*  tts_osx.h                                                            */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef TTS_OSX_H
+#define TTS_OSX_H
+
+#include "core/string/ustring.h"
+#include "core/templates/list.h"
+#include "core/templates/map.h"
+#include "core/variant/array.h"
+#include "servers/display_server.h"
+
+#include <AVFAudio/AVSpeechSynthesis.h>
+#include <AppKit/AppKit.h>
+
+// Text-to-speech backend object. The implementation picks AVSpeechSynthesizer
+// on macOS 10.14+ and NSSpeechSynthesizer otherwise, and acts as the delegate.
+@interface TTS_OSX : NSObject <AVSpeechSynthesizerDelegate> {
+	// AVSpeechSynthesizer
+	// True while an utterance handed to the synthesizer has not finished.
+	bool speaking;
+	// Maps utterance objects to engine utterance ids.
+	Map<id, int> ids;
+
+	// NSSpeechSynthesizer
+	bool paused;
+	// True while `last_utterance` identifies an utterance in flight.
+	bool have_utterance;
+	int last_utterance;
+
+	id synth; // NSSpeechSynthesizer or AVSpeechSynthesizer
+	// Utterances accepted by speak: and not yet handed to the synthesizer.
+	List<DisplayServer::TTSUtterance> queue;
+}
+
+- (void)pauseSpeaking;
+- (void)resumeSpeaking;
+// Cancels the current and all queued utterances.
+- (void)stopSpeaking;
+- (bool)isSpeaking;
+- (bool)isPaused;
+// Queues an utterance; with `interrupt` everything else is canceled first.
+- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt;
+// Returns an Array of Dictionaries with "name", "id" and "language" keys.
+- (Array)getVoices;
+@end
+
+#endif // TTS_OSX_H

+ 266 - 0
platform/osx/tts_osx.mm

@@ -0,0 +1,266 @@
+/*************************************************************************/
+/*  tts_osx.mm                                                           */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "tts_osx.h"
+
+@implementation TTS_OSX
+
+// Resets the bookkeeping flags and creates the synthesizer:
+// AVSpeechSynthesizer on macOS 10.14+, NSSpeechSynthesizer otherwise.
+// This object is installed as the synthesizer's delegate in both cases.
+- (id)init {
+	self = [super init];
+
+	speaking = false;
+	paused = false;
+	have_utterance = false;
+	last_utterance = -1;
+
+	if (@available(macOS 10.14, *)) {
+		synth = [[AVSpeechSynthesizer alloc] init];
+		[synth setDelegate:self];
+		print_verbose("Text-to-Speech: AVSpeechSynthesizer initialized.");
+	} else {
+		synth = [[NSSpeechSynthesizer alloc] init];
+		[synth setDelegate:self];
+		print_verbose("Text-to-Speech: NSSpeechSynthesizer initialized.");
+	}
+	return self;
+}
+
+// AVSpeechSynthesizer callback (macOS 10.14+)
+
+// Posts TTS_UTTERANCE_BOUNDARY with the position of the word about to be
+// spoken, counted in code points rather than UTF-16 units.
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth willSpeakRangeOfSpeechString:(NSRange)characterRange utterance:(AVSpeechUtterance *)utterance API_AVAILABLE(macosx(10.14)) {
+	NSString *string = [utterance speechString];
+
+	// Convert from UTF-16 to UTF-32 position.
+	int pos = 0;
+	NSUInteger index = 0;
+	while (index < MIN(characterRange.location, string.length)) {
+		unichar c = [string characterAtIndex:index];
+		// A high surrogate and the unit following it count as one position.
+		index += ((c & 0xfffffc00) == 0xd800) ? 2 : 1;
+		pos++;
+	}
+
+	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, ids[utterance], pos);
+}
+
+// AVSpeechSynthesizer callback (macOS 10.14+)
+
+// Reports the utterance as canceled, drops its id mapping and starts the next queued one.
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didCancelSpeechUtterance:(AVSpeechUtterance *)utterance API_AVAILABLE(macosx(10.14)) {
+	const int canceled_id = ids[utterance];
+	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, canceled_id);
+
+	ids.erase(utterance);
+	speaking = false;
+
+	[self update];
+}
+
+// AVSpeechSynthesizer callback (macOS 10.14+)
+
+// Reports the utterance as ended, drops its id mapping and starts the next queued one.
+- (void)speechSynthesizer:(AVSpeechSynthesizer *)av_synth didFinishSpeechUtterance:(AVSpeechUtterance *)utterance API_AVAILABLE(macosx(10.14)) {
+	const int finished_id = ids[utterance];
+	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, finished_id);
+
+	ids.erase(utterance);
+	speaking = false;
+
+	[self update];
+}
+
+// NSSpeechSynthesizer callback (macOS 10.4+)
+
+// Posts TTS_UTTERANCE_BOUNDARY for the utterance in flight with the position
+// of the word about to be spoken, counted in code points. Ignored while
+// paused or when no utterance is being tracked.
+- (void)speechSynthesizer:(NSSpeechSynthesizer *)ns_synth willSpeakWord:(NSRange)characterRange ofString:(NSString *)string {
+	if (paused || !have_utterance) {
+		return;
+	}
+
+	// Convert from UTF-16 to UTF-32 position.
+	int pos = 0;
+	NSUInteger index = 0;
+	while (index < MIN(characterRange.location, string.length)) {
+		unichar c = [string characterAtIndex:index];
+		// A high surrogate and the unit following it count as one position.
+		index += ((c & 0xfffffc00) == 0xd800) ? 2 : 1;
+		pos++;
+	}
+
+	DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, last_utterance, pos);
+}
+
+// NSSpeechSynthesizer completion callback. Unless paused or nothing is being
+// tracked, reports the utterance in flight as ended (`success`) or canceled.
+// In every case clears `speaking` and starts the next queued utterance.
+- (void)speechSynthesizer:(NSSpeechSynthesizer *)ns_synth didFinishSpeaking:(BOOL)success {
+	if (!paused && have_utterance) {
+		const auto event = success ? DisplayServer::TTS_UTTERANCE_ENDED : DisplayServer::TTS_UTTERANCE_CANCELED;
+		DisplayServer::get_singleton()->tts_post_utterance_event(event, last_utterance);
+		have_utterance = false;
+	}
+
+	speaking = false;
+	[self update];
+}
+
+// Starts the next queued utterance if nothing is currently being spoken:
+// configures voice, rate, pitch and volume on the active synthesizer, removes
+// the utterance from the queue and posts TTS_UTTERANCE_STARTED for it.
+- (void)update {
+	if (!speaking && queue.size() > 0) {
+		DisplayServer::TTSUtterance &message = queue.front()->get();
+		// Copy the id now: `message` refers to the queue element, which is
+		// destroyed by pop_front() before the STARTED event is posted.
+		const int utterance_id = message.id;
+
+		if (@available(macOS 10.14, *)) {
+			AVSpeechSynthesizer *av_synth = synth;
+			AVSpeechUtterance *new_utterance = [[AVSpeechUtterance alloc] initWithString:[NSString stringWithUTF8String:message.text.utf8().get_data()]];
+			[new_utterance setVoice:[AVSpeechSynthesisVoice voiceWithIdentifier:[NSString stringWithUTF8String:message.voice.utf8().get_data()]]];
+			// Map the 0.1..10 rate multiplier to the AVSpeechUtterance rate range;
+			// a rate of exactly 1 keeps the utterance's default rate.
+			if (message.rate > 1.f) {
+				[new_utterance setRate:Math::range_lerp(message.rate, 1.f, 10.f, AVSpeechUtteranceDefaultSpeechRate, AVSpeechUtteranceMaximumSpeechRate)];
+			} else if (message.rate < 1.f) {
+				[new_utterance setRate:Math::range_lerp(message.rate, 0.1f, 1.f, AVSpeechUtteranceMinimumSpeechRate, AVSpeechUtteranceDefaultSpeechRate)];
+			}
+			[new_utterance setPitchMultiplier:message.pitch];
+			[new_utterance setVolume:(Math::range_lerp(message.volume, 0.f, 100.f, 0.f, 1.f))];
+
+			ids[new_utterance] = utterance_id;
+			[av_synth speakUtterance:new_utterance];
+		} else {
+			NSSpeechSynthesizer *ns_synth = synth;
+			// Reset first so the base pitch read below is not the value left by the previous utterance.
+			[ns_synth setObject:nil forProperty:NSSpeechResetProperty error:nil];
+			[ns_synth setVoice:[NSString stringWithUTF8String:message.voice.utf8().get_data()]];
+			int base_pitch = [[ns_synth objectForProperty:NSSpeechPitchBaseProperty error:nil] intValue];
+			[ns_synth setObject:[NSNumber numberWithInt:(base_pitch * (message.pitch / 2.f + 0.5f))] forProperty:NSSpeechPitchBaseProperty error:nullptr];
+			[ns_synth setVolume:(Math::range_lerp(message.volume, 0.f, 100.f, 0.f, 1.f))];
+			[ns_synth setRate:(message.rate * 200)];
+
+			last_utterance = utterance_id;
+			have_utterance = true;
+			[ns_synth startSpeakingString:[NSString stringWithUTF8String:message.text.utf8().get_data()]];
+		}
+		queue.pop_front();
+
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_STARTED, utterance_id);
+		speaking = true;
+	}
+}
+
+// Pauses the active synthesizer immediately and sets `paused`; the result of
+// the pause request is not checked.
+- (void)pauseSpeaking {
+	if (@available(macOS 10.14, *)) {
+		AVSpeechSynthesizer *av_synth = synth;
+		[av_synth pauseSpeakingAtBoundary:AVSpeechBoundaryImmediate];
+	} else {
+		NSSpeechSynthesizer *ns_synth = synth;
+		[ns_synth pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
+	}
+	paused = true;
+}
+
+// Continues speech on the active synthesizer and clears `paused`.
+- (void)resumeSpeaking {
+	if (@available(macOS 10.14, *)) {
+		AVSpeechSynthesizer *av_synth = synth;
+		[av_synth continueSpeaking];
+	} else {
+		NSSpeechSynthesizer *ns_synth = synth;
+		[ns_synth continueSpeaking];
+	}
+	paused = false;
+}
+
+// Cancels everything: posts TTS_UTTERANCE_CANCELED for each queued utterance,
+// empties the queue, stops the active synthesizer and resets the state flags.
+// On the NSSpeechSynthesizer path the utterance in flight is reported as
+// canceled here as well.
+- (void)stopSpeaking {
+	DisplayServer *display_server = DisplayServer::get_singleton();
+	for (DisplayServer::TTSUtterance &pending : queue) {
+		display_server->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, pending.id);
+	}
+	queue.clear();
+
+	if (@available(macOS 10.14, *)) {
+		AVSpeechSynthesizer *av_synth = synth;
+		[av_synth stopSpeakingAtBoundary:AVSpeechBoundaryImmediate];
+	} else {
+		if (have_utterance) {
+			display_server->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, last_utterance);
+		}
+		NSSpeechSynthesizer *ns_synth = synth;
+		[ns_synth stopSpeaking];
+	}
+
+	have_utterance = false;
+	speaking = false;
+	paused = false;
+}
+
+// True while an utterance is in flight or any utterance is still queued.
+- (bool)isSpeaking {
+	return speaking || (queue.size() > 0);
+}
+
+// On macOS 10.14+ asks the AVSpeechSynthesizer for its paused state;
+// otherwise returns the locally tracked `paused` flag.
+- (bool)isPaused {
+	if (@available(macOS 10.14, *)) {
+		AVSpeechSynthesizer *av_synth = synth;
+		return [av_synth isPaused];
+	} else {
+		return paused;
+	}
+}
+
+// Queues `text` for speaking with the given voice and parameters.
+// - `interrupt`: stopSpeaking is called first, canceling current and queued utterances.
+// - Empty text is reported immediately as TTS_UTTERANCE_CANCELED.
+// - Volume is clamped to 0..100, pitch to 0..2 and rate to 0.1..10.
+- (void)speak:(const String &)text voice:(const String &)voice volume:(int)volume pitch:(float)pitch rate:(float)rate utterance_id:(int)utterance_id interrupt:(bool)interrupt {
+	if (interrupt) {
+		[self stopSpeaking];
+	}
+
+	if (text.is_empty()) {
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, utterance_id);
+		return;
+	}
+
+	DisplayServer::TTSUtterance message;
+	message.text = text;
+	message.voice = voice;
+	message.volume = CLAMP(volume, 0, 100);
+	message.pitch = CLAMP(pitch, 0.f, 2.f);
+	message.rate = CLAMP(rate, 0.1f, 10.f);
+	message.id = utterance_id;
+	queue.push_back(message);
+
+	if ([self isPaused]) {
+		[self resumeSpeaking];
+	}
+	// Always kick the queue. pauseSpeaking sets `paused` even when nothing is
+	// being spoken; resuming then triggers no delegate callback, so the new
+	// utterance would otherwise wait until the next speak call. update does
+	// nothing while an utterance is in flight (`speaking` is true).
+	[self update];
+}
+
+// Lists the voices available to the active synthesizer API.
+// Each entry is a Dictionary with "name", "id" (voice identifier) and
+// "language" (locale identifier).
+- (Array)getVoices {
+	Array list;
+	if (@available(macOS 10.14, *)) {
+		for (AVSpeechSynthesisVoice *voice in [AVSpeechSynthesisVoice speechVoices]) {
+			NSString *voiceIdentifierString = [voice identifier];
+			NSString *voiceLocaleIdentifier = [voice language];
+			NSString *voiceName = [voice name];
+			Dictionary voice_d;
+			voice_d["name"] = String::utf8([voiceName UTF8String]);
+			voice_d["id"] = String::utf8([voiceIdentifierString UTF8String]);
+			voice_d["language"] = String::utf8([voiceLocaleIdentifier UTF8String]);
+			list.push_back(voice_d);
+		}
+	} else {
+		for (NSString *voiceIdentifierString in [NSSpeechSynthesizer availableVoices]) {
+			// Fetch the attribute dictionary once per voice.
+			NSDictionary *attributes = [NSSpeechSynthesizer attributesForVoice:voiceIdentifierString];
+			NSString *voiceLocaleIdentifier = [attributes objectForKey:NSVoiceLocaleIdentifier];
+			NSString *voiceName = [attributes objectForKey:NSVoiceName];
+			Dictionary voice_d;
+			// UTF8String yields UTF-8 bytes, so decode them as UTF-8 (as the
+			// branch above does) to keep non-ASCII voice names intact.
+			voice_d["name"] = String::utf8([voiceName UTF8String]);
+			voice_d["id"] = String::utf8([voiceIdentifierString UTF8String]);
+			voice_d["language"] = String::utf8([voiceLocaleIdentifier UTF8String]);
+			list.push_back(voice_d);
+		}
+	}
+	return list;
+}
+
+@end

+ 1 - 0
platform/windows/SCsub

@@ -13,6 +13,7 @@ common_win = [
     "display_server_windows.cpp",
     "key_mapping_windows.cpp",
     "joypad_windows.cpp",
+    "tts_windows.cpp",
     "windows_terminal_logger.cpp",
     "vulkan_context_win.cpp",
     "gl_manager_windows.cpp",

+ 2 - 0
platform/windows/detect.py

@@ -252,6 +252,7 @@ def configure_msvc(env, manual_msvc_config):
         "kernel32",
         "ole32",
         "oleaut32",
+        "sapi",
         "user32",
         "gdi32",
         "IPHLPAPI",
@@ -426,6 +427,7 @@ def configure_mingw(env):
             "ws2_32",
             "kernel32",
             "oleaut32",
+            "sapi",
             "dinput8",
             "dxguid",
             "ksuser",

+ 43 - 0
platform/windows/display_server_windows.cpp

@@ -84,6 +84,7 @@ bool DisplayServerWindows::has_feature(Feature p_feature) const {
 		case FEATURE_NATIVE_ICON:
 		case FEATURE_SWAP_BUFFERS:
 		case FEATURE_KEEP_SCREEN_ON:
+		case FEATURE_TEXT_TO_SPEECH:
 			return true;
 		default:
 			return false;
@@ -133,6 +134,41 @@ void DisplayServerWindows::_set_mouse_mode_impl(MouseMode p_mode) {
 	}
 }
 
+// Forwards to the TTS backend; fails with `false` if the backend object does not exist.
+bool DisplayServerWindows::tts_is_speaking() const {
+	ERR_FAIL_COND_V(!tts, false);
+	return tts->is_speaking();
+}
+
+// Forwards to the TTS backend; fails with `false` if the backend object does not exist.
+bool DisplayServerWindows::tts_is_paused() const {
+	ERR_FAIL_COND_V(!tts, false);
+	return tts->is_paused();
+}
+
+// Forwards to the TTS backend; fails with an empty Array if the backend object does not exist.
+Array DisplayServerWindows::tts_get_voices() const {
+	ERR_FAIL_COND_V(!tts, Array());
+	return tts->get_voices();
+}
+
+// Forwards the speech request unchanged to the TTS backend; fails if the backend object does not exist.
+void DisplayServerWindows::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	ERR_FAIL_COND(!tts);
+	tts->speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt);
+}
+
+// Forwards to the TTS backend; fails if the backend object does not exist.
+void DisplayServerWindows::tts_pause() {
+	ERR_FAIL_COND(!tts);
+	tts->pause();
+}
+
+// Forwards to the TTS backend; fails if the backend object does not exist.
+void DisplayServerWindows::tts_resume() {
+	ERR_FAIL_COND(!tts);
+	tts->resume();
+}
+
+// Forwards to the TTS backend; fails if the backend object does not exist.
+void DisplayServerWindows::tts_stop() {
+	ERR_FAIL_COND(!tts);
+	tts->stop();
+}
+
 void DisplayServerWindows::mouse_set_mode(MouseMode p_mode) {
 	_THREAD_SAFE_METHOD_
 
@@ -3497,6 +3533,9 @@ DisplayServerWindows::DisplayServerWindows(const String &p_rendering_driver, Win
 
 	rendering_driver = p_rendering_driver;
 
+	// Init TTS
+	tts = memnew(TTS_Windows);
+
 	// Note: Wacom WinTab driver API for pen input, for devices incompatible with Windows Ink.
 	HMODULE wintab_lib = LoadLibraryW(L"wintab32.dll");
 	if (wintab_lib) {
@@ -3739,4 +3778,8 @@ DisplayServerWindows::~DisplayServerWindows() {
 		gl_manager = nullptr;
 	}
 #endif
+	if (tts) {
+		memdelete(tts);
+	}
+	CoUninitialize();
 }

+ 12 - 0
platform/windows/display_server_windows.h

@@ -46,6 +46,7 @@
 #include "servers/rendering/renderer_compositor.h"
 #include "servers/rendering/renderer_rd/renderer_compositor_rd.h"
 #include "servers/rendering_server.h"
+#include "tts_windows.h"
 
 #ifdef XAUDIO2_ENABLED
 #include "drivers/xaudio2/audio_driver_xaudio2.h"
@@ -320,6 +321,8 @@ class DisplayServerWindows : public DisplayServer {
 	String rendering_driver;
 	bool app_focused = false;
 
+	TTS_Windows *tts = nullptr;
+
 	struct WindowData {
 		HWND hWnd;
 		//layered window
@@ -454,6 +457,15 @@ public:
 	virtual bool has_feature(Feature p_feature) const override;
 	virtual String get_name() const override;
 
+	virtual bool tts_is_speaking() const override;
+	virtual bool tts_is_paused() const override;
+	virtual Array tts_get_voices() const override;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override;
+	virtual void tts_pause() override;
+	virtual void tts_resume() override;
+	virtual void tts_stop() override;
+
 	virtual void mouse_set_mode(MouseMode p_mode) override;
 	virtual MouseMode mouse_get_mode() const override;
 

+ 269 - 0
platform/windows/tts_windows.cpp

@@ -0,0 +1,269 @@
+/*************************************************************************/
+/*  tts_windows.cpp                                                      */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#include "tts_windows.h"
+
+TTS_Windows *TTS_Windows::singleton = nullptr;
+
+// SAPI notification callback: drains the synthesizer's pending events and reports them to the
+// DisplayServer as utterance events. Events for streams that are not tracked in `ids` are dropped.
+// `wParam` and `lParam` are not used.
+void __stdcall TTS_Windows::speech_event_callback(WPARAM wParam, LPARAM lParam) {
+	TTS_Windows *self = TTS_Windows::get_singleton();
+	SPEVENT ev;
+	while (self->synth->GetEvents(1, &ev, NULL) == S_OK) {
+		const ULONG stream = ev.ulStreamNum;
+		if (!self->ids.has(stream)) {
+			continue;
+		}
+
+		if (ev.eEventId == SPEI_START_INPUT_STREAM) {
+			DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_STARTED, self->ids[stream].id);
+		} else if (ev.eEventId == SPEI_END_INPUT_STREAM) {
+			DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_ENDED, self->ids[stream].id);
+			// The stream is finished: forget it and start the next queued utterance, if any.
+			self->ids.erase(stream);
+			self->_update_tts();
+		} else if (ev.eEventId == SPEI_WORD_BOUNDARY) {
+			const UTData &data = self->ids[stream];
+			const LPARAM limit = MIN(ev.lParam, data.string.length());
+
+			// Count characters up to the UTF-16 offset carried by the event; a surrogate pair
+			// (two UTF-16 units) counts as a single character.
+			int char_pos = 0;
+			int unit = 0;
+			while (unit < limit) {
+				const char16_t c = data.string[unit];
+				unit += ((c & 0xfffffc00) == 0xd800) ? 2 : 1;
+				char_pos++;
+			}
+
+			// `offset` removes the length of the tag that was prepended to the spoken text.
+			DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_BOUNDARY, data.id, char_pos - data.offset);
+		}
+	}
+}
+
+// Starts speaking the utterance at the front of the queue, provided the synthesizer is idle,
+// not paused, and the queue is not empty. The utterance is removed from the queue and tracked
+// in `ids` under the SAPI stream number returned by Speak().
+void TTS_Windows::_update_tts() {
+	if (!is_speaking() && !paused && queue.size() > 0) {
+		DisplayServer::TTSUtterance &message = queue.front()->get();
+
+		// Pitch is applied by wrapping the text in a SAPI XML <pitch> tag.
+		String text;
+		DWORD flags = SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_IS_XML;
+		String pitch_tag = String("<pitch absmiddle=\"") + String::num_int64(message.pitch * 10 - 10, 10) + String("\">");
+		text = pitch_tag + message.text + String("</pitch>");
+
+		// Look up the voice token whose ID matches the requested voice and select it.
+		IEnumSpObjectTokens *cpEnum = nullptr;
+		ISpObjectToken *cpVoiceToken = nullptr;
+		ULONG ulCount = 0;
+		ULONG stream_number = 0;
+		ISpObjectTokenCategory *cpCategory = nullptr;
+		HRESULT hr = CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (void **)&cpCategory);
+		if (SUCCEEDED(hr)) {
+			hr = cpCategory->SetId(SPCAT_VOICES, false);
+			if (SUCCEEDED(hr)) {
+				hr = cpCategory->EnumTokens(nullptr, nullptr, &cpEnum);
+				if (SUCCEEDED(hr)) {
+					hr = cpEnum->GetCount(&ulCount);
+					while (SUCCEEDED(hr) && ulCount--) {
+						hr = cpEnum->Next(1, &cpVoiceToken, nullptr);
+						if (hr != S_OK) {
+							break; // No token was returned, so there is nothing to inspect or release.
+						}
+						wchar_t *w_id = nullptr;
+						cpVoiceToken->GetId(&w_id);
+						const bool voice_matches = w_id && (String::utf16((const char16_t *)w_id) == message.voice);
+						CoTaskMemFree(w_id); // The ID string is allocated by SAPI and owned by the caller.
+						if (voice_matches) {
+							synth->SetVoice(cpVoiceToken);
+						}
+						cpVoiceToken->Release();
+						if (voice_matches) {
+							break;
+						}
+					}
+					cpEnum->Release();
+				}
+			}
+			cpCategory->Release();
+		}
+
+		UTData ut;
+		ut.string = text.utf16();
+		ut.offset = pitch_tag.length(); // Subtract injected <pitch> tag offset.
+		ut.id = message.id;
+
+		synth->SetVolume(message.volume);
+		synth->SetRate(10.f * log10(message.rate) / log10(3.f));
+		synth->Speak((LPCWSTR)ut.string.get_data(), flags, &stream_number);
+
+		ids[stream_number] = ut;
+
+		queue.pop_front();
+	}
+}
+
+// Returns `true` while the synthesizer reports that it is speaking.
+// Fails with `false` if the synthesizer was not created.
+bool TTS_Windows::is_speaking() const {
+	ERR_FAIL_COND_V(!synth, false);
+
+	SPVOICESTATUS status;
+	if (FAILED(synth->GetStatus(&status, nullptr))) {
+		return false; // `status` cannot be trusted when the query fails.
+	}
+	return (status.dwRunningState == SPRS_IS_SPEAKING);
+}
+
+// Returns the locally tracked pause flag (set by pause(), cleared by resume() and stop()).
+// Fails with `false` if the synthesizer was not created.
+bool TTS_Windows::is_paused() const {
+	ERR_FAIL_COND_V(!synth, false);
+	return paused;
+}
+
+// Returns the locale info string of type `p_type` for `p_locale`, or an empty string if the
+// information cannot be retrieved.
+static String _tts_get_locale_info(LCID p_locale, LCTYPE p_type) {
+	const int chars = GetLocaleInfoW(p_locale, p_type, nullptr, 0); // Required buffer size, terminator included.
+	if (chars <= 0) {
+		return String();
+	}
+	String result;
+	wchar_t *buffer = new wchar_t[chars];
+	if (GetLocaleInfoW(p_locale, p_type, buffer, chars) > 0) {
+		result = String::utf16((const char16_t *)buffer);
+	}
+	delete[] buffer;
+	return result;
+}
+
+// Enumerates the installed SAPI voices.
+// Returns an Array of Dictionaries with the keys "id" (SAPI token ID), "name" and "language"
+// ("<ISO 639 language>_<ISO 3166 country>", or empty if the voice has no "Language" attribute).
+Array TTS_Windows::get_voices() const {
+	Array list;
+	IEnumSpObjectTokens *cpEnum = nullptr;
+	ISpObjectToken *cpVoiceToken = nullptr;
+	ISpDataKey *cpDataKeyAttribs = nullptr;
+	ULONG ulCount = 0;
+	ISpObjectTokenCategory *cpCategory = nullptr;
+	HRESULT hr = CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_INPROC_SERVER, IID_ISpObjectTokenCategory, (void **)&cpCategory);
+	if (SUCCEEDED(hr)) {
+		hr = cpCategory->SetId(SPCAT_VOICES, false);
+		if (SUCCEEDED(hr)) {
+			hr = cpCategory->EnumTokens(nullptr, nullptr, &cpEnum);
+			if (SUCCEEDED(hr)) {
+				hr = cpEnum->GetCount(&ulCount);
+				while (SUCCEEDED(hr) && ulCount--) {
+					hr = cpEnum->Next(1, &cpVoiceToken, nullptr);
+					if (hr != S_OK) {
+						break; // No token was returned, so there is nothing to inspect or release.
+					}
+					HRESULT hr_attr = cpVoiceToken->OpenKey(SPTOKENKEY_ATTRIBUTES, &cpDataKeyAttribs);
+					if (SUCCEEDED(hr_attr)) {
+						wchar_t *w_id = nullptr;
+						wchar_t *w_lang = nullptr;
+						wchar_t *w_name = nullptr;
+						cpVoiceToken->GetId(&w_id);
+						cpDataKeyAttribs->GetStringValue(L"Language", &w_lang);
+						cpDataKeyAttribs->GetStringValue(nullptr, &w_name);
+
+						// The "Language" attribute holds a hexadecimal LCID; it may be missing.
+						String language;
+						if (w_lang) {
+							LCID locale = wcstol(w_lang, nullptr, 16);
+							language = _tts_get_locale_info(locale, LOCALE_SISO639LANGNAME) + "_" + _tts_get_locale_info(locale, LOCALE_SISO3166CTRYNAME);
+						}
+
+						Dictionary voice_d;
+						voice_d["id"] = String::utf16((const char16_t *)w_id);
+						if (w_name) {
+							voice_d["name"] = String::utf16((const char16_t *)w_name);
+						} else {
+							// No display name: derive one from the token ID by dropping the registry prefix.
+							voice_d["name"] = voice_d["id"].operator String().replace("HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech\\Voices\\Tokens\\", "");
+						}
+						voice_d["language"] = language;
+						list.push_back(voice_d);
+
+						// Strings returned by SAPI are owned by the caller.
+						CoTaskMemFree(w_id);
+						CoTaskMemFree(w_lang);
+						CoTaskMemFree(w_name);
+
+						cpDataKeyAttribs->Release();
+					}
+					cpVoiceToken->Release();
+				}
+				cpEnum->Release();
+			}
+		}
+		cpCategory->Release();
+	}
+	return list;
+}
+
+// Queues `p_text` for speech with the given voice and parameters.
+// With `p_interrupt` set, everything currently spoken or queued is stopped first.
+// An empty `p_text` is never queued; it is reported as TTS_UTTERANCE_CANCELED instead.
+void TTS_Windows::speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	ERR_FAIL_COND(!synth);
+
+	if (p_interrupt) {
+		stop();
+	}
+
+	if (p_text.is_empty()) {
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, p_utterance_id);
+		return;
+	}
+
+	// Build the queue entry, clamping the numeric parameters to their accepted ranges.
+	DisplayServer::TTSUtterance utterance;
+	utterance.id = p_utterance_id;
+	utterance.text = p_text;
+	utterance.voice = p_voice;
+	utterance.volume = CLAMP(p_volume, 0, 100);
+	utterance.pitch = CLAMP(p_pitch, 0.f, 2.f);
+	utterance.rate = CLAMP(p_rate, 0.1f, 10.f);
+	queue.push_back(utterance);
+
+	// A paused synthesizer is resumed; otherwise the queue is processed right away.
+	if (!is_paused()) {
+		_update_tts();
+	} else {
+		resume();
+	}
+}
+
+// Pauses the synthesizer. The pause flag is only set when SAPI reports success,
+// and nothing happens if speech is already paused.
+void TTS_Windows::pause() {
+	ERR_FAIL_COND(!synth);
+	if (paused) {
+		return;
+	}
+	if (synth->Pause() == S_OK) {
+		paused = true;
+	}
+}
+
+// Resumes the synthesizer and clears the pause flag (regardless of the Resume() result).
+void TTS_Windows::resume() {
+	ERR_FAIL_COND(!synth);
+	synth->Resume();
+	paused = false;
+}
+
+// Cancels the utterance currently being spoken and every queued utterance, posting a
+// TTS_UTTERANCE_CANCELED event for each, then purges the synthesizer and clears the pause state.
+void TTS_Windows::stop() {
+	ERR_FAIL_COND(!synth);
+
+	SPVOICESTATUS status;
+	// Only read `status` when the query succeeded; otherwise its contents cannot be trusted.
+	if (SUCCEEDED(synth->GetStatus(&status, nullptr)) && ids.has(status.ulCurrentStream)) {
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, ids[status.ulCurrentStream].id);
+		ids.erase(status.ulCurrentStream);
+	}
+	for (DisplayServer::TTSUtterance &message : queue) {
+		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, message.id);
+	}
+	queue.clear();
+	// Speaking "nothing" with the purge flag discards whatever the synthesizer still has pending.
+	synth->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr);
+	synth->Resume();
+	paused = false;
+}
+
+// Returns the instance registered by the constructor, or nullptr if none exists.
+TTS_Windows *TTS_Windows::get_singleton() {
+	return singleton;
+}
+
+// Registers the singleton, initializes COM and creates the SAPI voice.
+// On success, subscribes to stream start/end and word boundary events, delivered through
+// speech_event_callback(). On failure `synth` is left unset and only a verbose message is printed.
+TTS_Windows::TTS_Windows() {
+	singleton = this;
+	CoInitialize(nullptr);
+
+	const HRESULT hr = CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice, (void **)&synth);
+	if (FAILED(hr)) {
+		print_verbose("Text-to-Speech: Cannot initialize ISpVoice!");
+		return;
+	}
+
+	const ULONGLONG event_mask = SPFEI(SPEI_END_INPUT_STREAM) | SPFEI(SPEI_START_INPUT_STREAM) | SPFEI(SPEI_WORD_BOUNDARY);
+	synth->SetInterest(event_mask, event_mask);
+	synth->SetNotifyCallbackFunction(&speech_event_callback, (WPARAM)(this), 0);
+	print_verbose("Text-to-Speech: SAPI initialized.");
+}
+
+// Releases the SAPI voice (if it was created) and unregisters the singleton.
+// COM itself is not uninitialized here.
+TTS_Windows::~TTS_Windows() {
+	if (synth) {
+		synth->Release();
+	}
+	singleton = nullptr;
+}

+ 80 - 0
platform/windows/tts_windows.h

@@ -0,0 +1,80 @@
+/*************************************************************************/
+/*  tts_windows.h                                                        */
+/*************************************************************************/
+/*                       This file is part of:                           */
+/*                           GODOT ENGINE                                */
+/*                      https://godotengine.org                          */
+/*************************************************************************/
+/* Copyright (c) 2007-2022 Juan Linietsky, Ariel Manzur.                 */
+/* Copyright (c) 2014-2022 Godot Engine contributors (cf. AUTHORS.md).   */
+/*                                                                       */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the       */
+/* "Software"), to deal in the Software without restriction, including   */
+/* without limitation the rights to use, copy, modify, merge, publish,   */
+/* distribute, sublicense, and/or sell copies of the Software, and to    */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions:                                             */
+/*                                                                       */
+/* The above copyright notice and this permission notice shall be        */
+/* included in all copies or substantial portions of the Software.       */
+/*                                                                       */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY  */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,  */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE     */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                */
+/*************************************************************************/
+
+#ifndef TTS_WINDOWS_H
+#define TTS_WINDOWS_H
+
+#include "core/string/ustring.h"
+#include "core/templates/list.h"
+#include "core/templates/map.h"
+#include "core/variant/array.h"
+#include "servers/display_server.h"
+
+#include <objbase.h>
+#include <sapi.h>
+#include <wchar.h>
+#include <winnls.h>
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+// Windows text-to-speech backend built on the SAPI ISpVoice interface.
+// Utterances are queued and handed to the synthesizer one at a time.
+class TTS_Windows {
+	List<DisplayServer::TTSUtterance> queue; // Utterances waiting to be spoken.
+	ISpVoice *synth = nullptr; // SAPI voice; stays null if initialization failed.
+	bool paused = false;
+	// Bookkeeping for an utterance that was handed to SAPI.
+	struct UTData {
+		Char16String string; // UTF-16 text passed to Speak(), including the injected tag.
+		int offset = 0; // Length of the injected tag, subtracted from reported positions.
+		int id = 0; // Utterance ID reported back in events.
+	};
+	Map<ULONG, UTData> ids; // SAPI stream number -> utterance data.
+
+	static void __stdcall speech_event_callback(WPARAM wParam, LPARAM lParam);
+	void _update_tts();
+
+	static TTS_Windows *singleton;
+
+public:
+	static TTS_Windows *get_singleton();
+
+	bool is_speaking() const;
+	bool is_paused() const;
+	Array get_voices() const;
+
+	void speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	void pause();
+	void resume();
+	void stop();
+
+	TTS_Windows();
+	~TTS_Windows();
+};
+
+#endif // TTS_WINDOWS_H

+ 94 - 0
servers/display_server.cpp

@@ -220,6 +220,81 @@ void DisplayServer::global_menu_clear(const String &p_menu_root) {
 	WARN_PRINT("Global menus not supported by this display server.");
 }
 
+// Default implementation for display servers without TTS: warns and returns `false`.
+bool DisplayServer::tts_is_speaking() const {
+	WARN_PRINT("TTS is not supported by this display server.");
+	return false;
+}
+
+// Default implementation for display servers without TTS: warns and returns `false`.
+bool DisplayServer::tts_is_paused() const {
+	WARN_PRINT("TTS is not supported by this display server.");
+	return false;
+}
+
+// Default implementation for display servers without TTS: only prints a warning.
+void DisplayServer::tts_pause() {
+	WARN_PRINT("TTS is not supported by this display server.");
+}
+
+// Default implementation for display servers without TTS: only prints a warning.
+void DisplayServer::tts_resume() {
+	WARN_PRINT("TTS is not supported by this display server.");
+}
+
+// Default implementation for display servers without TTS: warns and returns an empty Array.
+Array DisplayServer::tts_get_voices() const {
+	WARN_PRINT("TTS is not supported by this display server.");
+	return Array();
+}
+
+// Returns the "id" of every voice from tts_get_voices() whose "language" starts with `p_language`.
+// Voice entries lacking an "id" or a "language" key are ignored.
+PackedStringArray DisplayServer::tts_get_voices_for_language(const String &p_language) const {
+	PackedStringArray matching_ids;
+	const Array all_voices = tts_get_voices();
+	for (int idx = 0; idx < all_voices.size(); idx++) {
+		const Dictionary &entry = all_voices[idx];
+		if (!entry.has("id") || !entry.has("language")) {
+			continue;
+		}
+		const String language = entry["language"];
+		if (language.begins_with(p_language)) {
+			matching_ids.push_back(entry["id"]);
+		}
+	}
+	return matching_ids;
+}
+
+// Default implementation for display servers without TTS: ignores all arguments and prints a warning.
+void DisplayServer::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	WARN_PRINT("TTS is not supported by this display server.");
+}
+
+// Default implementation for display servers without TTS: only prints a warning.
+void DisplayServer::tts_stop() {
+	WARN_PRINT("TTS is not supported by this display server.");
+}
+
+// Stores `p_callable` as the handler for `p_event`, replacing any previous one.
+// Fails if `p_event` is outside the range [0, TTS_UTTERANCE_MAX).
+void DisplayServer::tts_set_utterance_callback(TTSUtteranceEvent p_event, const Callable &p_callable) {
+	ERR_FAIL_INDEX(p_event, DisplayServer::TTS_UTTERANCE_MAX);
+	utterance_callback[p_event] = p_callable;
+}
+
+// Invokes the callable registered for `p_event`, if a valid one is set.
+// Boundary events pass (p_pos, p_id); all other events pass (p_id) only.
+// Fails if `p_event` is outside the range [0, TTS_UTTERANCE_MAX).
+void DisplayServer::tts_post_utterance_event(TTSUtteranceEvent p_event, int p_id, int p_pos) {
+	ERR_FAIL_INDEX(p_event, DisplayServer::TTS_UTTERANCE_MAX);
+
+	Callable &callback = utterance_callback[p_event];
+	if (!callback.is_valid()) {
+		return;
+	}
+
+	// Should be deferred, on some platforms utterance events can be called from different threads in a rapid succession.
+	if (p_event == DisplayServer::TTS_UTTERANCE_BOUNDARY) {
+		Variant pos_arg = p_pos;
+		Variant id_arg = p_id;
+		const Variant *argp[] = { &pos_arg, &id_arg };
+		callback.call_deferred(argp, 2);
+	} else {
+		// Started, ended and canceled events only carry the utterance ID.
+		Variant id_arg = p_id;
+		const Variant *argp[] = { &id_arg };
+		callback.call_deferred(argp, 1);
+	}
+}
+
 void DisplayServer::mouse_set_mode(MouseMode p_mode) {
 	WARN_PRINT("Mouse is not supported by this display server.");
 }
@@ -478,6 +553,19 @@ void DisplayServer::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("global_menu_remove_item", "menu_root", "idx"), &DisplayServer::global_menu_remove_item);
 	ClassDB::bind_method(D_METHOD("global_menu_clear", "menu_root"), &DisplayServer::global_menu_clear);
 
+	ClassDB::bind_method(D_METHOD("tts_is_speaking"), &DisplayServer::tts_is_speaking);
+	ClassDB::bind_method(D_METHOD("tts_is_paused"), &DisplayServer::tts_is_paused);
+	ClassDB::bind_method(D_METHOD("tts_get_voices"), &DisplayServer::tts_get_voices);
+	ClassDB::bind_method(D_METHOD("tts_get_voices_for_language", "language"), &DisplayServer::tts_get_voices_for_language);
+
+	ClassDB::bind_method(D_METHOD("tts_speak", "text", "voice", "volume", "pitch", "rate", "utterance_id", "interrupt"), &DisplayServer::tts_speak, DEFVAL(50), DEFVAL(1.f), DEFVAL(1.f), DEFVAL(0), DEFVAL(false));
+	ClassDB::bind_method(D_METHOD("tts_pause"), &DisplayServer::tts_pause);
+	ClassDB::bind_method(D_METHOD("tts_resume"), &DisplayServer::tts_resume);
+	ClassDB::bind_method(D_METHOD("tts_stop"), &DisplayServer::tts_stop);
+
+	ClassDB::bind_method(D_METHOD("tts_set_utterance_callback", "event", "callable"), &DisplayServer::tts_set_utterance_callback);
+	ClassDB::bind_method(D_METHOD("_tts_post_utterance_event", "event", "id", "char_pos"), &DisplayServer::tts_post_utterance_event);
+
 	ClassDB::bind_method(D_METHOD("mouse_set_mode", "mouse_mode"), &DisplayServer::mouse_set_mode);
 	ClassDB::bind_method(D_METHOD("mouse_get_mode"), &DisplayServer::mouse_get_mode);
 
@@ -621,6 +709,7 @@ void DisplayServer::_bind_methods() {
 	BIND_ENUM_CONSTANT(FEATURE_ORIENTATION);
 	BIND_ENUM_CONSTANT(FEATURE_SWAP_BUFFERS);
 	BIND_ENUM_CONSTANT(FEATURE_CLIPBOARD_PRIMARY);
+	BIND_ENUM_CONSTANT(FEATURE_TEXT_TO_SPEECH);
 
 	BIND_ENUM_CONSTANT(MOUSE_MODE_VISIBLE);
 	BIND_ENUM_CONSTANT(MOUSE_MODE_HIDDEN);
@@ -689,6 +778,11 @@ void DisplayServer::_bind_methods() {
 	BIND_ENUM_CONSTANT(DISPLAY_HANDLE);
 	BIND_ENUM_CONSTANT(WINDOW_HANDLE);
 	BIND_ENUM_CONSTANT(WINDOW_VIEW);
+
+	BIND_ENUM_CONSTANT(TTS_UTTERANCE_STARTED);
+	BIND_ENUM_CONSTANT(TTS_UTTERANCE_ENDED);
+	BIND_ENUM_CONSTANT(TTS_UTTERANCE_CANCELED);
+	BIND_ENUM_CONSTANT(TTS_UTTERANCE_BOUNDARY);
 }
 
 void DisplayServer::register_create_function(const char *p_name, CreateFunction p_function, GetRenderingDriversFunction p_get_drivers) {

+ 36 - 0
servers/display_server.h

@@ -121,6 +121,7 @@ public:
 		FEATURE_SWAP_BUFFERS,
 		FEATURE_KEEP_SCREEN_ON,
 		FEATURE_CLIPBOARD_PRIMARY,
+		FEATURE_TEXT_TO_SPEECH,
 	};
 
 	virtual bool has_feature(Feature p_feature) const = 0;
@@ -172,6 +173,40 @@ public:
 	virtual void global_menu_remove_item(const String &p_menu_root, int p_idx);
 	virtual void global_menu_clear(const String &p_menu_root);
 
+	// A single text-to-speech request as stored in a backend's utterance queue.
+	struct TTSUtterance {
+		String text; // Text to speak.
+		String voice; // Identifier of the voice to use (an "id" value from tts_get_voices()).
+		int volume = 50; // Backends in this change clamp it to 0..100.
+		float pitch = 1.f; // Backends in this change clamp it to 0..2.
+		float rate = 1.f; // Backends in this change clamp it to 0.1..10.
+		int id = 0; // Caller-supplied ID, passed back with utterance events.
+	};
+
+	// Events reported through tts_post_utterance_event(); also used to index the callback table.
+	enum TTSUtteranceEvent {
+		TTS_UTTERANCE_STARTED, // Callback receives the utterance ID.
+		TTS_UTTERANCE_ENDED, // Callback receives the utterance ID.
+		TTS_UTTERANCE_CANCELED, // Callback receives the utterance ID.
+		TTS_UTTERANCE_BOUNDARY, // Callback receives the character position and the utterance ID.
+		TTS_UTTERANCE_MAX, // Number of events; not an event itself.
+	};
+
+private:
+	Callable utterance_callback[TTS_UTTERANCE_MAX];
+
+public:
+	virtual bool tts_is_speaking() const;
+	virtual bool tts_is_paused() const;
+	virtual Array tts_get_voices() const;
+	virtual PackedStringArray tts_get_voices_for_language(const String &p_language) const;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	virtual void tts_pause();
+	virtual void tts_resume();
+	virtual void tts_stop();
+
+	virtual void tts_set_utterance_callback(TTSUtteranceEvent p_event, const Callable &p_callable);
+	virtual void tts_post_utterance_event(TTSUtteranceEvent p_event, int p_id, int p_pos = 0);
+
 	enum MouseMode {
 		MOUSE_MODE_VISIBLE,
 		MOUSE_MODE_HIDDEN,
@@ -431,5 +466,6 @@ VARIANT_ENUM_CAST(DisplayServer::WindowFlags)
 VARIANT_ENUM_CAST(DisplayServer::HandleType)
 VARIANT_ENUM_CAST(DisplayServer::CursorShape)
 VARIANT_ENUM_CAST(DisplayServer::VSyncMode)
+VARIANT_ENUM_CAST(DisplayServer::TTSUtteranceEvent)
 
 #endif // DISPLAY_SERVER_H

+ 10 - 0
servers/text/text_server_extension.cpp

@@ -293,6 +293,8 @@ void TextServerExtension::_bind_methods() {
 
 	GDVIRTUAL_BIND(strip_diacritics, "string");
 
+	GDVIRTUAL_BIND(string_get_word_breaks, "string", "language");
+
 	GDVIRTUAL_BIND(string_to_upper, "string", "language");
 	GDVIRTUAL_BIND(string_to_lower, "string", "language");
 
@@ -1503,6 +1505,14 @@ Array TextServerExtension::parse_structured_text(StructuredTextParser p_parser_t
 	return Array();
 }
 
+// Delegates to the extension's `string_get_word_breaks` virtual; returns an empty array
+// when the virtual call is not handled.
+PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language) const {
+	PackedInt32Array breaks;
+	if (!GDVIRTUAL_CALL(string_get_word_breaks, p_string, p_language, breaks)) {
+		return PackedInt32Array();
+	}
+	return breaks;
+}
+
 TextServerExtension::TextServerExtension() {
 	//NOP
 }

+ 3 - 0
servers/text/text_server_extension.h

@@ -485,6 +485,9 @@ public:
 	virtual String strip_diacritics(const String &p_string) const override;
 	GDVIRTUAL1RC(String, strip_diacritics, const String &);
 
+	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override;
+	GDVIRTUAL2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &);
+
 	virtual String string_to_upper(const String &p_string, const String &p_language = "") const override;
 	virtual String string_to_lower(const String &p_string, const String &p_language = "") const override;
 	GDVIRTUAL2RC(String, string_to_upper, const String &, const String &);

+ 2 - 0
servers/text_server.cpp

@@ -439,6 +439,8 @@ void TextServer::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("parse_number", "number", "language"), &TextServer::parse_number, DEFVAL(""));
 	ClassDB::bind_method(D_METHOD("percent_sign", "language"), &TextServer::percent_sign, DEFVAL(""));
 
+	ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language"), &TextServer::string_get_word_breaks, DEFVAL(""));
+
 	ClassDB::bind_method(D_METHOD("strip_diacritics", "string"), &TextServer::strip_diacritics);
 
 	ClassDB::bind_method(D_METHOD("string_to_upper", "string", "language"), &TextServer::string_to_upper, DEFVAL(""));

+ 3 - 0
servers/text_server.h

@@ -431,6 +431,9 @@ public:
 	virtual String parse_number(const String &p_string, const String &p_language = "") const = 0;
 	virtual String percent_sign(const String &p_language = "") const = 0;
 
+	// String functions.
+	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const = 0;
+
 	virtual String strip_diacritics(const String &p_string) const;
 
 	// Other string operations.

+ 43 - 0
tests/servers/test_text_server.h

@@ -514,6 +514,49 @@ TEST_SUITE("[[TextServer]") {
 				CHECK(ts->strip_diacritics(U"ṽṿ ẁẃẅẇẉ ẋẍ ẏ ẑẓẕ ẖ ẗẘẙẛ") == U"vv wwwww xx y zzz h twys");
 			}
 		}
+
+		// Checks string_get_word_breaks() on every registered text server that supports simple layout.
+		SUBCASE("[TextServer] Word break") {
+			for (int i = 0; i < TextServerManager::get_singleton()->get_interface_count(); i++) {
+				Ref<TextServer> ts = TextServerManager::get_singleton()->get_interface(i);
+
+				// Validate the interface before it is dereferenced below.
+				TEST_FAIL_COND(ts.is_null(), "Invalid TS interface.");
+
+				if (!ts->has_feature(TextServer::FEATURE_SIMPLE_LAYOUT)) {
+					continue;
+				}
+
+				{
+					String text1 = U"linguistically similar and effectively form";
+					//                           14^     22^ 26^         38^
+					PackedInt32Array breaks = ts->string_get_word_breaks(text1, "en");
+					CHECK(breaks.size() == 4);
+					if (breaks.size() == 4) {
+						CHECK(breaks[0] == 14);
+						CHECK(breaks[1] == 22);
+						CHECK(breaks[2] == 26);
+						CHECK(breaks[3] == 38);
+					}
+				}
+
+				// Thai has no spaces between words, so this case needs break iterator support.
+				if (ts->has_feature(TextServer::FEATURE_BREAK_ITERATORS)) {
+					String text2 = U"เป็นภาษาราชการและภาษาประจำชาติของประเทศไทย";
+					//				 เป็น ภาษา ราชการ และ ภาษา ประจำ ชาติ ของ ประเทศไทย
+					//                 3^   7^    13^ 16^  20^   25^ 29^ 32^
+
+					PackedInt32Array breaks = ts->string_get_word_breaks(text2, "th");
+					CHECK(breaks.size() == 8);
+					if (breaks.size() == 8) {
+						CHECK(breaks[0] == 3);
+						CHECK(breaks[1] == 7);
+						CHECK(breaks[2] == 13);
+						CHECK(breaks[3] == 16);
+						CHECK(breaks[4] == 20);
+						CHECK(breaks[5] == 25);
+						CHECK(breaks[6] == 29);
+						CHECK(breaks[7] == 32);
+					}
+				}
+			}
+		}
 	}
 }
 }; // namespace TestTextServer