Browse Source

Add a text-to-speech demo (#744)

Co-authored-by: Hugo Locurcio <[email protected]>
bruvzg 2 years ago
parent
commit
78dffe0d04

+ 16 - 0
audio/text_to_speech/README.md

@@ -0,0 +1,16 @@
+# Text-to-Speech Demo
+
+This is a demo showing text-to-speech support.
+
+Language: GDScript
+
+Renderer: Compatibility
+
+## How does it work?
+
+It uses the `tts_*()` methods of the [`DisplayServer`](https://docs.godotengine.org/en/latest/classes/class_displayserver.html) singleton
+to enumerate voice information, send utterances to the OS TTS API, and receive callback signals.
+
+## Screenshots
+
+![Screenshot](screenshots/text_to_speech.webp)

+ 244 - 0
audio/text_to_speech/control.tscn

@@ -0,0 +1,244 @@
+[gd_scene load_steps=2 format=3 uid="uid://u5emvyeyodyh"]
+
+[ext_resource type="Script" path="res://voice_list.gd" id="1_0bwjs"]
+
+[node name="Control" type="Control"]
+layout_mode = 3
+anchors_preset = 8
+anchor_left = 0.5
+anchor_top = 0.5
+anchor_right = 0.5
+anchor_bottom = 0.5
+offset_left = -576.0
+offset_top = -312.0
+offset_right = -576.0
+offset_bottom = -312.0
+grow_horizontal = 2
+grow_vertical = 2
+size_flags_horizontal = 4
+size_flags_vertical = 4
+script = ExtResource("1_0bwjs")
+
+[node name="LineEditFilterLang" type="LineEdit" parent="."]
+layout_mode = 0
+offset_left = 416.0
+offset_top = 304.0
+offset_right = 704.0
+offset_bottom = 337.0
+theme_override_font_sizes/font_size = 16
+placeholder_text = "Language"
+
+[node name="LineEditFilterName" type="LineEdit" parent="."]
+layout_mode = 0
+offset_left = 96.0
+offset_top = 304.0
+offset_right = 408.0
+offset_bottom = 337.0
+theme_override_font_sizes/font_size = 16
+placeholder_text = "Name"
+
+[node name="Label" type="Label" parent="LineEditFilterName"]
+layout_mode = 0
+offset_left = -76.0
+offset_top = 3.0
+offset_right = -20.0
+offset_bottom = 35.0
+text = "Filter:"
+
+[node name="Tree" type="Tree" parent="."]
+layout_mode = 0
+offset_left = 16.0
+offset_top = 56.0
+offset_right = 704.0
+offset_bottom = 296.0
+columns = 2
+
+[node name="Utterance" type="TextEdit" parent="."]
+layout_mode = 0
+offset_left = 264.0
+offset_top = 472.0
+offset_right = 704.0
+offset_bottom = 584.0
+theme_override_font_sizes/font_size = 16
+text = "Beware the Jabberwock, my son! The jaws that bite, the claws that catch!"
+wrap_mode = 1
+draw_spaces = true
+
+[node name="ButtonSpeak" type="Button" parent="."]
+layout_mode = 0
+offset_left = 16.0
+offset_top = 472.0
+offset_right = 128.0
+offset_bottom = 504.0
+text = "Speak"
+
+[node name="ButtonIntSpeak" type="Button" parent="."]
+layout_mode = 0
+offset_left = 144.0
+offset_top = 472.0
+offset_right = 256.0
+offset_bottom = 504.0
+text = "Interrupt"
+
+[node name="ButtonStop" type="Button" parent="."]
+layout_mode = 0
+offset_left = 16.0
+offset_top = 512.0
+offset_right = 128.0
+offset_bottom = 544.0
+text = "Stop"
+
+[node name="ButtonPause" type="Button" parent="."]
+layout_mode = 0
+offset_left = 144.0
+offset_top = 512.0
+offset_right = 256.0
+offset_bottom = 544.0
+toggle_mode = true
+text = "Pause"
+
+[node name="HSliderRate" type="HSlider" parent="."]
+layout_mode = 0
+offset_left = 96.0
+offset_top = 352.0
+offset_right = 440.0
+offset_bottom = 368.0
+min_value = 0.1
+max_value = 10.0
+step = 0.05
+value = 1.0
+exp_edit = true
+
+[node name="Label" type="Label" parent="HSliderRate"]
+layout_mode = 0
+offset_left = -76.0
+offset_top = -5.0
+offset_right = -20.0
+offset_bottom = 27.0
+text = "Rate:"
+
+[node name="Value" type="Label" parent="HSliderRate"]
+layout_mode = 0
+offset_left = 352.0
+offset_top = -8.0
+offset_right = 416.0
+offset_bottom = 24.0
+text = "1.00x"
+
+[node name="HSliderPitch" type="HSlider" parent="."]
+layout_mode = 0
+offset_left = 96.0
+offset_top = 392.0
+offset_right = 440.0
+offset_bottom = 408.0
+max_value = 2.0
+step = 0.05
+value = 1.0
+
+[node name="Label" type="Label" parent="HSliderPitch"]
+layout_mode = 0
+offset_left = -76.0
+offset_top = -5.0
+offset_right = -28.0
+offset_bottom = 27.0
+text = "Pitch:"
+
+[node name="Value" type="Label" parent="HSliderPitch"]
+layout_mode = 0
+offset_left = 352.0
+offset_top = -8.0
+offset_right = 416.0
+offset_bottom = 24.0
+text = "1.00x"
+
+[node name="HSliderVolume" type="HSlider" parent="."]
+layout_mode = 0
+offset_left = 96.0
+offset_top = 432.0
+offset_right = 440.0
+offset_bottom = 448.0
+min_value = 1.0
+value = 50.0
+
+[node name="Label" type="Label" parent="HSliderVolume"]
+layout_mode = 0
+offset_left = -76.0
+offset_top = -5.0
+offset_right = -12.0
+offset_bottom = 27.0
+text = "Volume:"
+
+[node name="Value" type="Label" parent="HSliderVolume"]
+layout_mode = 0
+offset_left = 352.0
+offset_top = -8.0
+offset_right = 416.0
+offset_bottom = 24.0
+text = "50%"
+
+[node name="ColorRect" type="ColorRect" parent="."]
+layout_mode = 0
+offset_left = 16.0
+offset_top = 16.0
+offset_right = 144.0
+offset_bottom = 40.0
+
+[node name="Label" type="Label" parent="ColorRect"]
+layout_mode = 0
+offset_right = 128.0
+offset_bottom = 32.0
+theme_override_font_sizes/font_size = 16
+text = "Speaking..."
+
+[node name="Log" type="TextEdit" parent="."]
+layout_mode = 0
+offset_left = 712.0
+offset_top = 56.0
+offset_right = 1138.0
+offset_bottom = 584.0
+editable = false
+context_menu_enabled = false
+shortcut_keys_enabled = false
+virtual_keyboard_enabled = false
+middle_mouse_paste_enabled = false
+
+[node name="ButtonClearLog" type="Button" parent="Log"]
+layout_mode = 0
+offset_left = 346.0
+offset_top = 8.0
+offset_right = 418.0
+offset_bottom = 39.0
+theme_override_font_sizes/font_size = 16
+text = "Clear"
+
+[node name="RichTextLabel" type="RichTextLabel" parent="."]
+layout_mode = 0
+offset_left = 152.0
+offset_top = 16.0
+offset_right = 1008.0
+offset_bottom = 40.0
+theme_override_font_sizes/normal_font_size = 16
+bbcode_enabled = true
+scroll_active = false
+
+[node name="ButtonDemo" type="Button" parent="."]
+layout_mode = 0
+offset_left = 16.0
+offset_top = 552.0
+offset_right = 256.0
+offset_bottom = 581.0
+theme_override_font_sizes/font_size = 16
+text = "Demo"
+
+[connection signal="text_changed" from="LineEditFilterLang" to="." method="_on_LineEditFilterName_text_changed"]
+[connection signal="text_changed" from="LineEditFilterName" to="." method="_on_LineEditFilterName_text_changed"]
+[connection signal="item_activated" from="Tree" to="." method="_on_ItemList_item_activated"]
+[connection signal="pressed" from="ButtonSpeak" to="." method="_on_ButtonSpeak_pressed"]
+[connection signal="pressed" from="ButtonIntSpeak" to="." method="_on_ButtonIntSpeak_pressed"]
+[connection signal="pressed" from="ButtonStop" to="." method="_on_ButtonStop_pressed"]
+[connection signal="pressed" from="ButtonPause" to="." method="_on_ButtonPause_pressed"]
+[connection signal="value_changed" from="HSliderRate" to="." method="_on_HSliderRate_value_changed"]
+[connection signal="value_changed" from="HSliderPitch" to="." method="_on_HSliderPitch_value_changed"]
+[connection signal="value_changed" from="HSliderVolume" to="." method="_on_HSliderVolume_value_changed"]
+[connection signal="pressed" from="Log/ButtonClearLog" to="." method="_on_ButtonClearLog_pressed"]
+[connection signal="pressed" from="ButtonDemo" to="." method="_on_Button_pressed"]

BIN
audio/text_to_speech/icon.png


+ 34 - 0
audio/text_to_speech/icon.png.import

@@ -0,0 +1,34 @@
+[remap]
+
+importer="texture"
+type="CompressedTexture2D"
+uid="uid://53lrswe56fov"
+path="res://.godot/imported/icon.png-487276ed1e3a0c39cad0279d744ee560.ctex"
+metadata={
+"vram_texture": false
+}
+
+[deps]
+
+source_file="res://icon.png"
+dest_files=["res://.godot/imported/icon.png-487276ed1e3a0c39cad0279d744ee560.ctex"]
+
+[params]
+
+compress/mode=0
+compress/high_quality=false
+compress/lossy_quality=0.7
+compress/hdr_compression=1
+compress/normal_map=0
+compress/channel_pack=0
+mipmaps/generate=false
+mipmaps/limit=-1
+roughness/mode=0
+roughness/src_normal=""
+process/fix_alpha_border=true
+process/premult_alpha=false
+process/normal_map_invert_y=false
+process/hdr_as_srgb=false
+process/hdr_clamp_exposure=false
+process/size_limit=0
+detect_3d/compress_to=1

+ 28 - 0
audio/text_to_speech/project.godot

@@ -0,0 +1,28 @@
+; Engine configuration file.
+; It's best edited using the editor UI and not directly,
+; since the parameters that go here are not all obvious.
+;
+; Format:
+;   [section] ; section goes between []
+;   param=value ; assign values to parameters
+
+config_version=5
+
+[application]
+
+config/name="Text-to-speech demo"
+config/description="This is a demo showing text-to-speech support."
+run/main_scene="res://control.tscn"
+config/features=PackedStringArray("4.0")
+config/icon="res://icon.png"
+
+[display]
+
+window/stretch/mode="canvas_items"
+window/stretch/aspect="expand"
+
+[rendering]
+
+renderer/rendering_method="gl_compatibility"
+renderer/rendering_method.mobile="gl_compatibility"
+environment/defaults/default_clear_color=Color(0.2, 0.2, 0.2, 1)

+ 0 - 0
audio/text_to_speech/screenshots/.gdignore


BIN
audio/text_to_speech/screenshots/text_to_speech.webp


+ 127 - 0
audio/text_to_speech/voice_list.gd

@@ -0,0 +1,127 @@
+extends Control
+
+# ID passed to DisplayServer.tts_speak() for the next utterance; incremented after each request.
+var id = 0 # Utterance id.
+# Maps an utterance ID to the text that was submitted for it.
+# Entries are erased by the "ended" and "canceled" callbacks.
+var ut_map = {}
+# Voice list as returned by DisplayServer.tts_get_voices(); filled in _ready().
+var vs
+
+func _ready():
+	# Fetch the available voices and list them in the two-column tree.
+	vs = DisplayServer.tts_get_voices()
+	var tree = $Tree
+	var root_item = tree.create_item()
+	tree.hide_root = true
+	tree.set_column_title(0, "Name")
+	tree.set_column_title(1, "Language")
+	tree.column_titles_visible = true
+	for voice in vs:
+		var row = tree.create_item(root_item)
+		row.set_text(0, voice["name"])
+		# The voice ID rides along as metadata; the speak handlers read it back
+		# with get_metadata(0) and pass it to tts_speak().
+		row.set_metadata(0, voice["id"])
+		row.set_text(1, voice["language"])
+	$Log.text += "%d voices available\n" % vs.size()
+	$Log.text += "=======\n"
+
+	# Register one handler per utterance event type.
+	DisplayServer.tts_set_utterance_callback(DisplayServer.TTS_UTTERANCE_STARTED, _on_utterance_start)
+	DisplayServer.tts_set_utterance_callback(DisplayServer.TTS_UTTERANCE_ENDED, _on_utterance_end)
+	DisplayServer.tts_set_utterance_callback(DisplayServer.TTS_UTTERANCE_CANCELED, _on_utterance_error)
+	DisplayServer.tts_set_utterance_callback(DisplayServer.TTS_UTTERANCE_BOUNDARY, _on_utterance_boundary)
+	set_process(true)
+
+func _process(delta):
+	# Keep the Pause toggle in sync with the TTS pause state every frame.
+	var pause_button = $ButtonPause
+	pause_button.button_pressed = DisplayServer.tts_is_paused()
+	# The status rectangle is red while speaking and white otherwise.
+	var indicator_color = Color(1, 0, 0) if DisplayServer.tts_is_speaking() else Color(1, 1, 1)
+	$ColorRect.color = indicator_color
+
+func _on_utterance_boundary(pos, id):
+	# Registered for TTS_UTTERANCE_BOUNDARY. Shows the stored text of utterance
+	# `id` with everything before `pos` highlighted and the rest left plain.
+	var full_text = ut_map[id]
+	var highlighted = full_text.substr(0, pos)
+	var remainder = full_text.substr(pos)
+	$RichTextLabel.text = "[bgcolor=yellow][color=black]" + highlighted + "[/color][/bgcolor]" + remainder
+
+func _on_utterance_start(id):
+	# Registered for TTS_UTTERANCE_STARTED; appends a line to the log.
+	var entry = "utterance %d started\n" % id
+	$Log.text += entry
+
+func _on_utterance_end(id):
+	# Registered for TTS_UTTERANCE_ENDED: highlight the whole text, log the
+	# event, then forget the utterance.
+	var finished_text = ut_map[id]
+	$RichTextLabel.text = "[bgcolor=yellow][color=black]" + finished_text + "[/color][/bgcolor]"
+	var entry = "utterance %d ended\n" % id
+	$Log.text += entry
+	ut_map.erase(id)
+
+func _on_utterance_error(id):
+	# Registered for TTS_UTTERANCE_CANCELED: clear the highlight label, log the
+	# event, then forget the utterance.
+	$RichTextLabel.text = ""
+	var entry = "utterance %d canceled/failed\n" % id
+	$Log.text += entry
+	ut_map.erase(id)
+
+# Handler for the "Stop" button's `pressed` signal; calls DisplayServer.tts_stop().
+func _on_ButtonStop_pressed():
+	DisplayServer.tts_stop()
+
+# Handler for the "Pause" toggle button: pauses TTS when the button is toggled
+# on and resumes it when toggled off.
+func _on_ButtonPause_pressed():
+	# In Godot 4 `pressed` is the button's signal; the toggle state lives in
+	# `button_pressed` (the same property _process() writes to).
+	if $ButtonPause.button_pressed:
+		DisplayServer.tts_pause()
+	else:
+		DisplayServer.tts_resume()
+
+# Shared implementation for the Speak and Interrupt buttons.
+# Sends the text in the Utterance box to tts_speak() using the voice selected
+# in the tree and the current slider values, then advances the utterance ID.
+# `interrupt` is forwarded as tts_speak()'s last argument; `log_verb` and
+# `action_name` only affect the log line and the alert text.
+func _speak_selected(interrupt, log_verb, action_name):
+	var selected = $Tree.get_selected()
+	if selected == null:
+		OS.alert("No voice selected.\nSelect a voice in the list, then try using %s again." % action_name)
+		return
+	var text = $Utterance.text
+	$Log.text += "utterance %d %s\n" % [id, log_verb]
+	# Remember the text so the boundary/end callbacks can display it.
+	ut_map[id] = text
+	DisplayServer.tts_speak(text, selected.get_metadata(0), $HSliderVolume.value, $HSliderPitch.value, $HSliderRate.value, id, interrupt)
+	id += 1
+
+# Handler for the "Speak" button: speaks without interrupting.
+func _on_ButtonSpeak_pressed():
+	_speak_selected(false, "queried", "Speak")
+
+# Handler for the "Interrupt" button: speaks with the interrupt flag set.
+func _on_ButtonIntSpeak_pressed():
+	_speak_selected(true, "interrupt", "Interrupt")
+
+# The scene connects Tree.item_activated to this method name, but the script
+# did not define it, so activating a voice row had no handler to call.
+# Activating a row now behaves like pressing "Speak".
+func _on_ItemList_item_activated():
+	_on_ButtonSpeak_pressed()
+
+# Handler for the "Clear" button inside the log box; empties the log text.
+func _on_ButtonClearLog_pressed():
+	$Log.text = ""
+
+func _on_HSliderRate_value_changed(value):
+	# Show the rate slider's value next to it as a two-decimal multiplier.
+	var shown = "%.2fx" % value
+	$HSliderRate/Value.text = shown
+
+func _on_HSliderPitch_value_changed(value):
+	# Show the pitch slider's value next to it as a two-decimal multiplier.
+	var shown = "%.2fx" % value
+	$HSliderPitch/Value.text = shown
+
+func _on_HSliderVolume_value_changed(value):
+	# Show the volume slider's value next to it as a percentage.
+	var shown = "%d%%" % value
+	$HSliderVolume/Value.text = shown
+
+func _on_Button_pressed():
+	# Handler for the "Demo" button. For each language that has at least one
+	# voice, speaks two fixed lines with the first voice returned.
+	# Each entry is [language code, first line, second line].
+	var samples = [
+		["en", "Beware the Jabberwock, my son!", "The jaws that bite, the claws that catch!"],
+		["es", "¡Cuidado, hijo, con el Fablistanón!", "¡Con sus dientes y garras, muerde, apresa!"],
+		["ru", "О, бойся Бармаглота, сын!", "Он так свирлеп и дик!"],
+	]
+	for sample in samples:
+		var voices = DisplayServer.tts_get_voices_for_language(sample[0])
+		if voices.is_empty():
+			continue
+		var first_line = sample[1]
+		var second_line = sample[2]
+		# Record both texts before speaking so the callbacks can look them up.
+		ut_map[id] = first_line
+		ut_map[id + 1] = second_line
+		DisplayServer.tts_speak(first_line, voices[0], 50, 1, 1, id)
+		DisplayServer.tts_speak(second_line, voices[0], 50, 1, 1, id + 1)
+		id += 2
+
+func _on_LineEditFilterName_text_changed(new_text):
+	# Connected to both filter boxes. Rebuilds the tree with only the voices
+	# whose name and language contain the respective filter text
+	# (case-insensitive); an empty filter matches everything.
+	var name_filter = $LineEditFilterName.text.to_lower()
+	var lang_filter = $LineEditFilterLang.text.to_lower()
+	$Tree.clear()
+	var root_item = $Tree.create_item()
+	for voice in vs:
+		if !name_filter.is_empty() && !(name_filter in voice["name"].to_lower()):
+			continue
+		if !lang_filter.is_empty() && !(lang_filter in voice["language"].to_lower()):
+			continue
+		var row = $Tree.create_item(root_item)
+		row.set_text(0, voice["name"])
+		row.set_metadata(0, voice["id"])
+		row.set_text(1, voice["language"])