godotengine · akien-mga · Apr 28, 2022 · Nov 4, 2021
@@ -87,7 +87,7 @@ jobs:
           sudo apt-get install build-essential pkg-config libx11-dev libxcursor-dev \
               libxinerama-dev libgl1-mesa-dev libglu-dev libasound2-dev libpulse-dev \
               libdbus-1-dev libudev-dev libxi-dev libxrandr-dev yasm xvfb wget unzip \
-              llvm
+              llvm libspeechd-dev speech-dispatcher
 
       - name: Setup Godot build cache
         uses: ./.github/actions/godot-cache

@@ -814,6 +814,93 @@
 				[b]Note:[/b] This method is implemented on Windows.
 			</description>
 		</method>
+		<method name="tts_get_voices" qualifiers="const">
+			<return type="Array" />
+			<description>
+				Returns an [Array] of voice information dictionaries.
+				Each [Dictionary] contains three [String] entries:
+				- [code]name[/code] is the voice name.
+				- [code]id[/code] is the voice identifier.
+				- [code]language[/code] is the language code in [code]lang_Variant[/code] format. The [code]lang[/code] part is a 2 or 3-letter code based on the ISO-639 standard, in lowercase. The [code]Variant[/code] part is an engine-dependent string describing country, region, and/or dialect.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_get_voices_for_language" qualifiers="const">
+			<return type="PackedStringArray" />
+			<argument index="0" name="language" type="String" />
+			<description>
+				Returns a [PackedStringArray] of voice identifiers for the [code]language[/code].
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_is_paused" qualifiers="const">
+			<return type="bool" />
+			<description>
+				Returns [code]true[/code] if the synthesizer is in a paused state.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_is_speaking" qualifiers="const">
+			<return type="bool" />
+			<description>
+				Returns [code]true[/code] if the synthesizer is generating speech, or has utterances waiting in the queue.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_pause">
+			<return type="void" />
+			<description>
+				Puts the synthesizer into a paused state.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_resume">
+			<return type="void" />
+			<description>
+				Resumes the synthesizer if it was paused.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_set_utterance_callback">
+			<return type="void" />
+			<argument index="0" name="event" type="int" enum="DisplayServer.TTSUtteranceEvent" />
+			<argument index="1" name="callable" type="Callable" />
+			<description>
+				Adds a callback, which is called when the utterance has started, finished, been canceled, or reached a text boundary.
+				- [code]TTS_UTTERANCE_STARTED[/code], [code]TTS_UTTERANCE_ENDED[/code], and [code]TTS_UTTERANCE_CANCELED[/code] callable's method should take one [int] parameter, the utterance id.
+				- [code]TTS_UTTERANCE_BOUNDARY[/code] callable's method should take two [int] parameters, the index of the character and the utterance id.
+				[b]Note:[/b] The granularity of the boundary callbacks is engine dependent.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_speak">
+			<return type="void" />
+			<argument index="0" name="text" type="String" />
+			<argument index="1" name="voice" type="String" />
+			<argument index="2" name="volume" type="int" default="50" />
+			<argument index="3" name="pitch" type="float" default="1.0" />
+			<argument index="4" name="rate" type="float" default="1.0" />
+			<argument index="5" name="utterance_id" type="int" default="0" />
+			<argument index="6" name="interrupt" type="bool" default="false" />
+			<description>
+				Adds an utterance to the queue. If [code]interrupt[/code] is [code]true[/code], the queue is cleared first.
+				- [code]voice[/code] identifier is one of the [code]"id"[/code] values returned by [method tts_get_voices] or one of the values returned by [method tts_get_voices_for_language].
+				- [code]volume[/code] ranges from [code]0[/code] (lowest) to [code]100[/code] (highest).
+				- [code]pitch[/code] ranges from [code]0.0[/code] (lowest) to [code]2.0[/code] (highest), [code]1.0[/code] is the default pitch for the current voice.
+				- [code]rate[/code] ranges from [code]0.1[/code] (lowest) to [code]10.0[/code] (highest), [code]1.0[/code] is the normal speaking rate. Other values are interpreted relative to the normal speaking rate.
+				- [code]utterance_id[/code] is passed as a parameter to the callback functions.
+				[b]Note:[/b] On Windows and Linux, utterance [code]text[/code] can use SSML markup. SSML support is engine and voice dependent. If the engine does not support SSML, you should strip out all XML markup before calling [method tts_speak].
+				[b]Note:[/b] The granularity of pitch, rate, and volume is engine and voice dependent. Values may be truncated.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_stop">
+			<return type="void" />
+			<description>
+				Stops synthesis in progress and removes all utterances from the queue.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
 		<method name="virtual_keyboard_get_height" qualifiers="const">
 			<return type="int" />
 			<description>
@@ -1184,6 +1271,9 @@
 		</constant>
 		<constant name="FEATURE_CLIPBOARD_PRIMARY" value="18" enum="Feature">
 		</constant>
+		<constant name="FEATURE_TEXT_TO_SPEECH" value="19" enum="Feature">
+			Display server supports text-to-speech. See [code]tts_*[/code] methods.
+		</constant>
 		<constant name="MOUSE_MODE_VISIBLE" value="0" enum="MouseMode">
 			Makes the mouse cursor visible if it is hidden.
 		</constant>
@@ -1335,5 +1425,17 @@
 			- MacOS: [code]NSView*[/code] for the window main view.
 			- iOS: [code]UIView*[/code] for the window main view.
 		</constant>
+		<constant name="TTS_UTTERANCE_STARTED" value="0" enum="TTSUtteranceEvent">
+			Utterance has begun to be spoken.
+		</constant>
+		<constant name="TTS_UTTERANCE_ENDED" value="1" enum="TTSUtteranceEvent">
+			Utterance was successfully finished.
+		</constant>
+		<constant name="TTS_UTTERANCE_CANCELED" value="2" enum="TTSUtteranceEvent">
+			Utterance was canceled, or TTS service was unable to process it.
+		</constant>
+		<constant name="TTS_UTTERANCE_BOUNDARY" value="3" enum="TTSUtteranceEvent">
+			Utterance reached a word or sentence boundary.
+		</constant>
 	</constants>
 </class>
@@ -1441,6 +1441,14 @@
 				Aligns shaped text to the given tab-stops.
 			</description>
 		</method>
+		<method name="string_get_word_breaks" qualifiers="const">
+			<return type="PackedInt32Array" />
+			<argument index="0" name="string" type="String" />
+			<argument index="1" name="language" type="String" default="&quot;&quot;" />
+			<description>
+				Returns an array of the word break character offsets.
+			</description>
+		</method>
 		<method name="string_to_lower" qualifiers="const">
 			<return type="String" />
 			<argument index="0" name="string" type="String" />

@@ -1461,6 +1461,14 @@
 				[b]Note:[/b] This method is used by default line/word breaking methods, and its implementation might be omitted if custom line breaking in implemented.
 			</description>
 		</method>
+		<method name="string_get_word_breaks" qualifiers="virtual const">
+			<return type="PackedInt32Array" />
+			<argument index="0" name="string" type="String" />
+			<argument index="1" name="language" type="String" />
+			<description>
+				Returns an array of the word break character offsets.
+			</description>
+		</method>
 		<method name="string_to_lower" qualifiers="virtual const">
 			<return type="String" />
 			<argument index="0" name="string" type="String" />

@@ -3255,6 +3255,19 @@ void TextServerAdvanced::font_set_global_oversampling(double p_oversampling) {
 /* Shaped text buffer interface                                          */
 /*************************************************************************/
 
+// Converts an offset into p_utf16 to the matching offset into p_utf32.
+int64_t TextServerAdvanced::_convert_pos(const String &p_utf32, const Char16String &p_utf16, int64_t p_pos) const {
+	// When both encodings have the same length the offset is returned unchanged.
+	if (p_utf32.length() == p_utf16.length()) {
+		return p_pos;
+	}
+
+	// Otherwise subtract one for every lead surrogate found before the offset.
+	const UChar *units = p_utf16.ptr();
+	int64_t lead_count = 0;
+	for (int idx = 0; idx < p_pos; idx++) {
+		lead_count += U16_IS_LEAD(units[idx]) ? 1 : 0;
+	}
+	return p_pos - lead_count;
+}
+
 int64_t TextServerAdvanced::_convert_pos(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const {
 	int64_t limit = p_pos;
 	if (p_sd->text.length() != p_sd->utf16.length()) {
@@ -5555,6 +5568,53 @@ String TextServerAdvanced::string_to_lower(const String &p_string, const String
 	return String::utf16(lower.ptr(), len);
 }
 
+// Returns the character offsets in p_string that are reported as word breaks.
+// NOTE(review): the iterator is opened with UBRK_LINE even though the method reports word breaks - confirm this is intended.
+PackedInt32Array TextServerAdvanced::string_get_word_breaks(const String &p_string, const String &p_language) const {
+	const int char_count = p_string.length();
+
+	// The ICU break iterator is fed the UTF-16 form of the text.
+	Char16String utf16 = p_string.utf16();
+
+	Set<int> candidates;
+	UErrorCode status = U_ZERO_ERROR;
+	UBreakIterator *iter = ubrk_open(UBRK_LINE, p_language.ascii().get_data(), (const UChar *)utf16.ptr(), utf16.length(), &status);
+	if (!U_FAILURE(status)) {
+		while (ubrk_next(iter) != UBRK_DONE) {
+			// Translate the UTF-16 boundary to a character offset and step back by one.
+			int offset = _convert_pos(p_string, utf16, ubrk_current(iter)) - 1;
+			// An offset that lands on the final character is not recorded.
+			if (offset != char_count - 1) {
+				candidates.insert(offset);
+			}
+		}
+	} else {
+		// No data loaded - treat every whitespace and line break character as a candidate.
+		for (int idx = 0; idx < char_count; idx++) {
+			char32_t ch = p_string[idx];
+			if (is_whitespace(ch) || is_linebreak(ch)) {
+				candidates.insert(idx);
+			}
+		}
+	}
+	ubrk_close(iter);
+
+	PackedInt32Array ret;
+	for (int idx = 0; idx < char_count; idx++) {
+		char32_t ch = p_string[idx];
+		// U+FFFC (object replacement character) is never reported.
+		if (ch == 0xfffc) {
+			continue;
+		}
+		// Punctuation, underscores and the candidates collected above are all reported.
+		const bool punct_not_underscore = u_ispunct(ch) && ch != 0x005F;
+		if (punct_not_underscore || is_underscore(ch) || candidates.has(idx)) {
+			ret.push_back(idx);
+		}
+	}
+	return ret;
+}
+
 TextServerAdvanced::TextServerAdvanced() {
 	_insert_num_systems_lang();
 	_insert_feature_sets();

@@ -393,11 +393,13 @@ class TextServerAdvanced : public TextServerExtension {
 	mutable RID_PtrOwner<ShapedTextDataAdvanced> shaped_owner;
 
 	void _realign(ShapedTextDataAdvanced *p_sd) const;
+	int64_t _convert_pos(const String &p_utf32, const Char16String &p_utf16, int64_t p_pos) const;
 	int64_t _convert_pos(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const;
 	int64_t _convert_pos_inv(const ShapedTextDataAdvanced *p_sd, int64_t p_pos) const;
 	bool _shape_substr(ShapedTextDataAdvanced *p_new_sd, const ShapedTextDataAdvanced *p_sd, int64_t p_start, int64_t p_length) const;
 	void _shape_run(ShapedTextDataAdvanced *p_sd, int64_t p_start, int64_t p_end, hb_script_t p_script, hb_direction_t p_direction, Array p_fonts, int64_t p_span, int64_t p_fb_index);
 	Glyph _shape_single_glyph(ShapedTextDataAdvanced *p_sd, char32_t p_char, hb_script_t p_script, hb_direction_t p_direction, const RID &p_font, int64_t p_font_size);
+
 	_FORCE_INLINE_ void _add_featuers(const Dictionary &p_source, Vector<hb_feature_t> &r_ftrs);
 
 	// HarfBuzz bitmap font interface.
@@ -686,6 +688,8 @@ class TextServerAdvanced : public TextServerExtension {
 	virtual String parse_number(const String &p_string, const String &p_language = "") const override;
 	virtual String percent_sign(const String &p_language = "") const override;
 
+	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override;
+
 	virtual String strip_diacritics(const String &p_string) const override;
 
 	virtual String string_to_upper(const String &p_string, const String &p_language = "") const override;

@@ -3079,7 +3079,7 @@ bool TextServerFallback::shaped_text_update_breaks(const RID &p_shaped) {
 		if (sd_glyphs[i].count > 0) {
 			char32_t c = sd->text[sd_glyphs[i].start - sd->start];
 			if (c_punct_size == 0) {
-				if (is_punct(c)) {
+				if (is_punct(c) && c != 0x005F) {
 					sd_glyphs[i].flags |= GRAPHEME_IS_PUNCTUATION;
 				}
 			} else {
@@ -3623,6 +3623,29 @@ String TextServerFallback::string_to_lower(const String &p_string, const String
 	return lower;
 }
 
+// Returns the offset of every punctuation, underscore, whitespace and line break character in p_string.
+// p_language is not consulted by this implementation.
+PackedInt32Array TextServerFallback::string_get_word_breaks(const String &p_string, const String &p_language) const {
+	PackedInt32Array offsets;
+	const int char_count = p_string.length();
+	for (int idx = 0; idx < char_count; idx++) {
+		const char32_t ch = p_string[idx];
+		// U+FFFC (object replacement character) is never reported.
+		if (ch == 0xfffc) {
+			continue;
+		}
+		const bool punct_not_underscore = is_punct(ch) && ch != 0x005F;
+		if (punct_not_underscore || is_underscore(ch) || is_whitespace(ch) || is_linebreak(ch)) {
+			offsets.push_back(idx);
+		}
+	}
+	return offsets;
+}
+
 TextServerFallback::TextServerFallback() {
 	_insert_feature_sets();
 };

@@ -573,6 +573,8 @@ class TextServerFallback : public TextServerExtension {
 	virtual double shaped_text_get_underline_position(const RID &p_shaped) const override;
 	virtual double shaped_text_get_underline_thickness(const RID &p_shaped) const override;
 
+	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override;
+
 	virtual String string_to_upper(const String &p_string, const String &p_language = "") const override;
 	virtual String string_to_lower(const String &p_string, const String &p_language = "") const override;
 

@@ -8,6 +8,7 @@ android_files = [
     "file_access_android.cpp",
     "audio_driver_opensl.cpp",
     "dir_access_jandroid.cpp",
+    "tts_android.cpp",
     "thread_jandroid.cpp",
     "net_socket_android.cpp",
     "java_godot_lib_jni.cpp",

@@ -34,6 +34,7 @@
 #include "java_godot_io_wrapper.h"
 #include "java_godot_wrapper.h"
 #include "os_android.h"
+#include "tts_android.h"
 
 #if defined(VULKAN_ENABLED)
 #include "drivers/vulkan/rendering_device_vulkan.h"
@@ -63,6 +64,7 @@ bool DisplayServerAndroid::has_feature(Feature p_feature) const {
 		case FEATURE_ORIENTATION:
 		case FEATURE_TOUCHSCREEN:
 		case FEATURE_VIRTUAL_KEYBOARD:
+		case FEATURE_TEXT_TO_SPEECH:
 			return true;
 		default:
 			return false;
@@ -73,6 +75,34 @@ String DisplayServerAndroid::get_name() const {
 	return "Android";
 }
 
+// Returns the speaking state reported by TTS_Android::is_speaking().
+bool DisplayServerAndroid::tts_is_speaking() const {
+	return TTS_Android::is_speaking();
+}
+
+// Returns the paused state reported by TTS_Android::is_paused().
+bool DisplayServerAndroid::tts_is_paused() const {
+	return TTS_Android::is_paused();
+}
+
+// Returns the voice list provided by TTS_Android::get_voices().
+Array DisplayServerAndroid::tts_get_voices() const {
+	return TTS_Android::get_voices();
+}
+
+// Forwards the utterance request to TTS_Android::speak(), passing every argument through unchanged and in the same order.
+void DisplayServerAndroid::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
+	TTS_Android::speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt);
+}
+
+// Delegates the pause request to TTS_Android::pause().
+void DisplayServerAndroid::tts_pause() {
+	TTS_Android::pause();
+}
+
+// Delegates the resume request to TTS_Android::resume().
+void DisplayServerAndroid::tts_resume() {
+	TTS_Android::resume();
+}
+
+// Delegates the stop request to TTS_Android::stop().
+void DisplayServerAndroid::tts_stop() {
+	TTS_Android::stop();
+}
+
 void DisplayServerAndroid::clipboard_set(const String &p_text) {
 	GodotJavaWrapper *godot_java = OS_Android::get_singleton()->get_godot_java();
 	ERR_FAIL_COND(!godot_java);

@@ -91,6 +91,15 @@ class DisplayServerAndroid : public DisplayServer {
 	virtual bool has_feature(Feature p_feature) const override;
 	virtual String get_name() const override;
 
+	virtual bool tts_is_speaking() const override;
+	virtual bool tts_is_paused() const override;
+	virtual Array tts_get_voices() const override;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override;
+	virtual void tts_pause() override;
+	virtual void tts_resume() override;
+	virtual void tts_stop() override;
+
 	virtual void clipboard_set(const String &p_text) override;
 	virtual String clipboard_get() const override;
 	virtual bool clipboard_has() const override;