Skip to content

Commit

Permalink
feat: [texttospeech] Add low latency journey option to proto (#5742)
Browse files Browse the repository at this point in the history
* feat: Add low latency journey option to proto
feat: Add CustomPronunciationParams for upcoming feature work

PiperOrigin-RevId: 684207737

Source-Link: googleapis/googleapis@d6f9dbc

Source-Link: googleapis/googleapis-gen@42ac4d7
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXRleHR0b3NwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiNDJhYzRkNzViZWMxYjlkZWViYjA2OGZmODQyYzk1YTgxZGE5YjRkMCJ9

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* feat: Add low latency journey option to proto
feat: Add CustomPronunciationParams for upcoming feature work

PiperOrigin-RevId: 684229364

Source-Link: googleapis/googleapis@10b8dc3

Source-Link: googleapis/googleapis-gen@deaa9f3
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXRleHR0b3NwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiZGVhYTlmMzE5ZjFjNmMxOWE5ZDM2ZWEyMzNhZTMwNzU2NmI2MGMwOSJ9

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
gcf-owl-bot[bot] and gcf-owl-bot[bot] authored Oct 10, 2024
1 parent cec22ab commit d34249c
Show file tree
Hide file tree
Showing 12 changed files with 4,842 additions and 2,103 deletions.
4 changes: 2 additions & 2 deletions packages/google-cloud-texttospeech/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Google APIs Client Libraries, in [Client Libraries Explained][explained].
1. [Select or create a Cloud Platform project][projects].
1. [Enable billing for your project][billing].
1. [Enable the Google Cloud Text-to-Speech API][enable_api].
1. [Set up authentication with a service account][auth] so you can access the
1. [Set up authentication][auth] so you can access the
API from your local workstation.

### Installing the client library
Expand Down Expand Up @@ -175,4 +175,4 @@ See [LICENSE](https://github.com/googleapis/google-cloud-node/blob/main/LICENSE)
[projects]: https://console.cloud.google.com/project
[billing]: https://support.google.com/cloud/answer/6293499#enable-billing
[enable_api]: https://console.cloud.google.com/flows/enableapi?apiid=texttospeech.googleapis.com
[auth]: https://cloud.google.com/docs/authentication/getting-started
[auth]: https://cloud.google.com/docs/authentication/external/set-up-adc-local
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,13 @@ message Voice {
int32 natural_sample_rate_hertz = 4;
}

// Advanced voice options for a synthesis request. Set through the
// `advanced_voice_options` field of `SynthesizeSpeechRequest`.
message AdvancedVoiceOptions {
// Only for Journey voices. If false, the synthesis will be context aware
// and have higher latency.
//
// NOTE(review): the behavior when this field is left unset is not stated
// here; confirm whether unset is treated the same as false.
optional bool low_latency_journey_synthesis = 1;
}

// The top-level message sent by the client for the `SynthesizeSpeech` method.
message SynthesizeSpeechRequest {
// Required. The Synthesizer requires either plain text or SSML as input.
Expand All @@ -161,6 +168,44 @@ message SynthesizeSpeechRequest {

// Required. The configuration of the synthesized audio.
AudioConfig audio_config = 3 [(google.api.field_behavior) = REQUIRED];

// Advanced voice options.
optional AdvancedVoiceOptions advanced_voice_options = 8;
}

// Pronunciation customization for a single phrase: pairs the phrase with the
// pronunciation to use for it and the phonetic encoding that pronunciation is
// written in. Entries of this type are collected in the repeated
// `pronunciations` field of `CustomPronunciations`.
message CustomPronunciationParams {
// The phonetic encoding in which a pronunciation is written.
enum PhoneticEncoding {
// Not specified.
PHONETIC_ENCODING_UNSPECIFIED = 0;

// International Phonetic Alphabet (IPA), e.g. apple -> ˈæpəl
// https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
PHONETIC_ENCODING_IPA = 1;

// X-SAMPA, e.g. apple -> "{p@l"
// https://en.wikipedia.org/wiki/X-SAMPA
PHONETIC_ENCODING_X_SAMPA = 2;
}

// The phrase to which the customization will be applied.
// The phrase can be multiple words (for example, a proper noun), but
// should not span a whole sentence.
optional string phrase = 1;

// The phonetic encoding in which `pronunciation` is written.
optional PhoneticEncoding phonetic_encoding = 2;

// The pronunciation of the phrase. This must be written in the phonetic
// encoding given by `phonetic_encoding`.
optional string pronunciation = 3;
}

// A collection of pronunciation customizations. Referenced by the
// `custom_pronunciations` field of `SynthesisInput`.
message CustomPronunciations {
// The pronunciation customizations to be applied, one entry per phrase.
repeated CustomPronunciationParams pronunciations = 1;
}

// Contains text input to be synthesized. Either `text` or `ssml` must be
Expand All @@ -180,6 +225,21 @@ message SynthesisInput {
// [SSML](https://cloud.google.com/text-to-speech/docs/ssml).
string ssml = 2;
}

// Optional. The pronunciation customizations to be applied to the input. If
// this is set, the input will be synthesized using the given pronunciation
// customizations.
//
// The initial support will be for EFIGS (English, French,
// Italian, German, Spanish) languages, as provided in
// VoiceSelectionParams. Journey and Instant Clone voices are
// not supported yet.
//
// In order to customize the pronunciation of a phrase, the phrase must appear
// verbatim in the input (`text` or `ssml`). If using SSML, the phrase must not
// be inside a phoneme tag (entirely or partially).
CustomPronunciations custom_pronunciations = 3
[(google.api.field_behavior) = OPTIONAL];
}

// Description of which voice to use for a synthesis request.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,13 @@ message Voice {
int32 natural_sample_rate_hertz = 4;
}

// Advanced voice options for a synthesis request. Set through the
// `advanced_voice_options` field of `SynthesizeSpeechRequest`.
message AdvancedVoiceOptions {
// Only for Journey voices. If false, the synthesis will be context aware
// and have higher latency.
//
// NOTE(review): the behavior when this field is left unset is not stated
// here; confirm whether unset is treated the same as false.
optional bool low_latency_journey_synthesis = 1;
}

// The top-level message sent by the client for the `SynthesizeSpeech` method.
message SynthesizeSpeechRequest {
// The type of timepoint information that is returned in the response.
Expand All @@ -176,6 +183,44 @@ message SynthesizeSpeechRequest {

// Whether and what timepoints are returned in the response.
repeated TimepointType enable_time_pointing = 4;

// Advanced voice options.
optional AdvancedVoiceOptions advanced_voice_options = 8;
}

// Pronunciation customization for a single phrase: pairs the phrase with the
// pronunciation to use for it and the phonetic encoding that pronunciation is
// written in. Entries of this type are collected in the repeated
// `pronunciations` field of `CustomPronunciations`.
message CustomPronunciationParams {
// The phonetic encoding in which a pronunciation is written.
enum PhoneticEncoding {
// Not specified.
PHONETIC_ENCODING_UNSPECIFIED = 0;

// International Phonetic Alphabet (IPA), e.g. apple -> ˈæpəl
// https://en.wikipedia.org/wiki/International_Phonetic_Alphabet
PHONETIC_ENCODING_IPA = 1;

// X-SAMPA, e.g. apple -> "{p@l"
// https://en.wikipedia.org/wiki/X-SAMPA
PHONETIC_ENCODING_X_SAMPA = 2;
}

// The phrase to which the customization will be applied.
// The phrase can be multiple words (for example, a proper noun), but
// should not span a whole sentence.
optional string phrase = 1;

// The phonetic encoding in which `pronunciation` is written.
optional PhoneticEncoding phonetic_encoding = 2;

// The pronunciation of the phrase. This must be written in the phonetic
// encoding given by `phonetic_encoding`.
optional string pronunciation = 3;
}

// A collection of pronunciation customizations. Referenced by the
// `custom_pronunciations` field of `SynthesisInput`.
message CustomPronunciations {
// The pronunciation customizations to be applied, one entry per phrase.
repeated CustomPronunciationParams pronunciations = 1;
}

// Contains text input to be synthesized. Either `text` or `ssml` must be
Expand All @@ -195,6 +240,21 @@ message SynthesisInput {
// [SSML](https://cloud.google.com/text-to-speech/docs/ssml).
string ssml = 2;
}

// Optional. The pronunciation customizations to be applied to the input. If
// this is set, the input will be synthesized using the given pronunciation
// customizations.
//
// The initial support will be for EFIGS (English, French,
// Italian, German, Spanish) languages, as provided in
// VoiceSelectionParams. Journey and Instant Clone voices are
// not supported yet.
//
// In order to customize the pronunciation of a phrase, the phrase must appear
// verbatim in the input (`text` or `ssml`). If using SSML, the phrase must not
// be inside a phoneme tag (entirely or partially).
CustomPronunciations custom_pronunciations = 3
[(google.api.field_behavior) = OPTIONAL];
}

// Description of which voice to use for a synthesis request.
Expand Down
Loading

0 comments on commit d34249c

Please sign in to comment.