Skip to content

Commit

Permalink
feat: [texttospeech] add multi-speaker markup, which allows generatin…
Browse files Browse the repository at this point in the history
…g dialogue between multiple speakers (#5760)

* feat: Add brand voice lite

PiperOrigin-RevId: 687058189

Source-Link: googleapis/googleapis@5ebb5c1

Source-Link: googleapis/googleapis-gen@293badf
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXRleHR0b3NwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiMjkzYmFkZmQ2MTQ1OTdiYTFkZDE1MTZkYjU4MDk0NTU3ZjdiNGM3ZSJ9

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* feat: add brand voice lite, which lets you clone a voice with just 10 seconds of audio

PiperOrigin-RevId: 688491221

Source-Link: googleapis/googleapis@60688dc

Source-Link: googleapis/googleapis-gen@4072c99
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXRleHR0b3NwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiNDA3MmM5OTliMzZiYWEyMWNjYjI5YzVmNWQ1NjA4OTYyODQwYjMxYSJ9

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* feat: add multi-speaker markup, which allows generating dialogue between multiple speakers

PiperOrigin-RevId: 689444598

Source-Link: googleapis/googleapis@56f5fa4

Source-Link: googleapis/googleapis-gen@532d282
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXRleHR0b3NwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiNTMyZDI4MjkxMmIxZWFmMTQxNTgxN2MwNjhlY2UyZTM1NGYwMDU1ZSJ9

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* feat: add multi-speaker markup, which allows generating dialogue between multiple speakers

PiperOrigin-RevId: 690597076

Source-Link: googleapis/googleapis@3bc81b7

Source-Link: googleapis/googleapis-gen@a2253a0
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXRleHR0b3NwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiYTIyNTNhMDFlNmU5M2U3OWExZGVjN2UwYjJhZWVhNzA4YjYzNGQxZSJ9

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: sofisl <55454395+sofisl@users.noreply.github.com>
  • Loading branch information
3 people authored Oct 30, 2024
1 parent 96bb1b5 commit f9dea89
Show file tree
Hide file tree
Showing 7 changed files with 2,353 additions and 129 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.TextToSpeech.V1";
option go_package = "cloud.google.com/go/texttospeech/apiv1/texttospeechpb;texttospeechpb";
option java_multiple_files = true;
Expand Down Expand Up @@ -208,6 +207,22 @@ message CustomPronunciations {
repeated CustomPronunciationParams pronunciations = 1;
}

// A collection of turns for multi-speaker synthesis. Each turn pairs a speaker
// with the text that speaker is to say.
message MultiSpeakerMarkup {
// A single multi-speaker turn: one speaker and the text for that speaker.
message Turn {
// Required. The speaker of the turn, for example, 'O' or 'Q'. Refer to the
// documentation for the list of available speakers.
string speaker = 1 [(google.api.field_behavior) = REQUIRED];

// Required. The text to speak in this turn.
string text = 2 [(google.api.field_behavior) = REQUIRED];
}

// Required. The speaker turns that make up the multi-speaker input.
repeated Turn turns = 1 [(google.api.field_behavior) = REQUIRED];
}

// Contains text input to be synthesized. Either `text` or `ssml` must be
// supplied. Supplying both or neither returns
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. The
Expand All @@ -224,6 +239,10 @@ message SynthesisInput {
// more information, see
// [SSML](https://cloud.google.com/text-to-speech/docs/ssml).
string ssml = 2;

// The multi-speaker input to be synthesized. Only applicable for
// multi-speaker synthesis.
MultiSpeakerMarkup multi_speaker_markup = 4;
}

// Optional. The pronunciation customizations to be applied to the input. If
Expand Down Expand Up @@ -273,6 +292,11 @@ message VoiceSelectionParams {
// the service will choose the custom voice matching the specified
// configuration.
CustomVoiceParams custom_voice = 4;

// Optional. The configuration for a voice clone. If
// [VoiceCloneParams.voice_cloning_key] is set, the service will choose the
// voice clone matching the specified configuration.
VoiceCloneParams voice_clone = 5 [(google.api.field_behavior) = OPTIONAL];
}

// Description of audio data to be synthesized.
Expand Down Expand Up @@ -360,6 +384,12 @@ message CustomVoiceParams {
[deprecated = true, (google.api.field_behavior) = OPTIONAL];
}

// The configuration of the Voice Clone feature.
message VoiceCloneParams {
// Required. The voice cloning key, created by GenerateVoiceCloningKey.
string voice_cloning_key = 1 [(google.api.field_behavior) = REQUIRED];
}

// The message returned to the client by the `SynthesizeSpeech` method.
message SynthesizeSpeechResponse {
// The audio data bytes encoded as specified in the request, including the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import "google/cloud/texttospeech/v1/cloud_tts.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.TextToSpeech.V1";
option go_package = "cloud.google.com/go/texttospeech/apiv1/texttospeechpb;texttospeechpb";
option java_multiple_files = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.TextToSpeech.V1Beta1";
option go_package = "cloud.google.com/go/texttospeech/apiv1beta1/texttospeechpb;texttospeechpb";
option java_multiple_files = true;
Expand Down Expand Up @@ -223,6 +222,22 @@ message CustomPronunciations {
repeated CustomPronunciationParams pronunciations = 1;
}

// A collection of turns for multi-speaker synthesis. Each turn pairs a speaker
// with the text that speaker is to say.
message MultiSpeakerMarkup {
// A single multi-speaker turn: one speaker and the text for that speaker.
message Turn {
// Required. The speaker of the turn, for example, 'O' or 'Q'. Refer to the
// documentation for the list of available speakers.
string speaker = 1 [(google.api.field_behavior) = REQUIRED];

// Required. The text to speak in this turn.
string text = 2 [(google.api.field_behavior) = REQUIRED];
}

// Required. The speaker turns that make up the multi-speaker input.
repeated Turn turns = 1 [(google.api.field_behavior) = REQUIRED];
}

// Contains text input to be synthesized. Either `text` or `ssml` must be
// supplied. Supplying both or neither returns
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. The
Expand All @@ -239,6 +254,10 @@ message SynthesisInput {
// more information, see
// [SSML](https://cloud.google.com/text-to-speech/docs/ssml).
string ssml = 2;

// The multi-speaker input to be synthesized. Only applicable for
// multi-speaker synthesis.
MultiSpeakerMarkup multi_speaker_markup = 4;
}

// Optional. The pronunciation customizations to be applied to the input. If
Expand Down Expand Up @@ -288,6 +307,11 @@ message VoiceSelectionParams {
// the service will choose the custom voice matching the specified
// configuration.
CustomVoiceParams custom_voice = 4;

// Optional. The configuration for a voice clone. If
// [VoiceCloneParams.voice_cloning_key] is set, the service will choose the
// voice clone matching the specified configuration.
VoiceCloneParams voice_clone = 5 [(google.api.field_behavior) = OPTIONAL];
}

// Description of audio data to be synthesized.
Expand Down Expand Up @@ -375,6 +399,12 @@ message CustomVoiceParams {
[deprecated = true, (google.api.field_behavior) = OPTIONAL];
}

// The configuration of the Voice Clone feature.
message VoiceCloneParams {
// Required. The voice cloning key, created by GenerateVoiceCloningKey.
string voice_cloning_key = 1 [(google.api.field_behavior) = REQUIRED];
}

// The message returned to the client by the `SynthesizeSpeech` method.
message SynthesizeSpeechResponse {
// The audio data bytes encoded as specified in the request, including the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import "google/cloud/texttospeech/v1beta1/cloud_tts.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.TextToSpeech.V1Beta1";
option go_package = "cloud.google.com/go/texttospeech/apiv1beta1/texttospeechpb;texttospeechpb";
option java_multiple_files = true;
Expand Down
Loading

0 comments on commit f9dea89

Please sign in to comment.