Skip to content

Commit

Permalink
feat: GenAI - Added audio_timestamp to GenerationConfig.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 689902378
  • Loading branch information
vertex-sdk-bot authored and copybara-github committed Oct 25, 2024
1 parent 1f3b2d8 commit 91c2120
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 0 deletions.
12 changes: 12 additions & 0 deletions tests/system/vertexai/test_generative_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,18 @@ def test_generate_content_from_text_and_remote_video(self, api_endpoint_env_name
assert response.text
assert "Zootopia" in response.text

def test_generate_content_from_text_and_remote_audio(self, api_endpoint_env_name):
    """Check that a text prompt plus a remote audio file yields response text.

    A WAV file referenced by its Cloud Storage URI is sent together with a
    question, with ``audio_timestamp=True`` set on the generation config.
    The test passes when the response's ``text`` is truthy.

    Args:
        api_endpoint_env_name: Not used in the body; presumably a pytest
            fixture/parameter that selects the API endpoint (confirm
            against the enclosing test class).
    """
    model = generative_models.GenerativeModel(GEMINI_VISION_MODEL_NAME)
    remote_audio = generative_models.Part.from_uri(
        uri="gs://cloud-samples-data/audio/speech_16k.wav",
        mime_type="audio/wav",
    )
    # Exercise the audio_timestamp option of GenerationConfig.
    timestamp_config = generative_models.GenerationConfig(audio_timestamp=True)
    result = model.generate_content(
        contents=["What is in the audio?", remote_audio],
        generation_config=timestamp_config,
    )
    assert result.text

def test_grounding_google_search_retriever(self, api_endpoint_env_name):
model = preview_generative_models.GenerativeModel(GEMINI_MODEL_NAME)
google_search_retriever_tool = (
Expand Down
27 changes: 27 additions & 0 deletions tests/unit/vertexai/test_generative_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,33 @@ def test_generate_content(self, generative_models: generative_models):
)
assert response4.text

model5 = generative_models.GenerativeModel("gemini-1.5-pro-002")
response5 = model5.generate_content(
contents=[
generative_models.Part.from_uri(
"gs://cloud-samples-data/generative-ai/audio/pixel.mp3",
mime_type="audio/mpeg",
),
"What is the audio about?",
],
generation_config=generative_models.GenerationConfig(
audio_timestamp=True,
),
safety_settings=[
generative_models.SafetySetting(
category=generative_models.SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
threshold=generative_models.SafetySetting.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
method=generative_models.SafetySetting.HarmBlockMethod.SEVERITY,
),
generative_models.SafetySetting(
category=generative_models.SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
threshold=generative_models.SafetySetting.HarmBlockThreshold.BLOCK_ONLY_HIGH,
method=generative_models.SafetySetting.HarmBlockMethod.PROBABILITY,
),
],
)
assert response5.text

@mock.patch.object(
target=prediction_service.PredictionServiceClient,
attribute="generate_content",
Expand Down
4 changes: 4 additions & 0 deletions vertexai/generative_models/_generative_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1683,6 +1683,7 @@ def __init__(
response_mime_type: Optional[str] = None,
response_schema: Optional[Dict[str, Any]] = None,
seed: Optional[int] = None,
audio_timestamp: Optional[bool] = None,
routing_config: Optional["RoutingConfig"] = None,
logprobs: Optional[int] = None,
response_logprobs: Optional[bool] = None,
Expand Down Expand Up @@ -1712,6 +1713,7 @@ def __init__(
The model needs to be prompted to output the appropriate
response type, otherwise the behavior is undefined.
response_schema: Output response schema of the generated candidate text.
audio_timestamp: If true, the timestamp of the audio will be included in the response.
routing_config: Model routing preference set in the request.
logprobs: Logit probabilities.
response_logprobs: If true, export the logprobs results in response.
Expand All @@ -1728,6 +1730,7 @@ def __init__(
max_output_tokens=100,
stop_sequences=["\n\n\n"],
seed=5,
audio_timestamp=True,
)
)
```
Expand All @@ -1750,6 +1753,7 @@ def __init__(
response_mime_type=response_mime_type,
response_schema=raw_schema,
seed=seed,
audio_timestamp=audio_timestamp,
logprobs=logprobs,
response_logprobs=response_logprobs,
)
Expand Down

0 comments on commit 91c2120

Please sign in to comment.