Skip to content

Commit

Permalink
Support audio input for OpenAI models
Browse files Browse the repository at this point in the history
Reviewed By: laurendeason

Differential Revision: D67126541

fbshipit-source-id: f6a229ae41a3fb127f9c00874609e9d2953bf23b
  • Loading branch information
onionymous authored and facebook-github-bot committed Dec 12, 2024
1 parent e1db0e1 commit 51fdd2c
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 2 deletions.
7 changes: 7 additions & 0 deletions CybersecurityBenchmarks/benchmark/benchmark_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import io
import re
import subprocess
from pathlib import Path

from typing import Optional

Expand Down Expand Up @@ -93,6 +94,12 @@ def image_to_b64(image_path: str, image_format: str = "PNG") -> str:
return img_base64.decode("utf-8")


def audio_to_b64(audio_path: str) -> str:
    """Return the contents of an audio file as a base64-encoded string.

    The file is read as raw bytes in one call; no audio decoding or format
    validation is performed here.

    Args:
        audio_path: Filesystem path of the audio file to encode.

    Returns:
        The base64 encoding of the file's bytes, as text. An empty file
        yields an empty string.

    Raises:
        OSError: If the file cannot be read (e.g. it does not exist).
    """
    audio_bytes = Path(audio_path).read_bytes()
    # b64encode returns bytes; decode so the result can be embedded in text.
    return base64.b64encode(audio_bytes).decode("utf-8")


def get_mime_type(file_path: str) -> Optional[str]:
result = subprocess.run(
["file", "--mime-type", "-b", file_path], capture_output=True, text=True
Expand Down
12 changes: 10 additions & 2 deletions CybersecurityBenchmarks/benchmark/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

from typing_extensions import override

from .benchmark_utils import image_to_b64
from .benchmark_utils import audio_to_b64, image_to_b64
from .cache_handler import CacheHandler


Expand Down Expand Up @@ -625,7 +625,15 @@ def query_multimodal(
)

if audio_path:
raise NotImplementedError("Audio input in OpenAI is not yet supported")
user_message_content.append(
{
"type": "input_audio",
"input_audio": {
"data": audio_to_b64(str(audio_path)),
"format": "wav",
},
}
)

messages.append(
{
Expand Down

0 comments on commit 51fdd2c

Please sign in to comment.