Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ContentPart image media option updates #113

Merged
merged 1 commit into from
May 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion lib/chat_models/chat_anthropic.ex
Original file line number Diff line number Diff line change
Expand Up @@ -649,12 +649,32 @@ defmodule LangChain.ChatModels.ChatAnthropic do
end

# Converts an image `ContentPart` into the map sent to Anthropic for an
# "image" content block whose source is base64 data taken from `part.content`.
#
# The `:media` option is required: `Keyword.fetch!/2` raises a `KeyError` when
# it is absent. `:png`, `:jpg` and `:jpeg` are translated to a MIME type
# string, any binary value is passed through unchanged, and every other value
# is logged and raised as a `LangChainError`.
def for_api(%ContentPart{type: :image} = part) do
media =
# Fall back to an empty keyword list when `part.options` is nil/false so the
# lookup fails with a `KeyError` rather than on a non-list argument.
case Keyword.fetch!(part.options || [], :media) do
:png ->
"image/png"

:jpg ->
"image/jpeg"

:jpeg ->
"image/jpeg"

value when is_binary(value) ->
value

other ->
message = "Received unsupported media type for ContentPart: #{inspect(other)}"
Logger.error(message)
raise LangChainError, message
end

%{
"type" => "image",
"source" => %{
"type" => "base64",
"data" => part.content,
# NOTE(review): the next two entries look like the removed and the added side
# of the same diff line; presumably only the normalized `media` entry belongs
# in the final code - confirm against the merged file.
"media_type" => Keyword.fetch!(part.options, :media)
"media_type" => media
}
}
end
Expand Down
6 changes: 6 additions & 0 deletions lib/chat_models/chat_open_ai.ex
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ defmodule LangChain.ChatModels.ChatOpenAI do
type when is_binary(type) ->
"data:#{type};base64,"

type when type in [:jpeg, :jpg] ->
"data:image/jpg;base64,"

:png ->
"data:image/png;base64,"

other ->
message = "Received unsupported media type for ContentPart: #{inspect(other)}"
Logger.error(message)
Expand Down
21 changes: 19 additions & 2 deletions lib/message/content_part.ex
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,25 @@ defmodule LangChain.Message.ContentPart do

- `:content` - Text content.
- `:options` - Options that may be specific to the LLM for a particular
message type. For example, Anthropic requires an image's `media_type` to be
provided by the caller. This can be provided using `media: "image/png"`.
message type. For example, multi-modal messages (ones that include image
data) use the `:media` option to specify the mime type information.

## Image mime types

The `:media` option is used to specify the mime type of the image. Various
LLMs handle this differently or perhaps not at all.

Examples:

- `media: :jpg` - turns into `"image/jpeg"` or `"image/jpg"`, depending on
what the LLM accepts.
- `media: :png` - turns into `"image/png"`
- `media: "image/webp"` - stays as `"image/webp"`. Any specified string value
is passed through unchanged. This allows for future formats to be supported
quickly.
- When omitted, some LLMs may return an error, while others accept the request
but may require the `base64` encoded content data to be prefixed with the mime
type information. In that case, you must handle the content needs yourself.

"""
use Ecto.Schema
Expand Down
13 changes: 7 additions & 6 deletions mix.lock
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
%{
"abacus": {:hex, :abacus, "2.0.0", "bfc3a382e9d557198a82f3949e440ff297cae41dd22c1d3939272f5b7ef46ae1", [:mix], [], "hexpm", "5ce1a085c1182341a7924ce436f493b464688469024c4fad716da8744b458ecd"},
"castore": {:hex, :castore, "1.0.5", "9eeebb394cc9a0f3ae56b813459f990abb0a3dedee1be6b27fdb50301930502f", [:mix], [], "hexpm", "8d7c597c3e4a64c395980882d4bca3cebb8d74197c590dc272cfd3b6a6310578"},
"castore": {:hex, :castore, "1.0.7", "b651241514e5f6956028147fe6637f7ac13802537e895a724f90bf3e36ddd1dd", [:mix], [], "hexpm", "da7785a4b0d2a021cd1292a60875a784b6caef71e76bf4917bdee1f390455cf5"},
"complex": {:hex, :complex, "0.5.0", "af2d2331ff6170b61bb738695e481b27a66780e18763e066ee2cd863d0b1dd92", [:mix], [], "hexpm", "2683bd3c184466cfb94fad74cbfddfaa94b860e27ad4ca1bffe3bff169d91ef1"},
"decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"},
"earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"},
"ecto": {:hex, :ecto, "3.11.1", "4b4972b717e7ca83d30121b12998f5fcdc62ba0ed4f20fd390f16f3270d85c3e", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ebd3d3772cd0dfcd8d772659e41ed527c28b2a8bde4b00fe03e0463da0f1983b"},
"elixir_make": {:hex, :elixir_make, "0.7.8", "505026f266552ee5aabca0b9f9c229cbb496c689537c9f922f3eb5431157efc7", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:certifi, "~> 2.0", [hex: :certifi, repo: "hexpm", optional: true]}], "hexpm", "7a71945b913d37ea89b06966e1342c85cfe549b15e6d6d081e8081c493062c07"},
"ex_doc": {:hex, :ex_doc, "0.31.1", "8a2355ac42b1cc7b2379da9e40243f2670143721dd50748bf6c3b1184dae2089", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "3178c3a407c557d8343479e1ff117a96fd31bafe52a039079593fb0524ef61b0"},
"expo": {:hex, :expo, "0.4.1", "1c61d18a5df197dfda38861673d392e642649a9cef7694d2f97a587b2cfb319b", [:mix], [], "hexpm", "2ff7ba7a798c8c543c12550fa0e2cbc81b95d4974c65855d8d15ba7b37a1ce47"},
"finch": {:hex, :finch, "0.17.0", "17d06e1d44d891d20dbd437335eebe844e2426a0cd7e3a3e220b461127c73f70", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2.6 or ~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "8d014a661bb6a437263d4b5abf0bcbd3cf0deb26b1e8596f2a271d22e48934c7"},
"finch": {:hex, :finch, "0.18.0", "944ac7d34d0bd2ac8998f79f7a811b21d87d911e77a786bc5810adb75632ada4", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2.6 or ~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "69f5045b042e531e53edc2574f15e25e735b522c37e2ddb766e15b979e03aa65"},
"gettext": {:hex, :gettext, "0.22.3", "c8273e78db4a0bb6fba7e9f0fd881112f349a3117f7f7c598fa18c66c888e524", [:mix], [{:expo, "~> 0.4.0", [hex: :expo, repo: "hexpm", optional: false]}], "hexpm", "935f23447713954a6866f1bb28c3a878c4c011e802bcd68a726f5e558e4b64bd"},
"hpax": {:hex, :hpax, "0.1.2", "09a75600d9d8bbd064cdd741f21fc06fc1f4cf3d0fcc335e5aa19be1a7235c84", [:mix], [], "hexpm", "2c87843d5a23f5f16748ebe77969880e29809580efdaccd615cd3bed628a8c13"},
"hpax": {:hex, :hpax, "0.2.0", "5a58219adcb75977b2edce5eb22051de9362f08236220c9e859a47111c194ff5", [:mix], [], "hexpm", "bea06558cdae85bed075e6c036993d43cd54d447f76d8190a8db0dc5893fa2f1"},
"jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"},
"makeup": {:hex, :makeup, "1.1.1", "fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"},
"makeup_elixir": {:hex, :makeup_elixir, "0.16.1", "cc9e3ca312f1cfeccc572b37a09980287e243648108384b97ff2b76e505c3555", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "e127a341ad1b209bd80f7bd1620a15693a9908ed780c3b763bccf7d200c767c6"},
"makeup_erlang": {:hex, :makeup_erlang, "0.1.5", "e0ff5a7c708dda34311f7522a8758e23bfcd7d8d8068dc312b5eb41c6fd76eba", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "94d2e986428585a21516d7d7149781480013c56e30c6a233534bedf38867a59a"},
"mime": {:hex, :mime, "2.0.5", "dc34c8efd439abe6ae0343edbb8556f4d63f178594894720607772a041b04b02", [:mix], [], "hexpm", "da0d64a365c45bc9935cc5c8a7fc5e49a0e0f9932a761c55d6c52b142780a05c"},
"mint": {:hex, :mint, "1.5.2", "4805e059f96028948870d23d7783613b7e6b0e2fb4e98d720383852a760067fd", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "d77d9e9ce4eb35941907f1d3df38d8f750c357865353e21d335bdcdf6d892a02"},
"mint": {:hex, :mint, "1.6.0", "88a4f91cd690508a04ff1c3e28952f322528934be541844d54e0ceb765f01d5e", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "3c5ae85d90a5aca0a49c0d8b67360bbe407f3b54f1030a111047ff988e8fefaa"},
"nimble_options": {:hex, :nimble_options, "1.1.0", "3b31a57ede9cb1502071fade751ab0c7b8dbe75a9a4c2b5bbb0943a690b63172", [:mix], [], "hexpm", "8bbbb3941af3ca9acc7835f5655ea062111c9c27bcac53e004460dfd19008a99"},
"nimble_ownership": {:hex, :nimble_ownership, "0.3.1", "99d5244672fafdfac89bfad3d3ab8f0d367603ce1dc4855f86a1c75008bce56f", [:mix], [], "hexpm", "4bf510adedff0449a1d6e200e43e57a814794c8b5b6439071274d248d272a549"},
"nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"},
"nimble_pool": {:hex, :nimble_pool, "1.0.0", "5eb82705d138f4dd4423f69ceb19ac667b3b492ae570c9f5c900bb3d2f50a847", [:mix], [], "hexpm", "80be3b882d2d351882256087078e1b1952a28bf98d0a287be87e4a24a710b67a"},
"nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"},
"nx": {:hex, :nx, "0.7.1", "5f6376e3d18408116e8a84b8f4ac851fb07dfe61764a5410ebf0b5dcb69c1b7e", [:mix], [{:complex, "~> 0.5", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "e3ddd6a3f2a9bac79c67b3933368c25bb5ec814a883fc68aba8fd8a236751777"},
"req": {:hex, :req, "0.4.8", "2b754a3925ddbf4ad78c56f30208ced6aefe111a7ea07fb56c23dccc13eb87ae", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.9", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "7146e51d52593bb7f20d00b5308a5d7d17d663d6e85cd071452b613a8277100c"},
"req": {:hex, :req, "0.4.14", "103de133a076a31044e5458e0f850d5681eef23dfabf3ea34af63212e3b902e2", [:mix], [{:aws_signature, "~> 0.3.2", [hex: :aws_signature, repo: "hexpm", optional: true]}, {:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:nimble_ownership, "~> 0.2.0 or ~> 0.3.0", [hex: :nimble_ownership, repo: "hexpm", optional: false]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "2ddd3d33f9ab714ced8d3c15fd03db40c14dbf129003c4a3eb80fac2cc0b1b08"},
"telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"},
}
55 changes: 52 additions & 3 deletions test/chat_models/chat_anthropic_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,7 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text
ChatAnthropic.for_api(
Message.new_user!([
ContentPart.text!("Tell me about this image:"),
ContentPart.image!("base64-text-data", media: "image/jpeg")
ContentPart.image!("base64-text-data", media: :jpeg)
])
)

Expand All @@ -715,11 +715,25 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text
}

result =
ChatAnthropic.for_api(ContentPart.image!("image_base64_data", media: "image/png"))
ChatAnthropic.for_api(ContentPart.image!("image_base64_data", media: :png))

assert result == expected
end

test "turns image ContentPart's media_type into the expected value" do
  # Each pair is {value given as the :media option, media_type expected in the
  # API payload}. Atom shortcuts are expanded; a string is passed through.
  conversions = [
    {:png, "image/png"},
    {:jpg, "image/jpeg"},
    {:jpeg, "image/jpeg"},
    {"image/webp", "image/webp"}
  ]

  for {media, mime_type} <- conversions do
    part = ContentPart.image!("image_base64_data", media: media)

    assert %{"source" => %{"media_type" => ^mime_type}} = ChatAnthropic.for_api(part)
  end
end

test "errors on ContentPart type image_url" do
assert_raise LangChain.LangChainError, "Anthropic does not support image_url", fn ->
ChatAnthropic.for_api(ContentPart.image_url!("url-to-image"))
Expand Down Expand Up @@ -1011,7 +1025,7 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text
message =
Message.new_user!([
ContentPart.text!("Identify what this is a picture of:"),
ContentPart.image!(image_data, media: "image/jpeg")
ContentPart.image!(image_data, media: :jpg)
])

{:ok, response} = ChatAnthropic.call(chat, [message], [])
Expand Down Expand Up @@ -1176,5 +1190,40 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text
assert_received {:streamed_fn, data}
assert %MessageDelta{role: :assistant} = data
end

@tag live_call: true, live_anthropic: true
test "supports starting the assistant's response message and continuing it" do
  test_pid = self()

  # Forward every streamed chunk to the test process so it can be asserted on
  # after the chain has finished running.
  callback_fn = fn data ->
    send(test_pid, {:streamed_fn, data})
  end

  # The conversation ends with a partial assistant message ("<answer>") that
  # the LLM is expected to continue.
  {:ok, _result_chain, last_message} =
    LLMChain.new!(%{llm: %ChatAnthropic{model: @test_model, stream: true}})
    |> LLMChain.add_message(Message.new_system!("You are a helpful and concise assistant."))
    |> LLMChain.add_message(
      Message.new_user!(
        "What's the capitol of Norway? Please respond with the answer <answer>{{ANSWER}}</answer>."
      )
    )
    |> LLMChain.add_message(Message.new_assistant!("<answer>"))
    |> LLMChain.run(callback_fn: callback_fn)

  assert last_message.content =~ "Oslo"
  assert last_message.status == :complete
  assert last_message.role == :assistant

  assert_received {:streamed_fn, data}
  assert %MessageDelta{role: :assistant} = data

  # TODO: MERGE A CONTINUED Assistant message with the one we provided.
  # The test is intentionally kept failing until that merge is implemented;
  # flunk/1 reports why instead of an unexplained `assert false`.
  flunk("TODO: merge a continued assistant message with the one we provided")
end
end
end
33 changes: 17 additions & 16 deletions test/chat_models/chat_open_ai_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -177,22 +177,23 @@ defmodule LangChain.ChatModels.ChatOpenAITest do
assert result == expected
end

test "turns an image ContentPart with base64 media into the expected JSON format" do
expected = %{
"type" => "image_url",
"image_url" => %{"url" => "data:image/jpeg;base64,image_base64_data"}
}

result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: "image/jpeg"))
assert result == expected

expected = %{
"type" => "image_url",
"image_url" => %{"url" => "data:image/png;base64,image_base64_data"}
}

result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: "image/png"))
assert result == expected
test "turns ContentPart's media type the expected JSON values" do
  # Each pair is {value given as the :media option, expected data URL}.
  # The atom shortcuts map to a fixed prefix; a string value is passed through
  # as the mime type unchanged.
  [
    {:jpg, "data:image/jpg;base64,image_base64_data"},
    {:jpeg, "data:image/jpg;base64,image_base64_data"},
    {:png, "data:image/png;base64,image_base64_data"},
    {"file/pdf", "data:file/pdf;base64,image_base64_data"}
  ]
  |> Enum.each(fn {media, url} ->
    result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: media))

    assert %{"image_url" => %{"url" => ^url}} = result
  end)
end

test "turns an image_url ContentPart into the expected JSON format" do
Expand Down
Loading