Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
- updated to use newer req
- ContentPart image :media option now supports abstractions for :png, :jpg, and :jpeg

Improves cross model support for images
  • Loading branch information
brainlid committed May 3, 2024
1 parent 8befbd2 commit 4faf4e3
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 28 deletions.
22 changes: 21 additions & 1 deletion lib/chat_models/chat_anthropic.ex
Original file line number Diff line number Diff line change
Expand Up @@ -649,12 +649,32 @@ defmodule LangChain.ChatModels.ChatAnthropic do
end

# Converts an image `ContentPart` into the map used for a base64-encoded image
# block in the Anthropic request.
#
# The `:media` option is required; when it is missing, `Keyword.fetch!/2`
# raises a `KeyError`. The atoms `:png`, `:jpg` and `:jpeg` are translated to
# their mime type strings. Any binary value is passed through unchanged so a
# caller can supply a mime type that is not listed here. Any other value is
# logged and raises a `LangChainError`.
def for_api(%ContentPart{type: :image} = part) do
  media =
    case Keyword.fetch!(part.options || [], :media) do
      :png ->
        "image/png"

      # Both spellings map to the same "image/jpeg" mime type.
      type when type in [:jpg, :jpeg] ->
        "image/jpeg"

      value when is_binary(value) ->
        value

      other ->
        message = "Received unsupported media type for ContentPart: #{inspect(other)}"
        Logger.error(message)
        raise LangChainError, message
    end

  %{
    "type" => "image",
    "source" => %{
      "type" => "base64",
      "data" => part.content,
      # Use the resolved mime type, never the raw `:media` option, so atom
      # shortcuts are not sent to the API as-is.
      "media_type" => media
    }
  }
end
Expand Down
6 changes: 6 additions & 0 deletions lib/chat_models/chat_open_ai.ex
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ defmodule LangChain.ChatModels.ChatOpenAI do
type when is_binary(type) ->
"data:#{type};base64,"

type when type in [:jpeg, :jpg] ->
"data:image/jpg;base64,"

:png ->
"data:image/png;base64,"

other ->
message = "Received unsupported media type for ContentPart: #{inspect(other)}"
Logger.error(message)
Expand Down
21 changes: 19 additions & 2 deletions lib/message/content_part.ex
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,25 @@ defmodule LangChain.Message.ContentPart do
- `:content` - Text content.
- `:options` - Options that may be specific to the LLM for a particular
message type. For example, Anthropic requires an image's `media_type` to be
provided by the caller. This can be provided using `media: "image/png"`.
message type. For example, multi-modal messages (ones that include image
data) use the `:media` option to specify the mime type information.
## Image mime types
The `:media` option is used to specify the mime type of the image. Various
LLMs handle this differently or perhaps not at all.
Examples:
- `media: :jpg` - turns into `"image/jpeg"` or `"image/jpg"`, depending on
what the LLM accepts.
- `media: :png` - turns into `"image/png"`
- `media: "image/webp"` - stays as `"image/webp"`. Any specified string value
is passed through unchanged. This allows for future formats to be supported
quickly.
- When omitted, some LLMs may return an error, while others accept it but may
require the `base64` encoded content data to be prefixed with the mime type
information. Basically, you must handle the content needs yourself.
"""
use Ecto.Schema
Expand Down
13 changes: 7 additions & 6 deletions mix.lock
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
%{
"abacus": {:hex, :abacus, "2.0.0", "bfc3a382e9d557198a82f3949e440ff297cae41dd22c1d3939272f5b7ef46ae1", [:mix], [], "hexpm", "5ce1a085c1182341a7924ce436f493b464688469024c4fad716da8744b458ecd"},
"castore": {:hex, :castore, "1.0.5", "9eeebb394cc9a0f3ae56b813459f990abb0a3dedee1be6b27fdb50301930502f", [:mix], [], "hexpm", "8d7c597c3e4a64c395980882d4bca3cebb8d74197c590dc272cfd3b6a6310578"},
"castore": {:hex, :castore, "1.0.7", "b651241514e5f6956028147fe6637f7ac13802537e895a724f90bf3e36ddd1dd", [:mix], [], "hexpm", "da7785a4b0d2a021cd1292a60875a784b6caef71e76bf4917bdee1f390455cf5"},
"complex": {:hex, :complex, "0.5.0", "af2d2331ff6170b61bb738695e481b27a66780e18763e066ee2cd863d0b1dd92", [:mix], [], "hexpm", "2683bd3c184466cfb94fad74cbfddfaa94b860e27ad4ca1bffe3bff169d91ef1"},
"decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"},
"earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"},
"ecto": {:hex, :ecto, "3.11.1", "4b4972b717e7ca83d30121b12998f5fcdc62ba0ed4f20fd390f16f3270d85c3e", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ebd3d3772cd0dfcd8d772659e41ed527c28b2a8bde4b00fe03e0463da0f1983b"},
"elixir_make": {:hex, :elixir_make, "0.7.8", "505026f266552ee5aabca0b9f9c229cbb496c689537c9f922f3eb5431157efc7", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:certifi, "~> 2.0", [hex: :certifi, repo: "hexpm", optional: true]}], "hexpm", "7a71945b913d37ea89b06966e1342c85cfe549b15e6d6d081e8081c493062c07"},
"ex_doc": {:hex, :ex_doc, "0.31.1", "8a2355ac42b1cc7b2379da9e40243f2670143721dd50748bf6c3b1184dae2089", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "3178c3a407c557d8343479e1ff117a96fd31bafe52a039079593fb0524ef61b0"},
"expo": {:hex, :expo, "0.4.1", "1c61d18a5df197dfda38861673d392e642649a9cef7694d2f97a587b2cfb319b", [:mix], [], "hexpm", "2ff7ba7a798c8c543c12550fa0e2cbc81b95d4974c65855d8d15ba7b37a1ce47"},
"finch": {:hex, :finch, "0.17.0", "17d06e1d44d891d20dbd437335eebe844e2426a0cd7e3a3e220b461127c73f70", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2.6 or ~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "8d014a661bb6a437263d4b5abf0bcbd3cf0deb26b1e8596f2a271d22e48934c7"},
"finch": {:hex, :finch, "0.18.0", "944ac7d34d0bd2ac8998f79f7a811b21d87d911e77a786bc5810adb75632ada4", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2.6 or ~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "69f5045b042e531e53edc2574f15e25e735b522c37e2ddb766e15b979e03aa65"},
"gettext": {:hex, :gettext, "0.22.3", "c8273e78db4a0bb6fba7e9f0fd881112f349a3117f7f7c598fa18c66c888e524", [:mix], [{:expo, "~> 0.4.0", [hex: :expo, repo: "hexpm", optional: false]}], "hexpm", "935f23447713954a6866f1bb28c3a878c4c011e802bcd68a726f5e558e4b64bd"},
"hpax": {:hex, :hpax, "0.1.2", "09a75600d9d8bbd064cdd741f21fc06fc1f4cf3d0fcc335e5aa19be1a7235c84", [:mix], [], "hexpm", "2c87843d5a23f5f16748ebe77969880e29809580efdaccd615cd3bed628a8c13"},
"hpax": {:hex, :hpax, "0.2.0", "5a58219adcb75977b2edce5eb22051de9362f08236220c9e859a47111c194ff5", [:mix], [], "hexpm", "bea06558cdae85bed075e6c036993d43cd54d447f76d8190a8db0dc5893fa2f1"},
"jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"},
"makeup": {:hex, :makeup, "1.1.1", "fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"},
"makeup_elixir": {:hex, :makeup_elixir, "0.16.1", "cc9e3ca312f1cfeccc572b37a09980287e243648108384b97ff2b76e505c3555", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "e127a341ad1b209bd80f7bd1620a15693a9908ed780c3b763bccf7d200c767c6"},
"makeup_erlang": {:hex, :makeup_erlang, "0.1.5", "e0ff5a7c708dda34311f7522a8758e23bfcd7d8d8068dc312b5eb41c6fd76eba", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "94d2e986428585a21516d7d7149781480013c56e30c6a233534bedf38867a59a"},
"mime": {:hex, :mime, "2.0.5", "dc34c8efd439abe6ae0343edbb8556f4d63f178594894720607772a041b04b02", [:mix], [], "hexpm", "da0d64a365c45bc9935cc5c8a7fc5e49a0e0f9932a761c55d6c52b142780a05c"},
"mint": {:hex, :mint, "1.5.2", "4805e059f96028948870d23d7783613b7e6b0e2fb4e98d720383852a760067fd", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "d77d9e9ce4eb35941907f1d3df38d8f750c357865353e21d335bdcdf6d892a02"},
"mint": {:hex, :mint, "1.6.0", "88a4f91cd690508a04ff1c3e28952f322528934be541844d54e0ceb765f01d5e", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "3c5ae85d90a5aca0a49c0d8b67360bbe407f3b54f1030a111047ff988e8fefaa"},
"nimble_options": {:hex, :nimble_options, "1.1.0", "3b31a57ede9cb1502071fade751ab0c7b8dbe75a9a4c2b5bbb0943a690b63172", [:mix], [], "hexpm", "8bbbb3941af3ca9acc7835f5655ea062111c9c27bcac53e004460dfd19008a99"},
"nimble_ownership": {:hex, :nimble_ownership, "0.3.1", "99d5244672fafdfac89bfad3d3ab8f0d367603ce1dc4855f86a1c75008bce56f", [:mix], [], "hexpm", "4bf510adedff0449a1d6e200e43e57a814794c8b5b6439071274d248d272a549"},
"nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"},
"nimble_pool": {:hex, :nimble_pool, "1.0.0", "5eb82705d138f4dd4423f69ceb19ac667b3b492ae570c9f5c900bb3d2f50a847", [:mix], [], "hexpm", "80be3b882d2d351882256087078e1b1952a28bf98d0a287be87e4a24a710b67a"},
"nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"},
"nx": {:hex, :nx, "0.7.1", "5f6376e3d18408116e8a84b8f4ac851fb07dfe61764a5410ebf0b5dcb69c1b7e", [:mix], [{:complex, "~> 0.5", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "e3ddd6a3f2a9bac79c67b3933368c25bb5ec814a883fc68aba8fd8a236751777"},
"req": {:hex, :req, "0.4.8", "2b754a3925ddbf4ad78c56f30208ced6aefe111a7ea07fb56c23dccc13eb87ae", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.9", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "7146e51d52593bb7f20d00b5308a5d7d17d663d6e85cd071452b613a8277100c"},
"req": {:hex, :req, "0.4.14", "103de133a076a31044e5458e0f850d5681eef23dfabf3ea34af63212e3b902e2", [:mix], [{:aws_signature, "~> 0.3.2", [hex: :aws_signature, repo: "hexpm", optional: true]}, {:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:nimble_ownership, "~> 0.2.0 or ~> 0.3.0", [hex: :nimble_ownership, repo: "hexpm", optional: false]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "2ddd3d33f9ab714ced8d3c15fd03db40c14dbf129003c4a3eb80fac2cc0b1b08"},
"telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"},
}
55 changes: 52 additions & 3 deletions test/chat_models/chat_anthropic_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,7 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text
ChatAnthropic.for_api(
Message.new_user!([
ContentPart.text!("Tell me about this image:"),
ContentPart.image!("base64-text-data", media: "image/jpeg")
ContentPart.image!("base64-text-data", media: :jpeg)
])
)

Expand All @@ -715,11 +715,25 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text
}

result =
ChatAnthropic.for_api(ContentPart.image!("image_base64_data", media: "image/png"))
ChatAnthropic.for_api(ContentPart.image!("image_base64_data", media: :png))

assert result == expected
end

test "turns image ContentPart's media_type into the expected value" do
  # Each pair is {`:media` option given, "media_type" expected in the API data}.
  cases = [
    {:png, "image/png"},
    {:jpg, "image/jpeg"},
    {:jpeg, "image/jpeg"},
    {"image/webp", "image/webp"}
  ]

  Enum.each(cases, fn {media, mime_type} ->
    api_data = ChatAnthropic.for_api(ContentPart.image!("image_base64_data", media: media))
    assert %{"source" => %{"media_type" => ^mime_type}} = api_data
  end)
end

test "errors on ContentPart type image_url" do
assert_raise LangChain.LangChainError, "Anthropic does not support image_url", fn ->
ChatAnthropic.for_api(ContentPart.image_url!("url-to-image"))
Expand Down Expand Up @@ -1011,7 +1025,7 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text
message =
Message.new_user!([
ContentPart.text!("Identify what this is a picture of:"),
ContentPart.image!(image_data, media: "image/jpeg")
ContentPart.image!(image_data, media: :jpg)
])

{:ok, response} = ChatAnthropic.call(chat, [message], [])
Expand Down Expand Up @@ -1176,5 +1190,40 @@ data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text
assert_received {:streamed_fn, data}
assert %MessageDelta{role: :assistant} = data
end

@tag live_call: true, live_anthropic: true
test "supports starting the assistant's response message and continuing it" do
  test_pid = self()

  # Forward every streamed piece of data to the test process so the test can
  # assert that the callback fired.
  callback_fn = fn data ->
    send(test_pid, {:streamed_fn, data})
  end

  # The final message added to the chain is an assistant message containing
  # only "<answer>"; per the test name, the LLM is expected to continue it.
  {:ok, _result_chain, last_message} =
    LLMChain.new!(%{llm: %ChatAnthropic{model: @test_model, stream: true}})
    |> LLMChain.add_message(Message.new_system!("You are a helpful and concise assistant."))
    |> LLMChain.add_message(
      Message.new_user!(
        "What's the capitol of Norway? Please respond with the answer <answer>{{ANSWER}}</answer>."
      )
    )
    |> LLMChain.add_message(Message.new_assistant!("<answer>"))
    |> LLMChain.run(callback_fn: callback_fn)

  assert last_message.content =~ "Oslo"
  assert last_message.status == :complete
  assert last_message.role == :assistant

  # TODO: MERGE A CONTINUED Assistant message with the one we provided.

  assert_received {:streamed_fn, data}
  assert %MessageDelta{role: :assistant} = data
end
end
end
33 changes: 17 additions & 16 deletions test/chat_models/chat_open_ai_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -177,22 +177,23 @@ defmodule LangChain.ChatModels.ChatOpenAITest do
assert result == expected
end

test "turns an image ContentPart with base64 media into the expected JSON format" do
expected = %{
"type" => "image_url",
"image_url" => %{"url" => "data:image/jpeg;base64,image_base64_data"}
}

result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: "image/jpeg"))
assert result == expected

expected = %{
"type" => "image_url",
"image_url" => %{"url" => "data:image/png;base64,image_base64_data"}
}

result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: "image/png"))
assert result == expected
test "turns ContentPart's media type the expected JSON values" do
  # Each pair is {`:media` option given, data URL expected in the result}.
  # The last case shows that a string value is passed through as-is.
  for {media, expected} <- [
        {:jpg, "data:image/jpg;base64,image_base64_data"},
        {:jpeg, "data:image/jpg;base64,image_base64_data"},
        {:png, "data:image/png;base64,image_base64_data"},
        {"file/pdf", "data:file/pdf;base64,image_base64_data"}
      ] do
    result = ChatOpenAI.for_api(ContentPart.image!("image_base64_data", media: media))
    assert %{"image_url" => %{"url" => ^expected}} = result
  end
end

test "turns an image_url ContentPart into the expected JSON format" do
Expand Down

0 comments on commit 4faf4e3

Please sign in to comment.