langchain-ai · isahers1 · Jan 16, 2025 · Dec 11, 2024 · Dec 11, 2024 · Dec 11, 2024
diff --git a/js/src/client.ts b/js/src/client.ts
@@ -1,4 +1,4 @@
 import * as uuid from "uuid";

 import { AsyncCaller, AsyncCallerParams } from "./utils/async_caller.js";
 import {
@@ -429,7 +429,7 @@
    // If there is an item on the queue we were unable to pop,
    // just return it as a single batch.
    if (popped.length === 0 && this.items.length > 0) {
      const item = this.items.shift()!;
      popped.push(item);
      poppedSizeBytes += item.size;
      this.sizeBytes -= item.size;
@@ -862,7 +862,7 @@
        if (this._serverInfo === undefined) {
          try {
            this._serverInfo = await this._getServerInfo();
          } catch (e) {
            console.warn(
              `[WARNING]: LangSmith failed to fetch info on supported operations. Falling back to batch operations and default limits.`
            );
@@ -1597,7 +1597,7 @@
    treeFilter?: string;
    isRoot?: boolean;
    dataSourceType?: string;
  }): Promise<any> {
    let projectIds_ = projectIds || [];
    if (projectNames) {
      projectIds_ = [
@@ -1885,7 +1885,7 @@
        `Failed to list shared examples: ${response.status} ${response.statusText}`
      );
    }
    return result.map((example: any) => ({
      ...example,
      _hostUrl: this.getHostUrl(),
    }));
@@ -2022,7 +2022,7 @@
      }
      // projectId querying
      return true;
    } catch (e) {
      return false;
    }
  }
@@ -2770,6 +2770,7 @@
         (acc, [key, value]) => {
           acc[key.slice("attachment.".length)] = {
             presigned_url: value.presigned_url,
+            mime_type: value.mime_type || undefined, // falsy (e.g. null) -> undefined
           };
           return acc;
         },
@@ -2867,6 +2868,7 @@
             (acc, [key, value]) => {
               acc[key.slice("attachment.".length)] = {
                 presigned_url: value.presigned_url,
+                mime_type: value.mime_type || undefined,
               };
               return acc;
             },
@@ -3397,7 +3399,7 @@
  async _logEvaluationFeedback(
    evaluatorResponse: EvaluationResult | EvaluationResults,
    run?: Run,
    sourceInfo?: { [key: string]: any }
  ): Promise<[results: EvaluationResult[], feedbacks: Feedback[]]> {
    const evalResults: Array<EvaluationResult> =
      this._selectEvalResults(evaluatorResponse);
@@ -3436,7 +3438,7 @@
  public async logEvaluationFeedback(
    evaluatorResponse: EvaluationResult | EvaluationResults,
    run?: Run,
    sourceInfo?: { [key: string]: any }
  ): Promise<EvaluationResult[]> {
    const [results] = await this._logEvaluationFeedback(
      evaluatorResponse,
@@ -3932,7 +3934,7 @@

  public async createCommit(
    promptIdentifier: string,
    object: any,
    options?: {
      parentCommitHash?: string;
    }
@@ -4164,7 +4166,7 @@
      isPublic?: boolean;
      isArchived?: boolean;
    }
  ): Promise<Record<string, any>> {
    if (!(await this.promptExists(promptIdentifier))) {
      throw new Error("Prompt does not exist, you must create it first.");
    }
@@ -4175,7 +4177,7 @@
      throw await this._ownerConflictError("update a prompt", owner);
    }

    const payload: Record<string, any> = {};

    if (options?.description !== undefined)
      payload.description = options.description;

diff --git a/js/src/schemas.ts b/js/src/schemas.ts
@@ -65,6 +65,7 @@ export interface BaseExample {
 
 export interface AttachmentInfo {
   presigned_url: string;
+  mime_type?: string; // optional MIME type of the attachment, e.g. "image/png"
 }
 
 export type AttachmentData = Uint8Array | ArrayBuffer;
@@ -300,6 +301,7 @@ export interface Example extends BaseExample {
 interface RawAttachmentInfo {
   presigned_url: string;
   s3_url: string;
+  mime_type?: string; // optional MIME type of the attachment; may be absent
 }
 export interface RawExample extends BaseExample {
   id: string;

diff --git a/js/src/tests/evaluate_attachments.int.test.ts b/js/src/tests/evaluate_attachments.int.test.ts
@@ -34,18 +34,22 @@ test("evaluate can handle examples with attachments", async () => {
     config?: TargetConfigT
   ) => {
     // Verify we receive the attachment data
-    if (!config?.attachments?.["image"]) {
+    if (!config?.attachments?.image) {
       throw new Error("Image attachment not found");
     }
     const expectedData = new Uint8Array(
       Buffer.from("fake image data for testing")
     );
+    const attachmentMimeType = config?.attachments?.image.mime_type;
+    if (attachmentMimeType !== "image/png") {
+      throw new Error("Image attachment has incorrect mime type");
+    }
     const attachmentData: Uint8Array | undefined = config?.attachments?.[
       "image"
     ].presigned_url
       ? new Uint8Array(
-          (await fetch(config?.attachments?.["image"].presigned_url).then(
-            (res) => res.arrayBuffer()
+          (await fetch(config?.attachments?.image.presigned_url).then((res) =>
+            res.arrayBuffer()
           )) as ArrayBuffer
         )
       : undefined;
@@ -57,14 +61,18 @@ test("evaluate can handle examples with attachments", async () => {
 
   const customEvaluator = async ({ attachments }: { attachments?: any }) => {
     expect(attachments).toBeDefined();
-    expect(attachments?.["image"]).toBeDefined();
+    expect(attachments?.image).toBeDefined();
     const expectedData = new Uint8Array(
       Buffer.from("fake image data for testing")
     );
-    const attachmentData: Uint8Array | undefined = attachments?.["image"]
+    const attachmentMimeType = attachments?.image.mime_type;
+    if (attachmentMimeType !== "image/png") {
+      throw new Error("Image attachment has incorrect mime type");
+    }
+    const attachmentData: Uint8Array | undefined = attachments?.image
       .presigned_url
       ? new Uint8Array(
-          (await fetch(attachments?.["image"].presigned_url).then((res) =>
+          (await fetch(attachments?.image.presigned_url).then((res) =>
             res.arrayBuffer()
           )) as ArrayBuffer
         )
@@ -134,14 +142,14 @@ test("evaluate with attachments not in target function", async () => {
 
   const customEvaluator = async ({ attachments }: { attachments?: any }) => {
     expect(attachments).toBeDefined();
-    expect(attachments?.["image"]).toBeDefined();
+    expect(attachments?.image).toBeDefined();
     const expectedData = new Uint8Array(
       Buffer.from("fake image data for testing")
     );
-    const attachmentData: Uint8Array | undefined = attachments?.["image"]
+    const attachmentData: Uint8Array | undefined = attachments?.image
       .presigned_url
       ? new Uint8Array(
-          (await fetch(attachments?.["image"].presigned_url).then((res) =>
+          (await fetch(attachments?.image.presigned_url).then((res) =>
             res.arrayBuffer()
           )) as ArrayBuffer
         )
@@ -210,7 +218,7 @@ test("multiple evaluators with attachments", async () => {
     config?: TargetConfigT
   ) => {
     // Verify we receive the attachment data
-    if (!config?.attachments?.["image"]) {
+    if (!config?.attachments?.image) {
       throw new Error("Image attachment not found");
     }
     const expectedData = new Uint8Array(
@@ -220,8 +228,8 @@ test("multiple evaluators with attachments", async () => {
       "image"
     ].presigned_url
       ? new Uint8Array(
-          (await fetch(config?.attachments?.["image"].presigned_url).then(
-            (res) => res.arrayBuffer()
+          (await fetch(config?.attachments?.image.presigned_url).then((res) =>
+            res.arrayBuffer()
           )) as ArrayBuffer
         )
       : undefined;
@@ -233,14 +241,14 @@ test("multiple evaluators with attachments", async () => {
 
   const customEvaluatorOne = async ({ attachments }: { attachments?: any }) => {
     expect(attachments).toBeDefined();
-    expect(attachments?.["image"]).toBeDefined();
+    expect(attachments?.image).toBeDefined();
     const expectedData = new Uint8Array(
       Buffer.from("fake image data for testing")
     );
-    const attachmentData: Uint8Array | undefined = attachments?.["image"]
+    const attachmentData: Uint8Array | undefined = attachments?.image
       .presigned_url
       ? new Uint8Array(
-          (await fetch(attachments?.["image"].presigned_url).then((res) =>
+          (await fetch(attachments?.image.presigned_url).then((res) =>
             res.arrayBuffer()
           )) as ArrayBuffer
         )
@@ -256,14 +264,14 @@ test("multiple evaluators with attachments", async () => {
 
   const customEvaluatorTwo = async ({ attachments }: { attachments?: any }) => {
     expect(attachments).toBeDefined();
-    expect(attachments?.["image"]).toBeDefined();
+    expect(attachments?.image).toBeDefined();
     const expectedData = new Uint8Array(
       Buffer.from("fake image data for testing")
     );
-    const attachmentData: Uint8Array | undefined = attachments?.["image"]
+    const attachmentData: Uint8Array | undefined = attachments?.image
       .presigned_url
       ? new Uint8Array(
-          (await fetch(attachments?.["image"].presigned_url).then((res) =>
+          (await fetch(attachments?.image.presigned_url).then((res) =>
             res.arrayBuffer()
           )) as ArrayBuffer
         )
@@ -329,7 +337,7 @@ test("evaluate with attachments runnable target function", async () => {
   await client.uploadExamplesMultipart(dataset.id, [example]);
 
   const myFunction = async (_input: any, config?: any) => {
-    if (!config?.attachments?.["image"]) {
+    if (!config?.attachments?.image) {
       throw new Error("Image attachment not found");
     }
     const expectedData = new Uint8Array(
@@ -339,8 +347,8 @@ test("evaluate with attachments runnable target function", async () => {
       "image"
     ].presigned_url
       ? new Uint8Array(
-          (await fetch(config?.attachments?.["image"].presigned_url).then(
-            (res) => res.arrayBuffer()
+          (await fetch(config?.attachments?.image.presigned_url).then((res) =>
+            res.arrayBuffer()
           )) as ArrayBuffer
         )
       : undefined;
@@ -355,14 +363,14 @@ test("evaluate with attachments runnable target function", async () => {
 
   const customEvaluator = async ({ attachments }: { attachments?: any }) => {
     expect(attachments).toBeDefined();
-    expect(attachments?.["image"]).toBeDefined();
+    expect(attachments?.image).toBeDefined();
     const expectedData = new Uint8Array(
       Buffer.from("fake image data for testing")
     );
-    const attachmentData: Uint8Array | undefined = attachments?.["image"]
+    const attachmentData: Uint8Array | undefined = attachments?.image
       .presigned_url
       ? new Uint8Array(
-          (await fetch(attachments?.["image"].presigned_url).then((res) =>
+          (await fetch(attachments?.image.presigned_url).then((res) =>
             res.arrayBuffer()
           )) as ArrayBuffer
         )

diff --git a/python/langsmith/_internal/_operations.py b/python/langsmith/_internal/_operations.py
@@ -2,8 +2,10 @@
 
 import itertools
 import logging
+import os
 import uuid
-from typing import Literal, Optional, Union, cast
+from io import BufferedReader
+from typing import Dict, Literal, Optional, Union, cast
 
 from langsmith import schemas as ls_schemas
 from langsmith._internal import _orjson
@@ -212,9 +214,9 @@ def serialized_feedback_operation_to_multipart_parts_and_context(
 
 def serialized_run_operation_to_multipart_parts_and_context(
     op: SerializedRunOperation,
-) -> MultipartPartsAndContext:
+) -> tuple[MultipartPartsAndContext, Dict[str, BufferedReader]]:
     acc_parts: list[MultipartPart] = []
-
+    opened_files_dict: Dict[str, BufferedReader] = {}
     # this is main object, minus inputs/outputs/events/attachments
     acc_parts.append(
         (
@@ -247,7 +249,7 @@ def serialized_run_operation_to_multipart_parts_and_context(
             ),
         )
     if op.attachments:
-        for n, (content_type, valb) in op.attachments.items():
+        for n, (content_type, data_or_path) in op.attachments.items():
             if "." in n:
                 logger.warning(
                     f"Skipping logging of attachment '{n}' "
@@ -257,20 +259,39 @@ def serialized_run_operation_to_multipart_parts_and_context(
                 )
                 continue
 
-            acc_parts.append(
-                (
-                    f"attachment.{op.id}.{n}",
+            if isinstance(data_or_path, bytes):
+                acc_parts.append(
                     (
-                        None,
-                        valb,
-                        content_type,
-                        {"Content-Length": str(len(valb))},
-                    ),
+                        f"attachment.{op.id}.{n}",
+                        (
+                            None,
+                            data_or_path,
+                            content_type,
+                            {"Content-Length": str(len(data_or_path))},
+                        ),
+                    )
                 )
-            )
-    return MultipartPartsAndContext(
-        acc_parts,
-        f"trace={op.trace_id},id={op.id}",
+            else:
+                file_size = os.path.getsize(data_or_path)
+                file = open(data_or_path, "rb")
+                opened_files_dict[str(data_or_path) + str(uuid.uuid4())] = file
+                acc_parts.append(
+                    (
+                        f"attachment.{op.id}.{n}",
+                        (
+                            None,
+                            file,  # type: ignore[arg-type]
+                            f"{content_type}; length={file_size}",
+                            {},
+                        ),
+                    )
+                )
+    return (
+        MultipartPartsAndContext(
+            acc_parts,
+            f"trace={op.trace_id},id={op.id}",
+        ),
+        opened_files_dict,
     )
 
 

diff --git a/python/langsmith/_internal/_serde.py b/python/langsmith/_internal/_serde.py
@@ -79,6 +79,11 @@ def _simple_default(obj):
 ]
 
 
+# IMPORTANT: This function is used from Rust code in `langsmith-pyo3` serialization,
+#            in order to handle serializing these tricky Python types *from Rust*.
+#            Do not cause this function to become inaccessible (e.g. by deleting
+#            or renaming it) without also fixing the corresponding Rust code found in:
+#               rust/crates/langsmith-pyo3/src/serialization/mod.rs
 def _serialize_json(obj: Any) -> Any:
     try:
         if isinstance(obj, (set, tuple)):