Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add path as an option for uploading attachments #1331

Merged
merged 35 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
e8189ae
remove attachment prefix
isahers1 Dec 11, 2024
7b2e73d
fmt
isahers1 Dec 11, 2024
347198e
wip
isahers1 Dec 11, 2024
9fbc0c8
x
isahers1 Dec 11, 2024
6275c1c
fmt
isahers1 Dec 11, 2024
52ff721
add icon
isahers1 Dec 11, 2024
cfc2f3e
x
isahers1 Dec 11, 2024
1976dcc
separate PRs
isahers1 Dec 11, 2024
9e01bfd
fixing runs
isahers1 Dec 12, 2024
34c0dd8
Merge branch 'main' into isaac/attachmentspath
isahers1 Dec 12, 2024
e802e31
fix test
isahers1 Dec 17, 2024
86e2a0f
typing
isahers1 Dec 17, 2024
1207bb0
fmt
isahers1 Dec 17, 2024
7d09a00
fmt
isahers1 Dec 17, 2024
78c93e8
add flag for examples
isahers1 Dec 18, 2024
5d0a956
flag
isahers1 Dec 18, 2024
ee32603
tests
isahers1 Dec 20, 2024
ff7f9e3
test
isahers1 Dec 20, 2024
aa255ff
ankush comments
isahers1 Dec 24, 2024
eb4a1a1
log statement
isahers1 Jan 6, 2025
4197688
Merge branch 'main' into isaac/attachmentspath
isahers1 Jan 6, 2025
14fd6cd
fmt
isahers1 Jan 6, 2025
e00e837
tests
isahers1 Jan 7, 2025
51ec5b6
add tests for same file in different attachments
isahers1 Jan 9, 2025
556e260
fmt
isahers1 Jan 9, 2025
b8c1b0b
comments
isahers1 Jan 11, 2025
25e17d9
Merge branch 'main' into isaac/attachmentspath
isahers1 Jan 11, 2025
4864506
surface mime type when passing attachments to user (#1409)
isahers1 Jan 13, 2025
89733fb
Serialize pydantic and other tricky objects correctly from Rust. (#1392)
obi1kenobi Jan 14, 2025
f4598a7
fmt
isahers1 Jan 14, 2025
fa0a2e7
Merge branch 'main' into isaac/attachmentspath
isahers1 Jan 14, 2025
ffa562b
fmt
isahers1 Jan 14, 2025
bd9ac02
fix_runs
isahers1 Jan 14, 2025
e2dffa9
Update python/langsmith/run_helpers.py
isahers1 Jan 16, 2025
1a493b3
fmt
isahers1 Jan 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions js/src/client.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import * as uuid from "uuid";

Check notice on line 1 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / benchmark

Benchmark results

........... create_5_000_run_trees: Mean +- std dev: 642 ms +- 40 ms ........... create_10_000_run_trees: Mean +- std dev: 1.35 sec +- 0.07 sec ........... create_20_000_run_trees: Mean +- std dev: 1.37 sec +- 0.08 sec ........... dumps_class_nested_py_branch_and_leaf_200x400: Mean +- std dev: 718 us +- 15 us ........... dumps_class_nested_py_leaf_50x100: Mean +- std dev: 25.2 ms +- 0.3 ms ........... dumps_class_nested_py_leaf_100x200: Mean +- std dev: 105 ms +- 2 ms ........... dumps_dataclass_nested_50x100: Mean +- std dev: 25.6 ms +- 0.3 ms ........... WARNING: the benchmark result may be unstable * the standard deviation (17.1 ms) is 24% of the mean (72.4 ms) Try to rerun the benchmark with more runs, values and/or loops. Run 'python -m pyperf system tune' command to reduce the system jitter. Use pyperf stats, pyperf dump and pyperf hist to analyze results. Use --quiet option to hide these warnings. dumps_pydantic_nested_50x100: Mean +- std dev: 72.4 ms +- 17.1 ms ........... dumps_pydanticv1_nested_50x100: Mean +- std dev: 198 ms +- 3 ms

Check notice on line 1 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / benchmark

Comparison against main

+-----------------------------------------------+----------+------------------------+ | Benchmark | main | changes | +===============================================+==========+========================+ | dumps_pydanticv1_nested_50x100 | 215 ms | 198 ms: 1.09x faster | +-----------------------------------------------+----------+------------------------+ | create_5_000_run_trees | 653 ms | 642 ms: 1.02x faster | +-----------------------------------------------+----------+------------------------+ | dumps_class_nested_py_leaf_50x100 | 25.1 ms | 25.2 ms: 1.01x slower | +-----------------------------------------------+----------+------------------------+ | dumps_class_nested_py_branch_and_leaf_200x400 | 711 us | 718 us: 1.01x slower | +-----------------------------------------------+----------+------------------------+ | dumps_dataclass_nested_50x100 | 25.3 ms | 25.6 ms: 1.01x slower | +-----------------------------------------------+----------+------------------------+ | dumps_class_nested_py_leaf_100x200 | 104 ms | 105 ms: 1.01x slower | +-----------------------------------------------+----------+------------------------+ | create_10_000_run_trees | 1.32 sec | 1.35 sec: 1.03x slower | +-----------------------------------------------+----------+------------------------+ | create_20_000_run_trees | 1.30 sec | 1.37 sec: 1.06x slower | +-----------------------------------------------+----------+------------------------+ | dumps_pydantic_nested_50x100 | 66.8 ms | 72.4 ms: 1.08x slower | +-----------------------------------------------+----------+------------------------+ | Geometric mean | (ref) | 1.01x slower | +-----------------------------------------------+----------+------------------------+

import { AsyncCaller, AsyncCallerParams } from "./utils/async_caller.js";
import {
Expand Down Expand Up @@ -429,7 +429,7 @@
// If there is an item on the queue we were unable to pop,
// just return it as a single batch.
if (popped.length === 0 && this.items.length > 0) {
const item = this.items.shift()!;

Check warning on line 432 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Forbidden non-null assertion
popped.push(item);
poppedSizeBytes += item.size;
this.sizeBytes -= item.size;
Expand Down Expand Up @@ -862,7 +862,7 @@
if (this._serverInfo === undefined) {
try {
this._serverInfo = await this._getServerInfo();
} catch (e) {

Check warning on line 865 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

'e' is defined but never used. Allowed unused args must match /^_/u
console.warn(
`[WARNING]: LangSmith failed to fetch info on supported operations. Falling back to batch operations and default limits.`
);
Expand Down Expand Up @@ -1597,7 +1597,7 @@
treeFilter?: string;
isRoot?: boolean;
dataSourceType?: string;
}): Promise<any> {

Check warning on line 1600 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
let projectIds_ = projectIds || [];
if (projectNames) {
projectIds_ = [
Expand Down Expand Up @@ -1885,7 +1885,7 @@
`Failed to list shared examples: ${response.status} ${response.statusText}`
);
}
return result.map((example: any) => ({

Check warning on line 1888 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
...example,
_hostUrl: this.getHostUrl(),
}));
Expand Down Expand Up @@ -2022,7 +2022,7 @@
}
// projectId querying
return true;
} catch (e) {

Check warning on line 2025 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

'e' is defined but never used. Allowed unused args must match /^_/u
return false;
}
}
Expand Down Expand Up @@ -2770,6 +2770,7 @@
(acc, [key, value]) => {
acc[key.slice("attachment.".length)] = {
presigned_url: value.presigned_url,
mime_type: value.mime_type,
};
return acc;
},
Expand Down Expand Up @@ -2867,6 +2868,7 @@
(acc, [key, value]) => {
acc[key.slice("attachment.".length)] = {
presigned_url: value.presigned_url,
mime_type: value.mime_type || undefined,
};
return acc;
},
Expand Down Expand Up @@ -3397,7 +3399,7 @@
async _logEvaluationFeedback(
evaluatorResponse: EvaluationResult | EvaluationResults,
run?: Run,
sourceInfo?: { [key: string]: any }

Check warning on line 3402 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
): Promise<[results: EvaluationResult[], feedbacks: Feedback[]]> {
const evalResults: Array<EvaluationResult> =
this._selectEvalResults(evaluatorResponse);
Expand Down Expand Up @@ -3436,7 +3438,7 @@
public async logEvaluationFeedback(
evaluatorResponse: EvaluationResult | EvaluationResults,
run?: Run,
sourceInfo?: { [key: string]: any }

Check warning on line 3441 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
): Promise<EvaluationResult[]> {
const [results] = await this._logEvaluationFeedback(
evaluatorResponse,
Expand Down Expand Up @@ -3932,7 +3934,7 @@

public async createCommit(
promptIdentifier: string,
object: any,

Check warning on line 3937 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
options?: {
parentCommitHash?: string;
}
Expand Down Expand Up @@ -4164,7 +4166,7 @@
isPublic?: boolean;
isArchived?: boolean;
}
): Promise<Record<string, any>> {

Check warning on line 4169 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type
if (!(await this.promptExists(promptIdentifier))) {
throw new Error("Prompt does not exist, you must create it first.");
}
Expand All @@ -4175,7 +4177,7 @@
throw await this._ownerConflictError("update a prompt", owner);
}

const payload: Record<string, any> = {};

Check warning on line 4180 in js/src/client.ts

View workflow job for this annotation

GitHub Actions / Check linting

Unexpected any. Specify a different type

if (options?.description !== undefined)
payload.description = options.description;
Expand Down
2 changes: 2 additions & 0 deletions js/src/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ export interface BaseExample {

export interface AttachmentInfo {
presigned_url: string;
mime_type?: string;
}

export type AttachmentData = Uint8Array | ArrayBuffer;
Expand Down Expand Up @@ -300,6 +301,7 @@ export interface Example extends BaseExample {
interface RawAttachmentInfo {
presigned_url: string;
s3_url: string;
mime_type?: string;
}
export interface RawExample extends BaseExample {
id: string;
Expand Down
56 changes: 32 additions & 24 deletions js/src/tests/evaluate_attachments.int.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,22 @@ test("evaluate can handle examples with attachments", async () => {
config?: TargetConfigT
) => {
// Verify we receive the attachment data
if (!config?.attachments?.["image"]) {
if (!config?.attachments?.image) {
throw new Error("Image attachment not found");
}
const expectedData = new Uint8Array(
Buffer.from("fake image data for testing")
);
const attachmentMimeType = config?.attachments?.image.mime_type;
if (attachmentMimeType !== "image/png") {
throw new Error("Image attachment has incorrect mime type");
}
const attachmentData: Uint8Array | undefined = config?.attachments?.[
"image"
].presigned_url
? new Uint8Array(
(await fetch(config?.attachments?.["image"].presigned_url).then(
(res) => res.arrayBuffer()
(await fetch(config?.attachments?.image.presigned_url).then((res) =>
res.arrayBuffer()
)) as ArrayBuffer
)
: undefined;
Expand All @@ -57,14 +61,18 @@ test("evaluate can handle examples with attachments", async () => {

const customEvaluator = async ({ attachments }: { attachments?: any }) => {
expect(attachments).toBeDefined();
expect(attachments?.["image"]).toBeDefined();
expect(attachments?.image).toBeDefined();
const expectedData = new Uint8Array(
Buffer.from("fake image data for testing")
);
const attachmentData: Uint8Array | undefined = attachments?.["image"]
const attachmentMimeType = attachments?.image.mime_type;
if (attachmentMimeType !== "image/png") {
throw new Error("Image attachment has incorrect mime type");
}
const attachmentData: Uint8Array | undefined = attachments?.image
.presigned_url
? new Uint8Array(
(await fetch(attachments?.["image"].presigned_url).then((res) =>
(await fetch(attachments?.image.presigned_url).then((res) =>
res.arrayBuffer()
)) as ArrayBuffer
)
Expand Down Expand Up @@ -134,14 +142,14 @@ test("evaluate with attachments not in target function", async () => {

const customEvaluator = async ({ attachments }: { attachments?: any }) => {
expect(attachments).toBeDefined();
expect(attachments?.["image"]).toBeDefined();
expect(attachments?.image).toBeDefined();
const expectedData = new Uint8Array(
Buffer.from("fake image data for testing")
);
const attachmentData: Uint8Array | undefined = attachments?.["image"]
const attachmentData: Uint8Array | undefined = attachments?.image
.presigned_url
? new Uint8Array(
(await fetch(attachments?.["image"].presigned_url).then((res) =>
(await fetch(attachments?.image.presigned_url).then((res) =>
res.arrayBuffer()
)) as ArrayBuffer
)
Expand Down Expand Up @@ -210,7 +218,7 @@ test("multiple evaluators with attachments", async () => {
config?: TargetConfigT
) => {
// Verify we receive the attachment data
if (!config?.attachments?.["image"]) {
if (!config?.attachments?.image) {
throw new Error("Image attachment not found");
}
const expectedData = new Uint8Array(
Expand All @@ -220,8 +228,8 @@ test("multiple evaluators with attachments", async () => {
"image"
].presigned_url
? new Uint8Array(
(await fetch(config?.attachments?.["image"].presigned_url).then(
(res) => res.arrayBuffer()
(await fetch(config?.attachments?.image.presigned_url).then((res) =>
res.arrayBuffer()
)) as ArrayBuffer
)
: undefined;
Expand All @@ -233,14 +241,14 @@ test("multiple evaluators with attachments", async () => {

const customEvaluatorOne = async ({ attachments }: { attachments?: any }) => {
expect(attachments).toBeDefined();
expect(attachments?.["image"]).toBeDefined();
expect(attachments?.image).toBeDefined();
const expectedData = new Uint8Array(
Buffer.from("fake image data for testing")
);
const attachmentData: Uint8Array | undefined = attachments?.["image"]
const attachmentData: Uint8Array | undefined = attachments?.image
.presigned_url
? new Uint8Array(
(await fetch(attachments?.["image"].presigned_url).then((res) =>
(await fetch(attachments?.image.presigned_url).then((res) =>
res.arrayBuffer()
)) as ArrayBuffer
)
Expand All @@ -256,14 +264,14 @@ test("multiple evaluators with attachments", async () => {

const customEvaluatorTwo = async ({ attachments }: { attachments?: any }) => {
expect(attachments).toBeDefined();
expect(attachments?.["image"]).toBeDefined();
expect(attachments?.image).toBeDefined();
const expectedData = new Uint8Array(
Buffer.from("fake image data for testing")
);
const attachmentData: Uint8Array | undefined = attachments?.["image"]
const attachmentData: Uint8Array | undefined = attachments?.image
.presigned_url
? new Uint8Array(
(await fetch(attachments?.["image"].presigned_url).then((res) =>
(await fetch(attachments?.image.presigned_url).then((res) =>
res.arrayBuffer()
)) as ArrayBuffer
)
Expand Down Expand Up @@ -329,7 +337,7 @@ test("evaluate with attachments runnable target function", async () => {
await client.uploadExamplesMultipart(dataset.id, [example]);

const myFunction = async (_input: any, config?: any) => {
if (!config?.attachments?.["image"]) {
if (!config?.attachments?.image) {
throw new Error("Image attachment not found");
}
const expectedData = new Uint8Array(
Expand All @@ -339,8 +347,8 @@ test("evaluate with attachments runnable target function", async () => {
"image"
].presigned_url
? new Uint8Array(
(await fetch(config?.attachments?.["image"].presigned_url).then(
(res) => res.arrayBuffer()
(await fetch(config?.attachments?.image.presigned_url).then((res) =>
res.arrayBuffer()
)) as ArrayBuffer
)
: undefined;
Expand All @@ -355,14 +363,14 @@ test("evaluate with attachments runnable target function", async () => {

const customEvaluator = async ({ attachments }: { attachments?: any }) => {
expect(attachments).toBeDefined();
expect(attachments?.["image"]).toBeDefined();
expect(attachments?.image).toBeDefined();
const expectedData = new Uint8Array(
Buffer.from("fake image data for testing")
);
const attachmentData: Uint8Array | undefined = attachments?.["image"]
const attachmentData: Uint8Array | undefined = attachments?.image
.presigned_url
? new Uint8Array(
(await fetch(attachments?.["image"].presigned_url).then((res) =>
(await fetch(attachments?.image.presigned_url).then((res) =>
res.arrayBuffer()
)) as ArrayBuffer
)
Expand Down
53 changes: 37 additions & 16 deletions python/langsmith/_internal/_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

import itertools
import logging
import os
import uuid
from typing import Literal, Optional, Union, cast
from io import BufferedReader
from typing import Dict, Literal, Optional, Union, cast

from langsmith import schemas as ls_schemas
from langsmith._internal import _orjson
Expand Down Expand Up @@ -212,9 +214,9 @@ def serialized_feedback_operation_to_multipart_parts_and_context(

def serialized_run_operation_to_multipart_parts_and_context(
op: SerializedRunOperation,
) -> MultipartPartsAndContext:
) -> tuple[MultipartPartsAndContext, Dict[str, BufferedReader]]:
acc_parts: list[MultipartPart] = []

opened_files_dict: Dict[str, BufferedReader] = {}
# this is main object, minus inputs/outputs/events/attachments
acc_parts.append(
(
Expand Down Expand Up @@ -247,7 +249,7 @@ def serialized_run_operation_to_multipart_parts_and_context(
),
)
if op.attachments:
for n, (content_type, valb) in op.attachments.items():
for n, (content_type, data_or_path) in op.attachments.items():
if "." in n:
logger.warning(
f"Skipping logging of attachment '{n}' "
Expand All @@ -257,20 +259,39 @@ def serialized_run_operation_to_multipart_parts_and_context(
)
continue

acc_parts.append(
(
f"attachment.{op.id}.{n}",
if isinstance(data_or_path, bytes):
acc_parts.append(
(
None,
valb,
content_type,
{"Content-Length": str(len(valb))},
),
f"attachment.{op.id}.{n}",
(
None,
data_or_path,
content_type,
{"Content-Length": str(len(data_or_path))},
),
)
)
)
return MultipartPartsAndContext(
acc_parts,
f"trace={op.trace_id},id={op.id}",
else:
file_size = os.path.getsize(data_or_path)
file = open(data_or_path, "rb")
opened_files_dict[str(data_or_path) + str(uuid.uuid4())] = file
acc_parts.append(
(
f"attachment.{op.id}.{n}",
(
None,
file, # type: ignore[arg-type]
f"{content_type}; length={file_size}",
{},
),
)
)
return (
MultipartPartsAndContext(
acc_parts,
f"trace={op.trace_id},id={op.id}",
),
opened_files_dict,
)


Expand Down
5 changes: 5 additions & 0 deletions python/langsmith/_internal/_serde.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ def _simple_default(obj):
]


# IMPORTANT: This function is used from Rust code in `langsmith-pyo3` serialization,
# in order to handle serializing these tricky Python types *from Rust*.
# Do not cause this function to become inaccessible (e.g. by deleting
# or renaming it) without also fixing the corresponding Rust code found in:
# rust/crates/langsmith-pyo3/src/serialization/mod.rs
def _serialize_json(obj: Any) -> Any:
try:
if isinstance(obj, (set, tuple)):
Expand Down
Loading
Loading