Skip to content

Commit

Permalink
Bug fix: passing document id to chunks (#1815)
Browse files Browse the repository at this point in the history
* Bug fix: passing document id to chunks

* Move to joining chunks
  • Loading branch information
NolanTrem authored Jan 14, 2025
1 parent 07f3967 commit d4aa834
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 7 deletions.
22 changes: 22 additions & 0 deletions js/sdk/__tests__/ChunksIntegrationSuperUser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,21 @@ describe("r2rClient V3 Collections Integration Tests", () => {
});
});

// Regression test for this commit's fix: when a document is created from raw
// chunks and the caller supplies an explicit `id`, the API must use that id
// as the document id.
test("Create a document from chunks with an id", async () => {
// Create the document from a single text chunk, passing a fixed UUID as `id`.
const response = await client.documents.create({
id: "1fb70f3b-37eb-4325-8c83-694a03144a67",
chunks: ["Hallo, Welt!"],
});

// The returned documentId must be exactly the id passed in above.
expect(response.results.documentId).toBe(
"1fb70f3b-37eb-4325-8c83-694a03144a67",
);
expect(response.results.message).toBe(
"Document created and ingested successfully.",
);
// NOTE(review): a null taskId presumably means ingestion ran inline rather
// than being queued as a background task — confirm against the server.
expect(response.results.taskId).toBeNull();
});

test("Retrieve document's chunks", async () => {
const response = await client.documents.listChunks({
id: documentId,
Expand Down Expand Up @@ -97,6 +112,13 @@ describe("r2rClient V3 Collections Integration Tests", () => {
expect(response.results.success).toBe(true);
});

// Deletes the document with the fixed UUID used by the
// "Create a document from chunks with an id" test and checks that the API
// reports success.
test("Delete a document", async () => {
const response = await client.documents.delete({
id: "1fb70f3b-37eb-4325-8c83-694a03144a67",
});
expect(response.results.success).toBe(true);
});

// test("Delete a chunk that does not exist", async () => {
// await expect(client.chunks.delete({ id: chunkId })).rejects.toThrow(
// /Status 404/,
Expand Down
10 changes: 6 additions & 4 deletions js/sdk/__tests__/DocumentsIntegrationSuperUser.test.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import { r2rClient } from "../src/index";
import { describe, test, beforeAll, expect, afterAll } from "@jest/globals";
import { assert } from "console";
import fs from "fs";
import path from "path";

const baseUrl = "http://localhost:7272";
const TEST_OUTPUT_DIR = path.join(__dirname, "test-output");

/**
* marmeladov.txt will have an id of 83ef5342-4275-5b75-92d6-692fa32f8523
* marmeladov.txt will have an id of 649d1072-7054-4e17-bd51-1af5f467d617
* The untitled document will have an id of 5556836e-a51c-57c7-916a-de76c79df2b6
* The default collection id is 122fdf6a-e116-546b-a8f6-e4cb2e2c0a09
*/
Expand Down Expand Up @@ -38,9 +37,12 @@ describe("r2rClient V3 Documents Integration Tests", () => {
const response = await client.documents.create({
file: { path: "examples/data/marmeladov.txt", name: "marmeladov.txt" },
metadata: { title: "marmeladov.txt", numericId: 123 },
id: "649d1072-7054-4e17-bd51-1af5f467d617",
});

expect(response.results.documentId).toBeDefined();
expect(response.results.documentId).toBe(
"649d1072-7054-4e17-bd51-1af5f467d617",
);
documentId = response.results.documentId;
}, 10000);

Expand Down Expand Up @@ -303,7 +305,7 @@ describe("r2rClient V3 Documents Integration Tests", () => {

test("Delete marmeladov.txt", async () => {
const response = await client.documents.delete({
id: "83ef5342-4275-5b75-92d6-692fa32f8523",
id: "649d1072-7054-4e17-bd51-1af5f467d617",
});

expect(response.results).toBeDefined();
Expand Down
6 changes: 3 additions & 3 deletions py/core/main/api/v3/documents_router.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import base64
import json
import logging
import mimetypes
import textwrap
Expand Down Expand Up @@ -403,8 +402,9 @@ async def create_document(
f"Maximum of {MAX_CHUNKS_PER_REQUEST} chunks per request",
400,
)
document_id = generate_document_id(
json.dumps(chunks), auth_user.id

document_id = id or generate_document_id(
"".join(chunks), auth_user.id
)

# FIXME: Metadata doesn't seem to be getting passed through
Expand Down

0 comments on commit d4aa834

Please sign in to comment.