Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[draft] Add Diskann index for Azure CosmosDB #7150

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 51 additions & 17 deletions libs/langchain-azure-cosmosdb/src/azure_cosmosdb_mongodb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ export type AzureCosmosDBMongoDBIndexOptions = {
/** Skips automatic index creation. */
readonly skipCreate?: boolean;

readonly indexType?: "ivf" | "hnsw";
readonly indexType?: 'ivf' | 'hnsw' | 'diskann';
/** Number of clusters that the inverted file (IVF) index uses to group the vector data. */
readonly numLists?: number;
/** Number of dimensions for vector similarity. */
Expand All @@ -45,6 +45,13 @@ export type AzureCosmosDBMongoDBIndexOptions = {
readonly m?: number;
/** The size of the dynamic candidate list for constructing the graph with the HNSW index. */
readonly efConstruction?: number;
/** Max number of neighbors with the DiskANN index. */
readonly maxDegree?: number;
/** L value for index building with the DiskANN index. */
readonly lBuild?: number;
/** L value for index searching with the DiskANN index. */
readonly lSearch?: number;

};

/** Azure Cosmos DB for MongoDB vCore delete Parameters. */
Expand Down Expand Up @@ -234,7 +241,7 @@ export class AzureCosmosDBMongoDBVectorStore extends VectorStore {
*/
async createIndex(
dimensions: number | undefined = undefined,
indexType: "ivf" | "hnsw" = "ivf",
indexType:'ivf' | 'hnsw' | 'diskann' = 'ivf',
similarity: AzureCosmosDBMongoDBSimilarityType = AzureCosmosDBMongoDBSimilarityType.COS
): Promise<void> {
await this.connectPromise;
Expand All @@ -246,23 +253,35 @@ export class AzureCosmosDBMongoDBVectorStore extends VectorStore {
vectorLength = queryEmbedding.length;
}

// eslint-disable-next-line @typescript-eslint/no-explicit-any
const cosmosSearchOptions: any = {
kind: '',
similarity,
dimensions: vectorLength,
}

if (indexType === 'hnsw') {
cosmosSearchOptions.kind = 'vector-hnsw';
cosmosSearchOptions.m = this.indexOptions.m ?? 16;
cosmosSearchOptions.efConstruction = this.indexOptions.efConstruction ?? 200;
} else if (indexType === 'diskann') {
cosmosSearchOptions.kind = 'vector-diskann';
cosmosSearchOptions.maxDegree = this.indexOptions.maxDegree ?? 40;
cosmosSearchOptions.lBuild = this.indexOptions.lBuild ?? 50;
cosmosSearchOptions.lSearch = this.indexOptions.lSearch ?? 40;
/** Default to IVF index */
} else {
cosmosSearchOptions.kind = 'vector-ivf';
cosmosSearchOptions.numLists = this.indexOptions.numLists ?? 100;
}

const createIndexCommands = {
createIndexes: this.collection.collectionName,
indexes: [
{
name: this.indexName,
key: { [this.embeddingKey]: "cosmosSearch" },
cosmosSearchOptions: {
kind: indexType === "hnsw" ? "vector-hnsw" : "vector-ivf",
...(indexType === "hnsw"
? {
m: this.indexOptions.m ?? 16,
efConstruction: this.indexOptions.efConstruction ?? 200,
}
: { numLists: this.indexOptions.numLists ?? 100 }),
similarity,
dimensions: vectorLength,
},
cosmosSearchOptions,
},
],
};
Expand Down Expand Up @@ -357,7 +376,8 @@ export class AzureCosmosDBMongoDBVectorStore extends VectorStore {
*/
async similaritySearchVectorWithScore(
queryVector: number[],
k = 4
k: number,
indexType?: 'ivf' | 'hnsw' | 'diskann'
): Promise<[Document, number][]> {
await this.initialize();

Expand All @@ -367,7 +387,8 @@ export class AzureCosmosDBMongoDBVectorStore extends VectorStore {
cosmosSearch: {
vector: queryVector,
path: this.embeddingKey,
k,
k: k ?? 4,
...(indexType === 'diskann' ? { lSearch: this.indexOptions.lSearch ?? 40 } : {}),
},
returnStoredSource: true,
},
Expand Down Expand Up @@ -406,13 +427,26 @@ export class AzureCosmosDBMongoDBVectorStore extends VectorStore {
async maxMarginalRelevanceSearch(
query: string,
options: MaxMarginalRelevanceSearchOptions<this["FilterType"]>
): Promise<Document[]>;

async maxMarginalRelevanceSearch(
query: string,
options: MaxMarginalRelevanceSearchOptions<this["FilterType"]>,
indexType: 'ivf' | 'hnsw' | 'diskann'
): Promise<Document[]>;

async maxMarginalRelevanceSearch(
query: string,
options: MaxMarginalRelevanceSearchOptions<this["FilterType"]>,
indexType?: 'ivf' | 'hnsw' | 'diskann'
): Promise<Document[]> {
const { k, fetchK = 20, lambda = 0.5 } = options;

const queryEmbedding = await this.embeddings.embedQuery(query);
const docs = await this.similaritySearchVectorWithScore(
queryEmbedding,
fetchK
fetchK,
indexType
);
const embeddingList = docs.map((doc) => doc[0].metadata[this.embeddingKey]);

Expand Down Expand Up @@ -449,7 +483,7 @@ export class AzureCosmosDBMongoDBVectorStore extends VectorStore {
// Unless skipCreate is set, create the index
// This operation is no-op if the index already exists
if (!this.indexOptions.skipCreate) {
const indexType = this.indexOptions.indexType === "hnsw" ? "hnsw" : "ivf";
const indexType = this.indexOptions.indexType || 'ivf';
await this.createIndex(
this.indexOptions.dimensions,
indexType,
Expand Down