Skip to content

Commit

Permalink
split PQVectors into Mutable and Immutable implementations
Browse files Browse the repository at this point in the history
  • Loading branch information
jbellis committed Dec 19, 2024
1 parent 86f58f3 commit 549351e
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 162 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright DataStax, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.github.jbellis.jvector.pq;

import io.github.jbellis.jvector.vector.types.ByteSequence;

/**
 * A {@link PQVectors} populated once, at construction time, from data that has already been compressed.
 * This class declares no methods of its own; it only wires the supplied state into the inherited fields.
 */
public class ImmutablePQVectors extends PQVectors {
    /**
     * Wrap already-compressed data chunks in a PQVectors instance.
     *
     * The chunk array is stored as given (no defensive copy is made), so the caller should not
     * modify it after handing it over.
     *
     * @param pq the ProductQuantization that the chunks were encoded with
     * @param compressedDataChunks the compressed data, split into chunks
     * @param vectorCount the total number of vectors held across all chunks
     * @param vectorsPerChunk the number of vectors stored in each chunk
     */
    public ImmutablePQVectors(ProductQuantization pq,
                              ByteSequence<?>[] compressedDataChunks,
                              int vectorCount,
                              int vectorsPerChunk) {
        super(pq);
        // Sizing information first, then the backing storage; the three assignments are independent.
        this.vectorCount = vectorCount;
        this.vectorsPerChunk = vectorsPerChunk;
        this.compressedDataChunks = compressedDataChunks;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Copyright DataStax, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.github.jbellis.jvector.pq;

import io.github.jbellis.jvector.vector.VectorizationProvider;
import io.github.jbellis.jvector.vector.types.ByteSequence;
import io.github.jbellis.jvector.vector.types.VectorFloat;
import io.github.jbellis.jvector.vector.types.VectorTypeSupport;

import static java.lang.Math.max;

/**
 * A {@link PQVectors} that starts with a vector count of zero and is filled in place through
 * {@link #encodeAndSet} and {@link #setZero}. Storage for the maximum number of vectors is
 * allocated up front in the constructor, split into chunks.
 */
public class MutablePQVectors extends PQVectors {
    private static final VectorTypeSupport vectorTypeSupport = VectorizationProvider.getInstance().getVectorTypeSupport();

    /**
     * Construct a mutable PQVectors instance with the given ProductQuantization and maximum number of vectors that will be
     * stored in this instance. The vectors are split into chunks to avoid exceeding the maximum array size.
     *
     * Every chunk holds exactly {@code vectorsPerChunk} vectors except possibly the last one, which holds
     * the remainder when {@code maximumVectorCount} is not a multiple of {@code vectorsPerChunk}.
     *
     * @param pq the ProductQuantization to use
     * @param maximumVectorCount the maximum number of vectors that will be stored in this instance
     * @throws IllegalArgumentException if {@code maximumVectorCount} is not positive
     */
    public MutablePQVectors(ProductQuantization pq, int maximumVectorCount) {
        super(pq);
        // A zero capacity would otherwise surface as a division by zero below, and a negative one
        // as a failed allocation; reject both with a clear message instead.
        if (maximumVectorCount <= 0) {
            throw new IllegalArgumentException("maximumVectorCount must be positive, got " + maximumVectorCount);
        }
        this.vectorCount = 0;

        // Calculate if we need to split into multiple chunks
        int compressedDimension = pq.compressedVectorSize();
        long totalSize = (long) maximumVectorCount * compressedDimension;
        this.vectorsPerChunk = totalSize <= MAX_CHUNK_SIZE ? maximumVectorCount : MAX_CHUNK_SIZE / compressedDimension;

        // Integer division alone would drop a trailing partial chunk, so count it separately.
        int fullChunks = maximumVectorCount / vectorsPerChunk;
        int remainingVectors = maximumVectorCount % vectorsPerChunk;
        int numChunks = remainingVectors == 0 ? fullChunks : fullChunks + 1;

        ByteSequence<?>[] chunks = new ByteSequence<?>[numChunks];
        int chunkSize = vectorsPerChunk * compressedDimension;
        for (int i = 0; i < fullChunks; i++) {
            chunks[i] = vectorTypeSupport.createByteSequence(chunkSize);
        }

        // Last chunk is smaller when the capacity is not a multiple of vectorsPerChunk
        if (remainingVectors > 0) {
            chunks[fullChunks] = vectorTypeSupport.createByteSequence(remainingVectors * compressedDimension);
        }

        this.compressedDataChunks = chunks;
    }

    /**
     * Encode the given vector and set it at the given ordinal. The encoded bytes are written directly
     * into the sequence returned by {@code get(ordinal)}, so no intermediate copy is made here.
     *
     * The vector count is raised to {@code ordinal + 1} if it is currently lower.
     *
     * It's the caller's responsibility to ensure there are no "holes" in the ordinals that are
     * neither encoded nor set to zero.
     *
     * @param ordinal the ordinal to set
     * @param vector the vector to encode and set
     */
    public void encodeAndSet(int ordinal, VectorFloat<?> vector) {
        // Grow the count before calling get(ordinal), preserving the original statement order.
        vectorCount = max(vectorCount, ordinal + 1);
        pq.encodeTo(vector, get(ordinal));
    }

    /**
     * Set the vector at the given ordinal to zero.
     *
     * The vector count is raised to {@code ordinal + 1} if it is currently lower.
     *
     * It's the caller's responsibility to ensure there are no "holes" in the ordinals that are
     * neither encoded nor set to zero.
     *
     * @param ordinal the ordinal to set
     */
    public void setZero(int ordinal) {
        // Grow the count before calling get(ordinal), preserving the original statement order.
        vectorCount = max(vectorCount, ordinal + 1);
        get(ordinal).zero();
    }
}
Loading

0 comments on commit 549351e

Please sign in to comment.