diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml deleted file mode 100644 index 0bcba5de..00000000 --- a/.github/workflows/docker.yaml +++ /dev/null @@ -1,109 +0,0 @@ -name: docker -on: - pull_request: - push: - branches: - - main - tags: - - "*" - -jobs: - docker: - runs-on: ubuntu-latest - outputs: - branch_name: ${{steps.metadata.outputs.branch_name}} - image_branch_name: ${{steps.metadata.outputs.image_branch_name}} - strategy: - fail-fast: false - matrix: - cloud_image_tag_prefix: - - pg16 - pgversion: - - 16 - tsversion: - - 2.13 - steps: - - name: Checkout Timescale Vector - uses: actions/checkout@v3 - - - name: Login to GitHub Container Registry - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1-node16 - with: - aws-access-key-id: ${{ secrets.ORG_AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.ORG_AWS_SECRET_ACCESS_KEY }} - aws-region: us-east-1 - - - name: Login to Amazon ECR - uses: aws-actions/amazon-ecr-login@v1 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - - name: Gather metadata - id: metadata - run: | - tsmajor=$(echo ${{ matrix.tsversion }} | cut -d. -f1) - tsmajmin=$(echo ${{ matrix.tsversion }} | cut -d. -f1,2) - commit_sha=$(git rev-parse --short "${{ github.event.pull_request.head.sha || github.sha }}") - branch_name=$(echo ${{github.head_ref || github.ref_name}}) - image_branch_name=$(echo ${branch_name} | sed 's#/#-#') - base_cloud_image_tag=$(aws ecr describe-images --repository-name 'timescaledb-cloud' --region us-east-1 --query 'imageDetails[?imageTags[?starts_with(@,`${{ matrix.cloud_image_tag_prefix }}`) && contains(@, `ts${{ matrix.tsversion }}`) && contains(@, `amd64`)]].imageTags' --output text | sort -V | tail -n1) - echo "tsmajor=${tsmajor}" >> ${GITHUB_OUTPUT} - echo "tsmajmin=${tsmajmin}" >> ${GITHUB_OUTPUT} - echo "branch_name=${branch_name}" >> ${GITHUB_OUTPUT} - echo "image_branch_name=${image_branch_name}" >> ${GITHUB_OUTPUT} - echo "commit_sha=${commit_sha}" >> ${GITHUB_OUTPUT} - echo "base_cloud_image_tag=${base_cloud_image_tag}" >> ${GITHUB_OUTPUT} - - - name: Build and push - uses: docker/build-push-action@v3 - env: - DOCKER_PUSH_REQUIRED: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.owner.login == 'timescale' }} # Don't run docker push when this is a PR from a fork - with: - build-args: | - PG_VERSION=${{ matrix.pgversion }} - TIMESCALEDB_VERSION_MAJMIN=${{ steps.metadata.outputs.tsmajmin }} - BASE_IMAGE=142548018081.dkr.ecr.us-east-1.amazonaws.com/timescaledb-cloud:${{ steps.metadata.outputs.base_cloud_image_tag }} - secrets: | - "GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }}" - context: . 
- push: ${{ env.DOCKER_PUSH_REQUIRED == 'true' }} - load: ${{ env.DOCKER_PUSH_REQUIRED != 'true' }} - tags: | - ghcr.io/timescale/dev_timescale_vector:${{steps.metadata.outputs.image_branch_name}}-ts${{matrix.tsversion}}-pg${{matrix.pgversion}} - labels: | - org.opencontainers.image.source= - org.opencontainers.image.revision= - org.opencontainers.image.created= - cache-from: type=gha,scope=${{matrix.pgversion}}-${{matrix.tsversion}} - cache-to: type=gha,mode=max,scope=${{matrix.pgversion}}-${{matrix.tsversion}} - - - name: Publish images to ECR - uses: akhilerm/tag-push-action@v2.0.0 - with: - src: ghcr.io/timescale/dev_timescale_vector:${{steps.metadata.outputs.image_branch_name}}-ts${{matrix.tsversion}}-pg${{matrix.pgversion}} - dst: 142548018081.dkr.ecr.us-east-1.amazonaws.com/timescale-vector:${{steps.metadata.outputs.image_branch_name}}-${{steps.metadata.outputs.commit_sha}}-ts${{matrix.tsversion}}-pg${{matrix.pgversion}} - - - # This allows us to set a single job which must pass in GitHub's branch protection rules, - # otherwise we have to keep updating them as we add or remove postgres versions etc. - docker-result: - name: docker result - if: always() - needs: - - docker - runs-on: ubuntu-latest - steps: - - name: Mark the job as a success - if: needs.docker.result == 'success' - run: exit 0 - - name: Mark the job as a failure - if: needs.docker.result != 'success' - run: exit 1 diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index fe389128..00000000 --- a/Dockerfile +++ /dev/null @@ -1,105 +0,0 @@ -# syntax=docker/dockerfile:1.3-labs -ARG PG_VERSION=16 -ARG TIMESCALEDB_VERSION_MAJMIN=2.13 -ARG PGRX_VERSION=0.11.1 -ARG BASE_IMAGE=timescale/timescaledb-ha:pg${PG_VERSION}-ts${TIMESCALEDB_VERSION_MAJMIN}-all - -FROM timescale/timescaledb-ha:pg${PG_VERSION}-ts${TIMESCALEDB_VERSION_MAJMIN}-all AS ha-build-tools -ARG PG_VERSION -ARG PGRX_VERSION - -ENV DEBIAN_FRONTEND=noninteractive -USER root - -RUN apt-get update -RUN apt-get install -y \ - clang \ - gcc \ - pkg-config \ - wget \ - lsb-release \ - libssl-dev \ - curl \ - gnupg2 \ - binutils \ - devscripts \ - equivs \ - git \ - libkrb5-dev \ - libopenblas-dev \ - libopenblas-base \ - libperl-dev \ - make \ - cmake - -RUN wget -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - -RUN for t in deb deb-src; do \ - echo "$t [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/postgresql.keyring] http://apt.postgresql.org/pub/repos/apt/ $(lsb_release -s -c)-pgdg main" >> /etc/apt/sources.list.d/pgdg.list; \ - done - -RUN apt-get update && apt-get install -y \ - postgresql-${PG_VERSION} \ - postgresql-server-dev-${PG_VERSION} - -USER postgres -WORKDIR /build - -ENV HOME=/build \ - PATH=/build/.cargo/bin:$PATH \ - CARGO_HOME=/build/.cargo \ - RUSTUP_HOME=/build/.rustup - -RUN chown postgres:postgres /build - -# if you need bleeding edge timescaledb -# RUN cd /build && git clone https://github.com/timescale/timescaledb.git /build/timescaledb \ -# && cd /build/timescaledb && rm -fr build \ -# && git checkout ${TS_VERSION} \ -# && ./bootstrap -DCMAKE_BUILD_TYPE=RelWithDebInfo -DREGRESS_CHECKS=OFF -DTAP_CHECKS=OFF -DGENERATE_DOWNGRADE_SCRIPT=OFF -DWARNINGS_AS_ERRORS=OFF \ -# && cd build && make install \ -# && cd ~ - -RUN curl https://sh.rustup.rs -sSf | bash -s -- -y --profile=minimal -c rustfmt -ENV PATH="${CARGO_HOME}/bin:${PATH}" - -RUN set -ex \ - && mkdir /build/timescale-vector \ - && mkdir /build/timescale-vector/scripts \ - && mkdir /build/timescale-vector/timescale_vector - -## Install pgrx 
taking into account selected rust toolchain version. -## Making this a separate step to improve layer caching -#COPY --chown=postgres:postgres timescale_vector/rust-toolchain.toml /build/timescale-vector/timescale_vector/rust-toolchain.toml -COPY --chown=postgres:postgres scripts /build/timescale-vector/scripts -USER postgres -WORKDIR /build/timescale-vector/timescale_vector -RUN set -ex \ - && rm -rf "${CARGO_HOME}/registry" "${CARGO_HOME}/git" \ - && chown postgres:postgres -R "${CARGO_HOME}" \ - && cargo install cargo-pgrx --version ${PGRX_VERSION} --config net.git-fetch-with-cli=true - -## Copy and build Vector itself -USER postgres -COPY --chown=postgres:postgres timescale_vector /build/timescale-vector/timescale_vector -COPY --chown=postgres:postgres Makefile /build/timescale-vector/Makefile - -WORKDIR /build/timescale-vector -RUN PG_CONFIG="/usr/lib/postgresql/${PG_VERSION}/bin/pg_config" make package - -## COPY over the new files to the image. Done as a seperate stage so we don't -## ship the build tools. Fixed pg16 image is intentional. The image ships with -## PG 12, 13, 14, 15 and 16 binaries. The PATH environment variable below is used -## to specify the runtime version. -FROM ${BASE_IMAGE} -ARG PG_VERSION - -## Copy old versions and/or bleeding edge timescaledb if any were installed -COPY --from=ha-build-tools --chown=root:postgres /usr/share/postgresql /usr/share/postgresql -COPY --from=ha-build-tools --chown=root:postgres /usr/lib/postgresql /usr/lib/postgresql - -## Copy freshly build current Vector version -COPY --from=ha-build-tools --chown=root:postgres /build/timescale-vector/timescale_vector/target/release/timescale_vector-pg${PG_VERSION}/usr/lib/postgresql /usr/lib/postgresql -COPY --from=ha-build-tools --chown=root:postgres /build/timescale-vector/timescale_vector/target/release/timescale_vector-pg${PG_VERSION}/usr/share/postgresql /usr/share/postgresql -ENV PATH="/usr/lib/postgresql/${PG_VERSION}/bin:${PATH}" - -USER postgres diff --git a/README.md b/README.md index b34ecee9..56995ba1 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,14 @@ Timescale Vector -Say something chat gpt. +A vector index for speeding up ANN search in `pgvector`. 🔧 Tools Setup -Building the extension requires valid rust (we build and test on 1.65), rustfmt, and clang installs, along with the postgres headers for whichever version of postgres you are running, and pgx. We recommend installing rust using the official instructions: + +Building the extension requires valid rust, rustfmt, and clang installs, along with the postgres headers for whichever version of postgres you are running, and pgx. We recommend installing rust using the official instructions: ```shell curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh ``` -and build tools, the postgres headers, in the preferred manner for your system. You may also need to install OpenSSl. For Ubuntu you can follow the postgres install instructions then run +and build tools, the postgres headers, in the preferred manner for your system. You may also need to install OpenSSL. For Ubuntu you can follow the postgres install instructions then run ```shell sudo apt-get install make gcc pkg-config clang postgresql-server-dev-16 libssl-dev @@ -18,7 +19,7 @@ Next you need cargo-pgx, which can be installed with cargo install --locked cargo-pgrx ``` -You must reinstall cargo-pgx whenever you update your Rust compiler, since cargo-pgx needs to be built with the same compiler as Toolkit. 
+You must reinstall cargo-pgrx whenever you update your Rust compiler, since cargo-pgrx needs to be built with the same compiler as Timescale Vector. Finally, setup the pgx development environment with ```shell cargo pgrx init --pg16 pg_config ``` @@ -28,10 +29,11 @@ Installing from source is also available on macOS and requires the same set of prerequisites and set up commands listed above. 💾 Building and Installing the extension + Download or clone this repository, and switch to the extension subdirectory, e.g. ```shell git clone https://github.com/timescale/timescale-vector && \ -cd timescale-vector/extension +cd timescale-vector/timescale_vector ``` Then run @@ -41,9 +43,13 @@ cargo pgrx install --release To initialize the extension after installation, enter the following into psql: +```sql CREATE EXTENSION timescale_vector; +``` + ✏️ Get Involved -The Timescale Vecotr project is still in the initial planning stage as we decide our priorities and what to implement first. As such, now is a great time to help shape the project's direction! Have a look at the list of features we're thinking of working on and feel free to comment on the features, expand the list, or hop on the Discussions forum for more in-depth discussions. + +The Timescale Vector project is still in its early stages as we decide our priorities and what to implement. As such, now is a great time to help shape the project's direction! Have a look at the list of features we're thinking of working on and feel free to comment on the features, expand the list, or hop on the Discussions forum for more in-depth discussions. 🔨 Testing See above for prerequisites and installation instructions. @@ -52,7 +58,14 @@ You can run tests against a postgres version pg16 using ```shell cargo pgrx test ${postgres_version} ``` + +To run all tests, run: +```shell +cargo test -- --ignored && cargo pgrx test ${postgres_version} +``` + 🐯 About Timescale + TimescaleDB is a distributed time-series database built on PostgreSQL that scales to over 10 million of metrics per second, supports native compression, handles high cardinality, and offers native time-series capabilities, such as data retention policies, continuous aggregate views, downsampling, data gap-filling and interpolation. TimescaleDB also supports full SQL, a variety of data types (numerics, text, arrays, JSON, booleans), and ACID semantics. Operationally mature capabilities include high availability, streaming backups, upgrades over time, roles and permissions, and security. 
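For reference, the pieces above fit together as follows; a minimal usage sketch assembled from the SQL in this repository's own tests (the table name, vector dimension, and index options are illustrative):

```sql
-- create a table with a pgvector column and index it with tsv
CREATE TABLE test(embedding vector(3));
CREATE INDEX idxtest ON test USING tsv(embedding) WITH (num_neighbors=30);
INSERT INTO test(embedding) VALUES ('[1,2,3]'), ('[4,5,6]'), ('[7,8,10]');
-- force an index scan and order by cosine distance
SET enable_seqscan = 0;
SELECT * FROM test ORDER BY embedding <=> '[0,0,0]';
```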
diff --git a/timescale_vector/Cargo.toml b/timescale_vector/Cargo.toml index c43b4803..b4b202e5 100644 --- a/timescale_vector/Cargo.toml +++ b/timescale_vector/Cargo.toml @@ -1,10 +1,10 @@ [package] name = "timescale_vector" -version = "0.0.2" +version = "0.0.3-dev" edition = "2021" [lib] -crate-type = ["cdylib"] +crate-type = ["cdylib", "rlib"] [features] default = ["pg16"] @@ -14,9 +14,9 @@ pg_test = [] [dependencies] memoffset = "0.9.0" -pgrx = "=0.11.1" +pgrx = "=0.11.4" rkyv = { version="0.7.42", features=["validation"]} -simdeez = {version = "1.0"} +simdeez = {version = "1.0.8"} reductive = { version = "0.9.0"} ndarray = { version = "0.15.0", features = ["blas"] } blas-src = { version = "0.8", features = ["openblas"] } @@ -26,10 +26,14 @@ rand_chacha = "0.3" rand_core = "0.6" rand_xorshift = "0.3" rayon = "1" - +timescale_vector_derive = { path = "timescale_vector_derive" } +semver = "1.0.22" [dev-dependencies] -pgrx-tests = "=0.11.1" +pgrx-tests = "=0.11.4" +pgrx-pg-config = "=0.11.4" +criterion = "0.5.1" +tempfile = "3.3.0" [profile.dev] panic = "unwind" @@ -39,3 +43,12 @@ panic = "unwind" opt-level = 3 lto = "fat" codegen-units = 1 +#debug = true + +[[bench]] +name = "distance" +harness = false + +[[bench]] +name = "lsr" +harness = false diff --git a/timescale_vector/benches/distance.rs b/timescale_vector/benches/distance.rs new file mode 100644 index 00000000..e823df94 --- /dev/null +++ b/timescale_vector/benches/distance.rs @@ -0,0 +1,351 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use timescale_vector::access_method::distance::{ + distance_cosine, distance_l2, distance_l2_optimized_for_few_dimensions, + distance_l2_unoptimized, distance_xor_optimized, +}; + +//copy and use qdrants simd code, purely for benchmarking purposes +//not used in the actual extension +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use std::arch::x86_64::*; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[target_feature(enable = "avx")] +#[target_feature(enable = "fma")] +unsafe fn hsum256_ps_avx(x: __m256) -> f32 { + let x128: __m128 = _mm_add_ps(_mm256_extractf128_ps(x, 1), _mm256_castps256_ps128(x)); + let x64: __m128 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128)); + let x32: __m128 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); + _mm_cvtss_f32(x32) +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[target_feature(enable = "avx")] +#[target_feature(enable = "fma")] +pub unsafe fn dot_similarity_avx_qdrant(v1: &[f32], v2: &[f32]) -> f32 { + let n = v1.len(); + let m = n - (n % 32); + let mut ptr1: *const f32 = v1.as_ptr(); + let mut ptr2: *const f32 = v2.as_ptr(); + let mut sum256_1: __m256 = _mm256_setzero_ps(); + let mut sum256_2: __m256 = _mm256_setzero_ps(); + let mut sum256_3: __m256 = _mm256_setzero_ps(); + let mut sum256_4: __m256 = _mm256_setzero_ps(); + let mut i: usize = 0; + while i < m { + sum256_1 = _mm256_fmadd_ps(_mm256_loadu_ps(ptr1), _mm256_loadu_ps(ptr2), sum256_1); + sum256_2 = _mm256_fmadd_ps( + _mm256_loadu_ps(ptr1.add(8)), + _mm256_loadu_ps(ptr2.add(8)), + sum256_2, + ); + sum256_3 = _mm256_fmadd_ps( + _mm256_loadu_ps(ptr1.add(16)), + _mm256_loadu_ps(ptr2.add(16)), + sum256_3, + ); + sum256_4 = _mm256_fmadd_ps( + _mm256_loadu_ps(ptr1.add(24)), + _mm256_loadu_ps(ptr2.add(24)), + sum256_4, + ); + + ptr1 = ptr1.add(32); + ptr2 = ptr2.add(32); + i += 32; + } + + let mut result = hsum256_ps_avx(sum256_1) + + hsum256_ps_avx(sum256_2) + + hsum256_ps_avx(sum256_3) + + hsum256_ps_avx(sum256_4); + + for i in 0..n - 
m { + result += (*ptr1.add(i)) * (*ptr2.add(i)); + } + result +} + +/// Copy of Diskann's distance function. again just for benchmarking +/// not used in the actual extension +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[inline(never)] +pub unsafe fn distance_l2_vector_f32(a: &[f32], b: &[f32]) -> f32 { + let n = a.len(); + + // make sure the addresses are bytes aligned + debug_assert_eq!(a.as_ptr().align_offset(32), 0); + debug_assert_eq!(b.as_ptr().align_offset(32), 0); + + unsafe { + let mut sum = _mm256_setzero_ps(); + + // Iterate over the elements in steps of 8 + for i in (0..n).step_by(8) { + let a_vec = _mm256_load_ps(&a[i]); + let b_vec = _mm256_load_ps(&b[i]); + let diff = _mm256_sub_ps(a_vec, b_vec); + sum = _mm256_fmadd_ps(diff, diff, sum); + } + + let x128: __m128 = _mm_add_ps(_mm256_extractf128_ps(sum, 1), _mm256_castps256_ps128(sum)); + /* ( -, -, x1+x3+x5+x7, x0+x2+x4+x6 ) */ + let x64: __m128 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128)); + /* ( -, -, -, x0+x1+x2+x3+x4+x5+x6+x7 ) */ + let x32: __m128 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); + /* Conversion to float is a no-op on x86-64 */ + _mm_cvtss_f32(x32) + } +} + +//only used for alignment +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[repr(C, align(32))] +struct Vector32ByteAligned { + v: [f32; 2000], +} + +//the diskann version requires alignment so run benchmarks with aligned vectors +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn benchmark_distance_x86_aligned_vectors(c: &mut Criterion) { + let a = Box::new(Vector32ByteAligned { + v: [(); 2000].map(|_| 100.1), + }); + + let b = Box::new(Vector32ByteAligned { + v: [(); 2000].map(|_| 22.1), + }); + + let l = a.v; + let r = b.v; + + assert_eq!(r.as_ptr().align_offset(32), 0); + assert_eq!(l.as_ptr().align_offset(32), 0); + + c.bench_function("distance comparison qdrant (aligned)", |b| { + b.iter(|| unsafe { dot_similarity_avx_qdrant(black_box(&r), black_box(&l)) }) + }); + c.bench_function("distance comparison diskann (aligned)", |b| { + b.iter(|| unsafe { distance_l2_vector_f32(black_box(&r), black_box(&l)) }) + }); +} + +//compare qdrant on unaligned vectors (we don't have alignment so this is apples to apples with us) +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn benchmark_distance_x86_unaligned_vectors(c: &mut Criterion) { + let r: Vec<f32> = (0..2000).map(|v| v as f32 + 1000.1).collect(); + let l: Vec<f32> = (0..2000).map(|v| v as f32 + 2000.2).collect(); + + c.bench_function("distance comparison qdrant (unaligned)", |b| { + b.iter(|| unsafe { dot_similarity_avx_qdrant(black_box(&r), black_box(&l)) }) + }); +} + +fn benchmark_distance(c: &mut Criterion) { + let r: Vec<f32> = (0..2000).map(|v| v as f32 + 1000.1).collect(); + let l: Vec<f32> = (0..2000).map(|v| v as f32 + 2000.2).collect(); + + let mut group = c.benchmark_group("Distance"); + group.bench_function("distance l2", |b| { + b.iter(|| distance_l2(black_box(&r), black_box(&l))) + }); + group.bench_function("distance cosine", |b| { + b.iter(|| distance_cosine(black_box(&r), black_box(&l))) + }); +} + +#[inline(always)] +pub fn distance_l2_fixed_size_opt(a: &[f32], b: &[f32]) -> f32 { + assert_eq!(a.len(), 6); + let norm: f32 = a[..6] + .iter() + .zip(b[..6].iter()) + .map(|t| (*t.0 as f32 - *t.1 as f32) * (*t.0 as f32 - *t.1 as f32)) + .sum(); + assert!(norm >= 0.); + //don't sqrt for performance. These are only used for ordering so sqrt not needed + norm +} + +//PQ uses l2 distance on small vectors (6 dims or so). Benchmark that. 
+fn benchmark_distance_few_dimensions(c: &mut Criterion) { + let r: Vec<f32> = (0..6).map(|v| v as f32 + 1000.1).collect(); + let l: Vec<f32> = (0..6).map(|v| v as f32 + 2000.2).collect(); + + let mut group = c.benchmark_group("Distance"); + group.bench_function("pq distance l2 optimized for many dimensions", |b| { + b.iter(|| distance_l2(black_box(&r), black_box(&l))) + }); + group.bench_function("pq distance l2 unoptimized", |b| { + b.iter(|| distance_l2_unoptimized(black_box(&r), black_box(&l))) + }); + group.bench_function( + "pq distance l2 auto-vectorized for 6 dimensional arrays", + |b| b.iter(|| distance_l2_fixed_size_opt(black_box(&r), black_box(&l))), + ); + group.bench_function( + "pq distance l2 optimized for few dimensions (what's used in the code now)", + |b| b.iter(|| distance_l2_optimized_for_few_dimensions(black_box(&r), black_box(&l))), + ); +} + +fn pack_bools_to_u8(bools: Vec<bool>) -> Vec<u8> { + let mut bytes = vec![0u8; (bools.len() + 7) / 8]; + + for (i, &b) in bools.iter().enumerate() { + let byte_index = i / 8; + let bit_index = i % 8; + + if b { + bytes[byte_index] |= 1 << bit_index; + } + } + + bytes +} + +fn pack_bools_to_u64(bools: Vec<bool>) -> Vec<u64> { + let mut u64s = vec![0u64; (bools.len() + 63) / 64]; + + for (i, &b) in bools.iter().enumerate() { + let u64_index = i / 64; + let bit_index = i % 64; + + if b { + u64s[u64_index] |= 1 << bit_index; + } + } + + u64s +} + +fn pack_bools_to_u128(bools: Vec<bool>) -> Vec<u128> { + let mut u128s = vec![0u128; (bools.len() + 127) / 128]; + + for (i, &b) in bools.iter().enumerate() { + let u128_index = i / 128; + let bit_index = i % 128; + + if b { + u128s[u128_index] |= 1 << bit_index; + } + } + + u128s +} + +fn xor_unoptimized_u8(v1: &[u8], v2: &[u8]) -> usize { + let mut result = 0; + for (b1, b2) in v1.iter().zip(v2.iter()) { + result += (b1 ^ b2).count_ones() as usize; + } + result +} + +fn xor_unoptimized_u8_fixed_size(v1: &[u8], v2: &[u8]) -> usize { + let mut result = 0; + for (b1, b2) in v1[..192].iter().zip(v2[..192].iter()) { + result += (b1 ^ b2).count_ones() as usize; + } + result +} + +fn xor_unoptimized_u64(v1: &[u64], v2: &[u64]) -> usize { + let mut result = 0; + for (b1, b2) in v1.iter().zip(v2.iter()) { + result += (b1 ^ b2).count_ones() as usize; + } + result +} + +fn xor_unoptimized_u64_fixed_size(v1: &[u64], v2: &[u64]) -> usize { + let mut result = 0; + for (b1, b2) in v1[..24].iter().zip(v2[..24].iter()) { + result += (b1 ^ b2).count_ones() as usize; + } + result +} + +fn xor_unoptimized_u64_fixed_size_map(v1: &[u64], v2: &[u64]) -> usize { + v1[..24] + .iter() + .zip(v2[..24].iter()) + .map(|(&l, &r)| (l ^ r).count_ones() as usize) + .sum() +} + +fn xor_unoptimized_u128(v1: &[u128], v2: &[u128]) -> usize { + let mut result = 0; + for (b1, b2) in v1.iter().zip(v2.iter()) { + result += (b1 ^ b2).count_ones() as usize; + } + result +} + +fn xor_unoptimized_u128_fixed_size(v1: &[u128], v2: &[u128]) -> usize { + let mut result = 0; + for (b1, b2) in v1[..12].iter().zip(v2[..12].iter()) { + result += (b1 ^ b2).count_ones() as usize; + } + result +} + +fn benchmark_distance_xor(c: &mut Criterion) { + let r: Vec<bool> = (0..1536).map(|v| v as u64 % 2 == 0).collect(); + let l: Vec<bool> = (0..1536).map(|v| v as u64 % 3 == 0).collect(); + let r_u8 = pack_bools_to_u8(r.clone()); + let l_u8 = pack_bools_to_u8(l.clone()); + let r_u64 = pack_bools_to_u64(r.clone()); + let l_u64 = pack_bools_to_u64(l.clone()); + let r_u128 = pack_bools_to_u128(r.clone()); + let l_u128 = pack_bools_to_u128(l.clone()); + + let mut group = c.benchmark_group("Distance xor"); + 
group.bench_function("xor unoptimized u8", |b| { + b.iter(|| xor_unoptimized_u8(black_box(&r_u8), black_box(&l_u8))) + }); + group.bench_function("xor unoptimized u64", |b| { + b.iter(|| xor_unoptimized_u64(black_box(&r_u64), black_box(&l_u64))) + }); + group.bench_function("xor unoptimized u128", |b| { + b.iter(|| xor_unoptimized_u128(black_box(&r_u128), black_box(&l_u128))) + }); + + assert!(r_u8.len() == 192); + group.bench_function("xor unoptimized u8 fixed size", |b| { + b.iter(|| xor_unoptimized_u8_fixed_size(black_box(&r_u8), black_box(&l_u8))) + }); + assert!(r_u64.len() == 24); + group.bench_function("xor unoptimized u64 fixed size", |b| { + b.iter(|| xor_unoptimized_u64_fixed_size(black_box(&r_u64), black_box(&l_u64))) + }); + group.bench_function("xor unoptimized u64 fixed size_map", |b| { + b.iter(|| xor_unoptimized_u64_fixed_size_map(black_box(&r_u64), black_box(&l_u64))) + }); + group.bench_function("xor optimized version we use in code", |b| { + b.iter(|| distance_xor_optimized(black_box(&r_u64), black_box(&l_u64))) + }); + assert!(r_u128.len() == 12); + group.bench_function("xor unoptimized u128 fixed size", |b| { + b.iter(|| xor_unoptimized_u128_fixed_size(black_box(&r_u128), black_box(&l_u128))) + }); +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +criterion_group!( + benches, + benchmark_distance, + benchmark_distance_few_dimensions, + benchmark_distance_x86_unaligned_vectors, + benchmark_distance_x86_aligned_vectors, + benchmark_distance_xor, +); +#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] +criterion_group!( + benches, + benchmark_distance, + benchmark_distance_few_dimensions, + benchmark_distance_xor, +); + +criterion_main!(benches); diff --git a/timescale_vector/benches/lsr.rs b/timescale_vector/benches/lsr.rs new file mode 100644 index 00000000..53e31c9f --- /dev/null +++ b/timescale_vector/benches/lsr.rs @@ -0,0 +1,206 @@ +use std::{ + cmp::{Ordering, Reverse}, + collections::BinaryHeap, +}; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use rand::Rng; + +pub struct ListSearchNeighbor { + pub index_pointer: u64, + distance: f32, + visited: bool, + _private_data: u64, +} + +impl PartialOrd for ListSearchNeighbor { + fn partial_cmp(&self, other: &Self) -> Option { + self.distance.partial_cmp(&other.distance) + } +} + +impl PartialEq for ListSearchNeighbor { + fn eq(&self, other: &Self) -> bool { + self.index_pointer == other.index_pointer + } +} + +impl Eq for ListSearchNeighbor {} + +impl Ord for ListSearchNeighbor { + fn cmp(&self, other: &Self) -> Ordering { + self.distance.partial_cmp(&other.distance).unwrap() + } +} + +pub struct ListSearchResult { + candidate_storage: Vec, //plain storage + best_candidate: Vec, //pos in candidate storage, sorted by distance +} + +impl ListSearchResult { + pub fn get_lsn_by_idx(&self, idx: usize) -> &ListSearchNeighbor { + &self.candidate_storage[idx] + } + + pub fn insert_neighbor(&mut self, n: ListSearchNeighbor) { + //insert while preserving sort order. + let idx = self + .best_candidate + .partition_point(|x| self.candidate_storage[*x] < n); + self.candidate_storage.push(n); + let pos = self.candidate_storage.len() - 1; + self.best_candidate.insert(idx, pos) + } + + fn visit_closest(&mut self, pos_limit: usize) -> Option { + //OPT: should we optimize this not to do a linear search each time? 
+ let neighbor_position = self + .best_candidate + .iter() + .position(|n| !self.candidate_storage[*n].visited); + match neighbor_position { + Some(pos) => { + if pos > pos_limit { + return None; + } + let n = &mut self.candidate_storage[self.best_candidate[pos]]; + n.visited = true; + Some(self.best_candidate[pos]) + } + None => None, + } + } +} + +pub struct ListSearchResultMinHeap { + candidates: BinaryHeap<Reverse<ListSearchNeighbor>>, + visited: Vec<ListSearchNeighbor>, +} + +impl ListSearchResultMinHeap { + pub fn insert_neighbor(&mut self, n: ListSearchNeighbor) { + //insert while preserving sort order. + // self.candidate_storage.push(n); + // let pos = self.candidate_storage.len() - 1; + self.candidates.push(Reverse(n)); + + /*let idx = self + .best_candidate + .partition_point(|x| self.candidate_storage[*x].distance < n.distance); + self.candidate_storage.push(n); + let pos = self.candidate_storage.len() - 1; + self.best_candidate.insert(idx, pos)*/ + } + + fn visit_closest(&mut self, pos_limit: usize) -> Option<&ListSearchNeighbor> { + //OPT: should we optimize this not to do a linear search each time? + if self.candidates.len() == 0 { + panic!("no candidates left"); + //return None; + } + + if self.visited.len() > pos_limit { + let node_at_pos = &self.visited[pos_limit - 1]; + let head = self.candidates.peek().unwrap(); + if head.0.distance >= node_at_pos.distance { + return None; + } + } + + let head = self.candidates.pop().unwrap(); + let idx = self + .visited + .partition_point(|x| x.distance < head.0.distance); + self.visited.insert(idx, head.0); + Some(&self.visited[idx]) + } +} + +fn run_lsr_min_heap(lsr: &mut ListSearchResultMinHeap) { + let item = lsr.visit_closest(100000000); + let lsn = item.unwrap(); + + let mut rng = rand::thread_rng(); + let delta: f64 = rng.gen(); // generates a float between 0 and 1 + let distance = lsn.distance + ((delta * 5.0) as f32); + + for _ in 0..20 { + lsr.insert_neighbor(ListSearchNeighbor { + index_pointer: 0, + distance: distance, + visited: false, + _private_data: 2, + }) + } +} + +fn run_lsr(lsr: &mut ListSearchResult) { + let item_idx = lsr.visit_closest(1000000); + let lsn = lsr.get_lsn_by_idx(item_idx.unwrap()); + + let mut rng = rand::thread_rng(); + let delta: f64 = rng.gen(); // generates a float between 0 and 1 + let distance = lsn.distance + ((delta * 5.0) as f32); + + for _ in 0..20 { + lsr.insert_neighbor(ListSearchNeighbor { + index_pointer: 0, + distance: distance, + visited: false, + _private_data: 2, + }) + } +} + +pub fn benchmark_lsr(c: &mut Criterion) { + let mut lsr = ListSearchResult { + candidate_storage: Vec::new(), + best_candidate: Vec::new(), + }; + + lsr.insert_neighbor(ListSearchNeighbor { + index_pointer: 0, + distance: 100.0, + visited: false, + _private_data: 1, + }); + + c.bench_function("lsr OG", |b| b.iter(|| run_lsr(black_box(&mut lsr)))); +} + +pub fn benchmark_lsr_min_heap(c: &mut Criterion) { + let mut lsr = ListSearchResultMinHeap { + candidates: BinaryHeap::new(), + visited: Vec::new(), + }; + + lsr.insert_neighbor(ListSearchNeighbor { + index_pointer: 0, + distance: 100.0, + visited: false, + _private_data: 1, + }); + + c.bench_function("lsr min heap", |b| { + b.iter(|| run_lsr_min_heap(black_box(&mut lsr))) + }); +} + +criterion_group!(benches_lsr, benchmark_lsr, benchmark_lsr_min_heap); + +criterion_main!(benches_lsr); +/* +fn fibonacci(n: u64) -> u64 { + match n { + 0 => 1, + 1 => 1, + n => fibonacci(n - 1) + fibonacci(n - 2), + } +} +pub fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("fib 20", |b| b.iter(|| 
fibonacci(black_box(20)))); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches);*/ diff --git a/timescale_vector/sql/timescale_vector--0.0.2--0.0.3-dev.sql b/timescale_vector/sql/timescale_vector--0.0.2--0.0.3-dev.sql new file mode 100644 index 00000000..bcafdb51 --- /dev/null +++ b/timescale_vector/sql/timescale_vector--0.0.2--0.0.3-dev.sql @@ -0,0 +1,48 @@ +/* +This file is auto generated by pgrx. + +The ordering of items is not stable, it is driven by a dependency graph. +*/ + +-- src/access_method/mod.rs:48 +-- timescale_vector::access_method::amhandler + + CREATE OR REPLACE FUNCTION tsv_amhandler(internal) RETURNS index_am_handler PARALLEL SAFE IMMUTABLE STRICT COST 0.0001 LANGUAGE c AS '$libdir/timescale_vector-0.0.3-dev', 'amhandler_wrapper'; + + DO $$ + DECLARE + c int; + BEGIN + SELECT count(*) + INTO c + FROM pg_catalog.pg_am a + WHERE a.amname = 'tsv'; + + IF c = 0 THEN + CREATE ACCESS METHOD tsv TYPE INDEX HANDLER tsv_amhandler; + END IF; + END; + $$; + + + + +-- src/access_method/mod.rs:91 + +DO $$ +DECLARE + c int; +BEGIN + SELECT count(*) + INTO c + FROM pg_catalog.pg_opclass c + WHERE c.opcname = 'vector_cosine_ops' + AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'tsv'); + + IF c = 0 THEN + CREATE OPERATOR CLASS vector_cosine_ops DEFAULT + FOR TYPE vector USING tsv AS + OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops; + END IF; +END; +$$; \ No newline at end of file diff --git a/timescale_vector/src/access_method/README.md b/timescale_vector/src/access_method/README.md deleted file mode 100644 index 839466d4..00000000 --- a/timescale_vector/src/access_method/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# On Disk Layout - -Meta Page -- basic metadata -- future proof -- page 0 -- start_node tid - -Graph pages --- start node first -- foreach node --- vector for node --- array of tids of neighbors --- array of distances? - -- in "special area" --- bitmap of deletes? 
- diff --git a/timescale_vector/src/access_method/build.rs b/timescale_vector/src/access_method/build.rs index 0408118f..7893346e 100644 --- a/timescale_vector/src/access_method/build.rs +++ b/timescale_vector/src/access_method/build.rs @@ -1,52 +1,60 @@ use std::time::Instant; +use pgrx::pg_sys::{pgstat_progress_update_param, AsPgCStr}; use pgrx::*; -use reductive::pq::Pq; -use crate::access_method::disk_index_graph::DiskIndexGraph; use crate::access_method::graph::Graph; -use crate::access_method::graph::InsertStats; -use crate::access_method::graph::VectorProvider; -use crate::access_method::model::PgVector; +use crate::access_method::graph_neighbor_store::GraphNeighborStore; use crate::access_method::options::TSVIndexOptions; -use crate::access_method::pq::{PgPq, PqTrainer}; -use crate::util::page; +use crate::access_method::pg_vector::PgVector; +use crate::access_method::stats::{InsertStats, WriteStats}; + +use crate::util::page::PageType; use crate::util::tape::Tape; use crate::util::*; -use super::builder_graph::BuilderGraph; +use self::ports::PROGRESS_CREATE_IDX_SUBPHASE; + +use super::graph_neighbor_store::BuilderNeighborCache; +use super::sbq::SbqSpeedupStorage; + use super::meta_page::MetaPage; -use super::model::{self}; + +use super::plain_storage::PlainStorage; +use super::storage::{Storage, StorageType}; + +enum StorageBuildState<'a, 'b, 'c, 'd, 'e> { + SbqSpeedup(&'a mut SbqSpeedupStorage<'b>, &'c mut BuildState<'d, 'e>), + Plain(&'a mut PlainStorage<'b>, &'c mut BuildState<'d, 'e>), +} struct BuildState<'a, 'b> { memcxt: PgMemoryContexts, meta_page: MetaPage, ntuples: usize, tape: Tape<'a>, //The tape is a memory abstraction over Postgres pages for writing data. - node_builder: BuilderGraph<'b>, + graph: Graph<'b>, started: Instant, stats: InsertStats, - pq_trainer: Option, } impl<'a, 'b> BuildState<'a, 'b> { - fn new(index_relation: &'a PgRelation, meta_page: MetaPage, bg: BuilderGraph<'b>) -> Self { - let tape = unsafe { Tape::new(index_relation, page::PageType::Node) }; - let pq = if meta_page.get_use_pq() { - Some(PqTrainer::new(&meta_page)) - } else { - None - }; - //TODO: some ways to get rid of meta_page.clone? + fn new( + index_relation: &'a PgRelation, + meta_page: MetaPage, + graph: Graph<'b>, + page_type: PageType, + ) -> Self { + let tape = unsafe { Tape::new(index_relation, page_type) }; + BuildState { memcxt: PgMemoryContexts::new("tsv build context"), ntuples: 0, - meta_page: meta_page.clone(), + meta_page: meta_page, tape, - node_builder: bg, + graph: graph, started: Instant::now(), stats: InsertStats::new(), - pq_trainer: pq, } } } @@ -62,36 +70,18 @@ pub extern "C" fn ambuild( let opt = TSVIndexOptions::from_relation(&index_relation); notice!( - "Starting index build. num_neighbors={} search_list_size={}, max_alpha={}, use_pq={}, pq_vector_length={}", - opt.num_neighbors, + "Starting index build. num_neighbors={} search_list_size={}, max_alpha={}, storage_layout={:?}", + opt.get_num_neighbors(), opt.search_list_size, opt.max_alpha, - opt.use_pq, - opt.pq_vector_length + opt.get_storage_type(), ); let dimensions = index_relation.tuple_desc().get(0).unwrap().atttypmod; - // PQ is only applicable to high dimension vectors. - if opt.use_pq { - if dimensions < opt.pq_vector_length as i32 { - error!("use_pq can only be applied to vectors with greater than {} dimensions. 
{} dimensions provided", opt.pq_vector_length, dimensions) - }; - if dimensions % opt.pq_vector_length as i32 != 0 { - error!("use_pq can only be applied to vectors where the number of dimensions {} is divisible by the pq_vector_length {} ", dimensions, opt.pq_vector_length) - }; - } assert!(dimensions > 0 && dimensions < 2000); - let meta_page = unsafe { MetaPage::create(&index_relation, dimensions as _, opt.clone()) }; - let (ntuples, pq_opt) = do_heap_scan(index_info, &heap_relation, &index_relation, meta_page); - - // When using PQ, we initialize a node to store the model we use to quantize the vectors. - unsafe { - if opt.use_pq { - let pq = pq_opt.unwrap(); - let index_pointer: IndexPointer = model::write_pq(pq, &index_relation); - super::meta_page::MetaPage::update_pq_pointer(&index_relation, index_pointer) - } - } + let meta_page = unsafe { MetaPage::create(&index_relation, dimensions as _, opt) }; + + let ntuples = do_heap_scan(index_info, &heap_relation, &index_relation, meta_page); let mut result = unsafe { PgBox::<pg_sys::IndexBuildResult>::alloc0() }; result.heap_tuples = ntuples as f64; @@ -109,43 +99,76 @@ pub unsafe extern "C" fn aminsert( heaprel: pg_sys::Relation, _check_unique: pg_sys::IndexUniqueCheck, _index_unchanged: bool, - index_info: *mut pg_sys::IndexInfo, + _index_info: *mut pg_sys::IndexInfo, ) -> bool { let index_relation = unsafe { PgRelation::from_pg(indexrel) }; let heap_relation = unsafe { PgRelation::from_pg(heaprel) }; - let vec = PgVector::from_pg_parts(values, isnull, 0); + let mut meta_page = MetaPage::fetch(&index_relation); + let vec = PgVector::from_pg_parts(values, isnull, 0, &meta_page, true, false); if let None = vec { //todo handle NULLs? return false; } let vec = vec.unwrap(); - let vector = (*vec).to_slice(); let heap_pointer = ItemPointer::with_item_pointer_data(*heap_tid); - let meta_page = MetaPage::read(&index_relation); - let vp = VectorProvider::new( - Some(&heap_relation), - Some(get_attribute_number(index_info)), - meta_page.get_use_pq(), - false, - ); - let mut graph = DiskIndexGraph::new(&index_relation, vp); - - let mut node = model::Node::new(vector.to_vec(), heap_pointer, &meta_page); - // Populate the PQ version of the vector if it exists. 
- let pq = PgPq::new(&meta_page, &index_relation); - match pq { - None => {} - Some(pq) => { - node.pq_vector = pq.quantize(vector.to_vec()); + let mut storage = meta_page.get_storage_type(); + let mut stats = InsertStats::new(); + match &mut storage { + StorageType::Plain => { + let plain = PlainStorage::load_for_insert( + &index_relation, + &heap_relation, + meta_page.get_distance_function(), + ); + insert_storage( + &plain, + &index_relation, + vec, + heap_pointer, + &mut meta_page, + &mut stats, + ); + } + StorageType::SbqSpeedup | StorageType::SbqCompression => { + let bq = SbqSpeedupStorage::load_for_insert( + &heap_relation, + &index_relation, + &meta_page, + &mut stats.quantizer_stats, + ); + insert_storage( + &bq, + &index_relation, + vec, + heap_pointer, + &mut meta_page, + &mut stats, + ); } } + false +} - let mut tape = unsafe { Tape::new(&index_relation, page::PageType::Node) }; - let index_pointer: IndexPointer = node.write(&mut tape); +unsafe fn insert_storage<S: Storage>( + storage: &S, + index_relation: &PgRelation, + vector: PgVector, + heap_pointer: ItemPointer, + meta_page: &mut MetaPage, + stats: &mut InsertStats, +) { + let mut tape = Tape::new(&index_relation, S::page_type()); + let index_pointer = storage.create_node( + vector.to_index_slice(), + heap_pointer, + &meta_page, + &mut tape, + stats, + ); - let _stats = graph.insert(&index_relation, index_pointer, vector); - false + let mut graph = Graph::new(GraphNeighborStore::Disk, meta_page); + graph.insert(&index_relation, index_pointer, vector, storage, stats) } #[pg_guard] pub extern "C" fn ambuildempty(_index_relation: pg_sys::Relation) { panic!("ambuildempty: not yet implemented") } -fn get_attribute_number(index_info: *mut pg_sys::IndexInfo) -> pg_sys::AttrNumber { - unsafe { assert!((*index_info).ii_NumIndexAttrs == 1) }; - unsafe { (*index_info).ii_IndexAttrNumbers[0] } -} fn do_heap_scan<'a>( index_info: *mut pg_sys::IndexInfo, heap_relation: &'a PgRelation, index_relation: &'a PgRelation, meta_page: MetaPage, -) -> (usize, Option<Pq<f32>>) { - let vp = VectorProvider::new( - Some(heap_relation), - Some(get_attribute_number(index_info)), - meta_page.get_use_pq(), - false, +) -> usize { + let storage = meta_page.get_storage_type(); + + let mut mp2 = meta_page.clone(); + let graph = Graph::new( + GraphNeighborStore::Builder(BuilderNeighborCache::new()), + &mut mp2, ); - let bg = BuilderGraph::new(meta_page.clone(), vp); - let mut state = BuildState::new(index_relation, meta_page.clone(), bg); - unsafe { - pg_sys::IndexBuildHeapScan( - heap_relation.as_ptr(), - index_relation.as_ptr(), - index_info, - Some(build_callback), - &mut state, - ); + let mut write_stats = WriteStats::new(); + match storage { + StorageType::Plain => { + let mut plain = PlainStorage::new_for_build( + index_relation, + heap_relation, + meta_page.get_distance_function(), + ); + plain.start_training(&meta_page); + let page_type = PlainStorage::page_type(); + let mut bs = BuildState::new(index_relation, meta_page, graph, page_type); + let mut state = StorageBuildState::Plain(&mut plain, &mut bs); + + unsafe { + pg_sys::IndexBuildHeapScan( + heap_relation.as_ptr(), + index_relation.as_ptr(), + index_info, + Some(build_callback), + &mut state, + ); + } + + finalize_index_build(&mut plain, &mut bs, write_stats) + } + StorageType::SbqSpeedup | StorageType::SbqCompression => { + let mut bq = + SbqSpeedupStorage::new_for_build(index_relation, heap_relation, &meta_page); + + let page_type = SbqSpeedupStorage::page_type(); + + unsafe { 
pgstat_progress_update_param(PROGRESS_CREATE_IDX_SUBPHASE, BUILD_PHASE_TRAINING); + } + + bq.start_training(&meta_page); + + let mut bs = BuildState::new(index_relation, meta_page, graph, page_type); + let mut state = StorageBuildState::SbqSpeedup(&mut bq, &mut bs); + + unsafe { + pg_sys::IndexBuildHeapScan( + heap_relation.as_ptr(), + index_relation.as_ptr(), + index_info, + Some(build_callback_bq_train), + &mut state, + ); + } + bq.finish_training(&mut write_stats); + + unsafe { + pgstat_progress_update_param( + PROGRESS_CREATE_IDX_SUBPHASE, + BUILD_PHASE_BUILDING_GRAPH, + ); + } + + let mut state = StorageBuildState::SbqSpeedup(&mut bq, &mut bs); + + unsafe { + pg_sys::IndexBuildHeapScan( + heap_relation.as_ptr(), + index_relation.as_ptr(), + index_info, + Some(build_callback), + &mut state, + ); + } + + unsafe { + pgstat_progress_update_param( + PROGRESS_CREATE_IDX_SUBPHASE, + BUILD_PHASE_FINALIZING_GRAPH, + ); + } + finalize_index_build(&mut bq, &mut bs, write_stats) + } } +} - // we train the quantizer and add prepare to write quantized values to the nodes. - let pq = state.pq_trainer.map(|pq| pq.train_pq()); +fn finalize_index_build<S: Storage>( + storage: &mut S, + state: &mut BuildState, + mut write_stats: WriteStats, +) -> usize { + match state.graph.get_neighbor_store() { + GraphNeighborStore::Builder(builder) => { + for (&index_pointer, neighbors) in builder.iter() { + write_stats.num_nodes += 1; + let prune_neighbors; + let neighbors = + if neighbors.len() > state.graph.get_meta_page().get_num_neighbors() as _ { + //OPT: get rid of this clone + prune_neighbors = state.graph.prune_neighbors( + neighbors.clone(), + storage, + &mut write_stats.prune_stats, + ); + &prune_neighbors + } else { + neighbors + }; + write_stats.num_neighbors += neighbors.len(); + + storage.finalize_node_at_end_of_build( + &state.meta_page, + index_pointer, + neighbors, + &mut write_stats, + ); + } + } + GraphNeighborStore::Disk => { + panic!("Should not be using the disk neighbor store during build"); + } + } - let write_stats = unsafe { state.node_builder.write(index_relation, &pq) }; + debug1!("write done"); assert_eq!(write_stats.num_nodes, state.ntuples); let writing_took = Instant::now() .duration_since(write_stats.started) .as_secs_f64(); if write_stats.num_nodes > 0 { - info!( + debug1!( "Writing took {}s or {}s/tuple. 
Avg neighbors: {}", writing_took, writing_took / write_stats.num_nodes as f64, write_stats.num_neighbors / write_stats.num_nodes ); } - if write_stats.num_prunes > 0 { - info!( + if write_stats.prune_stats.calls > 0 { + debug1!( "When pruned for cleanup: avg neighbors before/after {}/{} of {} prunes", - write_stats.num_neighbors_before_prune / write_stats.num_prunes, - write_stats.num_neighbors_after_prune / write_stats.num_prunes, - write_stats.num_prunes + write_stats.prune_stats.num_neighbors_before_prune / write_stats.prune_stats.calls, + write_stats.prune_stats.num_neighbors_after_prune / write_stats.prune_stats.calls, + write_stats.prune_stats.calls ); } let ntuples = state.ntuples; warning!("Indexed {} tuples", ntuples); - (ntuples, pq) + + ntuples +} + +#[pg_guard] +unsafe extern "C" fn build_callback_bq_train( + _index: pg_sys::Relation, + _ctid: pg_sys::ItemPointer, + values: *mut pg_sys::Datum, + isnull: *mut bool, + _tuple_is_alive: bool, + state: *mut std::os::raw::c_void, +) { + let state = (state as *mut StorageBuildState).as_mut().unwrap(); + match state { + StorageBuildState::SbqSpeedup(bq, state) => { + let vec = PgVector::from_pg_parts(values, isnull, 0, &state.meta_page, true, false); + if let Some(vec) = vec { + bq.add_sample(vec.to_index_slice()); + } + } + StorageBuildState::Plain(_, _) => { + panic!("Should not be training with plain storage"); + } + } } #[pg_guard] @@ -223,94 +369,115 @@ unsafe extern "C" fn build_callback( state: *mut std::os::raw::c_void, ) { let index_relation = unsafe { PgRelation::from_pg(index) }; - let vec = PgVector::from_pg_parts(values, isnull, 0); - if let Some(vec) = vec { - let state = (state as *mut BuildState).as_mut().unwrap(); + let state = (state as *mut StorageBuildState).as_mut().unwrap(); + match state { + StorageBuildState::SbqSpeedup(bq, state) => { + let vec = PgVector::from_pg_parts(values, isnull, 0, &state.meta_page, true, false); + if let Some(vec) = vec { + let heap_pointer = ItemPointer::with_item_pointer_data(*ctid); + build_callback_memory_wrapper(index_relation, heap_pointer, vec, state, *bq); + } + } + StorageBuildState::Plain(plain, state) => { + let vec = PgVector::from_pg_parts(values, isnull, 0, &state.meta_page, true, false); + if let Some(vec) = vec { + let heap_pointer = ItemPointer::with_item_pointer_data(*ctid); + build_callback_memory_wrapper(index_relation, heap_pointer, vec, state, *plain); + } + } + } +} - let mut old_context = state.memcxt.set_as_current(); - let heap_pointer = ItemPointer::with_item_pointer_data(*ctid); +#[inline(always)] +unsafe fn build_callback_memory_wrapper( + index: PgRelation, + heap_pointer: ItemPointer, + vector: PgVector, + state: &mut BuildState, + storage: &mut S, +) { + let mut old_context = state.memcxt.set_as_current(); - build_callback_internal(index_relation, heap_pointer, (*vec).to_slice(), state); + build_callback_internal(index, heap_pointer, vector, state, storage); - old_context.set_as_current(); - state.memcxt.reset(); - } - //todo: what do we do with nulls? + old_context.set_as_current(); + state.memcxt.reset(); } #[inline(always)] -fn build_callback_internal( +fn build_callback_internal( index: PgRelation, heap_pointer: ItemPointer, - vector: &[f32], + vector: PgVector, state: &mut BuildState, + storage: &mut S, ) { check_for_interrupts!(); state.ntuples = state.ntuples + 1; if state.ntuples % 1000 == 0 { - info!( + debug1!( "Processed {} tuples in {}s which is {}s/tuple. Dist/tuple: Prune: {} search: {}. 
Stats: {:?}", state.ntuples, Instant::now().duration_since(state.started).as_secs_f64(), (Instant::now().duration_since(state.started) / state.ntuples as u32).as_secs_f64(), state.stats.prune_neighbor_stats.distance_comparisons / state.ntuples, - state.stats.greedy_search_stats.distance_comparisons / state.ntuples, + state.stats.greedy_search_stats.get_total_distance_comparisons() / state.ntuples, state.stats, ); } - match &state.pq_trainer { - Some(_) => { - let pqt = state.pq_trainer.as_mut(); - pqt.expect("error adding sample") - .add_sample(vector.to_vec()) - } - None => {} - } + let index_pointer = storage.create_node( + vector.to_index_slice(), + heap_pointer, + &state.meta_page, + &mut state.tape, + &mut state.stats, + ); - let node = model::Node::new(vector.to_vec(), heap_pointer, &state.meta_page); - let index_pointer: IndexPointer = node.write(&mut state.tape); - let new_stats = state.node_builder.insert(&index, index_pointer, vector); - state.stats.combine(new_stats); + state + .graph + .insert(&index, index_pointer, vector, storage, &mut state.stats); +} + +const BUILD_PHASE_TRAINING: i64 = 0; +const BUILD_PHASE_BUILDING_GRAPH: i64 = 1; +const BUILD_PHASE_FINALIZING_GRAPH: i64 = 2; + +#[pg_guard] +pub unsafe extern "C" fn ambuildphasename(phasenum: i64) -> *mut ffi::c_char { + match phasenum { + BUILD_PHASE_TRAINING => "training quantizer".as_pg_cstr(), + BUILD_PHASE_BUILDING_GRAPH => "building graph".as_pg_cstr(), + BUILD_PHASE_FINALIZING_GRAPH => "finalizing graph".as_pg_cstr(), + _ => error!("Unknown phase number {}", phasenum), + } } #[cfg(any(test, feature = "pg_test"))] #[pgrx::pg_schema] -mod tests { +pub mod tests { + use std::collections::HashSet; + use pgrx::*; - #[pg_test] - unsafe fn test_index_creation() -> spi::Result<()> { - Spi::run(&format!( - "CREATE TABLE test(embedding vector(3)); + use crate::util::ItemPointer; - INSERT INTO test(embedding) VALUES ('[1,2,3]'), ('[4,5,6]'), ('[7,8,10]'); + //TODO: add test where inserting and querying with vectors that are all the same. 
- CREATE INDEX idxtest - ON test - USING tsv(embedding) - WITH (num_neighbors=30); - - set enable_seqscan =0; - select * from test order by embedding <=> '[0,0,0]'; - explain analyze select * from test order by embedding <=> '[0,0,0]'; - drop index idxtest; - ", - ))?; - Ok(()) - } - - #[pg_test] - unsafe fn test_pq_index_creation() -> spi::Result<()> { + #[cfg(any(test, feature = "pg_test"))] + pub unsafe fn test_index_creation_and_accuracy_scaffold( + index_options: &str, + ) -> spi::Result<()> { Spi::run(&format!( - "CREATE TABLE test_pq ( + "CREATE TABLE test_data ( embedding vector (1536) ); + select setseed(0.5); -- generate 300 vectors - INSERT INTO test_pq (embedding) + INSERT INTO test_data (embedding) SELECT * FROM ( @@ -321,52 +488,205 @@ mod tests { GROUP BY i % 300) g; - CREATE INDEX idx_tsv_pq ON test_pq USING tsv (embedding) WITH (num_neighbors = 64, search_list_size = 125, max_alpha = 1.0, use_pq = TRUE, pq_vector_length = 64); + CREATE INDEX idx_tsv_bq ON test_data USING tsv (embedding) WITH ({index_options}); - ; SET enable_seqscan = 0; -- perform index scans on the vectors SELECT * FROM - test_pq + test_data ORDER BY embedding <=> ( SELECT ('[' || array_to_string(array_agg(random()), ',', '0') || ']')::vector AS embedding - FROM generate_series(1, 1536)); + FROM generate_series(1, 1536));"))?; + + let test_vec: Option> = Spi::get_one(&format!( + "SELECT('{{' || array_to_string(array_agg(1.0), ',', '0') || '}}')::real[] AS embedding + FROM generate_series(1, 1536)" + ))?; + + let cnt: Option = Spi::get_one_with_args( + &format!( + " + SET enable_seqscan = 0; + SET enable_indexscan = 1; + SET tsv.query_search_list_size = 2; + WITH cte as (select * from test_data order by embedding <=> $1::vector) SELECT count(*) from cte; + ", + ), + vec![( + pgrx::PgOid::Custom(pgrx::pg_sys::FLOAT4ARRAYOID), + test_vec.clone().into_datum(), + )], + )?; + + //FIXME: should work in all cases + if !index_options.contains("num_neighbors=10") { + assert_eq!(cnt.unwrap(), 300, "initial count"); + } + + Spi::run(&format!(" + -- test insert 2 vectors + INSERT INTO test_data (embedding) + SELECT + * + FROM ( + SELECT + ('[' || array_to_string(array_agg(random()), ',', '0') || ']')::vector AS embedding + FROM + generate_series(1, 1536 * 2) i + GROUP BY + i % 2) g; + EXPLAIN ANALYZE SELECT * FROM - test_pq + test_data ORDER BY embedding <=> ( SELECT ('[' || array_to_string(array_agg(random()), ',', '0') || ']')::vector AS embedding FROM generate_series(1, 1536)); - DROP INDEX idx_tsv_pq; + -- test insert 10 vectors to search for that aren't random + INSERT INTO test_data (embedding) + SELECT + * + FROM ( + SELECT + ('[' || array_to_string(array_agg(1.0), ',', '0') || ']')::vector AS embedding + FROM + generate_series(1, 1536 * 10) i + GROUP BY + i % 10) g; + ", ))?; + + let with_index: Option> = Spi::get_one_with_args( + &format!( + " + SET enable_seqscan = 0; + SET enable_indexscan = 1; + SET tsv.query_search_list_size = 25; + WITH cte AS ( + SELECT + ctid + FROM + test_data + ORDER BY + embedding <=> $1::vector + LIMIT 10 + ) + SELECT array_agg(ctid) from cte;" + ), + vec![( + pgrx::PgOid::Custom(pgrx::pg_sys::FLOAT4ARRAYOID), + test_vec.clone().into_datum(), + )], + )?; + + /* Test that the explain plan is generated ok */ + let explain: Option = Spi::get_one_with_args( + &format!( + " + SET enable_seqscan = 0; + SET enable_indexscan = 1; + EXPLAIN (format json) WITH cte AS ( + SELECT + ctid + FROM + test_data + ORDER BY + embedding <=> $1::vector + LIMIT 10 + ) + SELECT array_agg(ctid) from cte;" 
+ ), + vec![( + pgrx::PgOid::Custom(pgrx::pg_sys::FLOAT4ARRAYOID), + test_vec.clone().into_datum(), + )], + )?; + assert!(explain.is_some()); + //warning!("explain: {}", explain.unwrap().0); + + let without_index: Option<Vec<pg_sys::ItemPointerData>> = Spi::get_one_with_args( + &format!( + " + SET enable_seqscan = 1; + SET enable_indexscan = 0; + WITH cte AS ( + SELECT + ctid + FROM + test_data + ORDER BY + embedding <=> $1::vector + LIMIT 10 + ) + SELECT array_agg(ctid) from cte;" + ), + vec![( + pgrx::PgOid::Custom(pgrx::pg_sys::FLOAT4ARRAYOID), + test_vec.clone().into_datum(), + )], + )?; + + let set: HashSet<_> = without_index + .unwrap() + .iter() + .map(|&ctid| ItemPointer::with_item_pointer_data(ctid)) + .collect(); + + let mut matches = 0; + for ctid in with_index.unwrap() { + if set.contains(&ItemPointer::with_item_pointer_data(ctid)) { + matches += 1; + } + } + assert!(matches > 9, "Low number of matches: {}", matches); + + //FIXME: should work in all cases + if !index_options.contains("num_neighbors=10") { + //make sure you can scan entire table with index + let cnt: Option<i64> = Spi::get_one_with_args( + &format!( + " + SET enable_seqscan = 0; + SET enable_indexscan = 1; + SET tsv.query_search_list_size = 2; + WITH cte as (select * from test_data order by embedding <=> $1::vector) SELECT count(*) from cte; + ", + ), + vec![( + pgrx::PgOid::Custom(pgrx::pg_sys::FLOAT4ARRAYOID), + test_vec.into_datum(), + )], + )?; + + assert_eq!(cnt.unwrap(), 312); + } + Ok(()) } - #[pg_test] - unsafe fn test_insert() -> spi::Result<()> { + #[cfg(any(test, feature = "pg_test"))] + pub unsafe fn test_empty_table_insert_scaffold(index_options: &str) -> spi::Result<()> { Spi::run(&format!( "CREATE TABLE test(embedding vector(3)); - INSERT INTO test(embedding) VALUES ('[1,2,3]'), ('[4,5,6]'), ('[7,8,10]'); - CREATE INDEX idxtest ON test USING tsv(embedding) - WITH (num_neighbors=30); + WITH ({index_options}); - INSERT INTO test(embedding) VALUES ('[11,12,13]'); + INSERT INTO test(embedding) VALUES ('[1,2,3]'), ('[4,5,6]'), ('[7,8,10]'); ", ))?; let res: Option<i64> = Spi::get_one(&format!( " set enable_seqscan = 0; WITH cte as (select * from test order by embedding <=> '[0,0,0]') SELECT count(*) from cte;", ))?; - assert_eq!(4, res.unwrap()); + assert_eq!(3, res.unwrap()); Spi::run(&format!( - "INSERT INTO test(embedding) VALUES ('[11,12,13]'), ('[14,15,16]');", + " + set enable_seqscan = 0; + explain analyze select * from test order by embedding <=> '[0,0,0]'; + ", ))?; - let res: Option<i64> = Spi::get_one(&format!( - " set enable_seqscan = 0; - WITH cte as (select * from test order by embedding <=> '[0,0,0]') SELECT count(*) from cte;", - ))?; - assert_eq!(6, res.unwrap()); Spi::run(&format!("drop index idxtest;",))?; Ok(()) } - #[pg_test] - unsafe 
fn test_insert_empty_insert() -> spi::Result<()> { + #[cfg(any(test, feature = "pg_test"))] + pub unsafe fn test_insert_empty_insert_scaffold(index_options: &str) -> spi::Result<()> { Spi::run(&format!( "CREATE TABLE test(embedding vector(3)); CREATE INDEX idxtest ON test USING tsv(embedding) - WITH (num_neighbors=30); + WITH ({index_options}); INSERT INTO test(embedding) VALUES ('[1,2,3]'), ('[4,5,6]'), ('[7,8,10]'); DELETE FROM test; INSERT INTO test(embedding) VALUES ('[1,2,3]'), ('[14,15,16]'); ", ))?; let res: Option<i64> = Spi::get_one(&format!( " set enable_seqscan = 0; WITH cte as (select * from test order by embedding <=> '[0,0,0]') SELECT count(*) from cte;", ))?; assert_eq!(2, res.unwrap()); Spi::run(&format!("drop index idxtest;",))?; Ok(()) } + #[cfg(any(test, feature = "pg_test"))] + pub unsafe fn test_index_updates(index_options: &str, expected_cnt: i64) -> spi::Result<()> { Spi::run(&format!( "CREATE TABLE test_data ( + id int, + embedding vector (1536) + ); + select setseed(0.5); + -- generate 300 vectors + INSERT INTO test_data (id, embedding) + SELECT + * + FROM ( + SELECT + i % {expected_cnt}, + ('[' || array_to_string(array_agg(random()), ',', '0') || ']')::vector AS embedding + FROM + generate_series(1, 1536 * {expected_cnt}) i + GROUP BY + i % {expected_cnt}) g; + CREATE INDEX idx_tsv_bq ON test_data USING tsv (embedding) WITH ({index_options}); + + + SET enable_seqscan = 0; + -- perform index scans on the vectors + SELECT + * + FROM + test_data + ORDER BY + embedding <=> ( + SELECT + ('[' || array_to_string(array_agg(random()), ',', '0') || ']')::vector AS embedding + FROM generate_series(1, 1536));"))?; + let test_vec: Option<Vec<f32>> = Spi::get_one(&format!( + "SELECT('{{' || array_to_string(array_agg(1.0), ',', '0') || '}}')::real[] AS embedding + FROM generate_series(1, 1536)" ))?; + let cnt: Option<i64> = Spi::get_one_with_args( + &format!( + " + SET enable_seqscan = 0; + SET enable_indexscan = 1; + SET tsv.query_search_list_size = 2; + WITH cte as (select * from test_data order by embedding <=> $1::vector) SELECT count(*) from cte; + ", + ), + vec![( + pgrx::PgOid::Custom(pgrx::pg_sys::FLOAT4ARRAYOID), + test_vec.clone().into_datum(), + )], + )?; + + assert!(cnt.unwrap() == expected_cnt, "initial count"); + + Spi::run(&format!( + " + + --CREATE INDEX idx_id ON test_data(id); + + WITH CTE as ( + SELECT + i % {expected_cnt} as id, + ('[' || array_to_string(array_agg(random()), ',', '0') || ']')::vector AS embedding + FROM + generate_series(1, 1536 * {expected_cnt}) i + GROUP BY + i % {expected_cnt} + ) + UPDATE test_data SET embedding = cte.embedding + FROM cte + WHERE test_data.id = cte.id; + + --DROP INDEX idx_id; + ", ))?; + let cnt: Option<i64> = Spi::get_one_with_args( + &format!( + " + SET enable_seqscan = 0; + SET enable_indexscan = 1; + SET tsv.query_search_list_size = 2; + WITH cte as (select * from test_data order by embedding <=> $1::vector) SELECT count(*) from cte; + ", + ), + vec![( + pgrx::PgOid::Custom(pgrx::pg_sys::FLOAT4ARRAYOID), + test_vec.clone().into_datum(), + )], + )?; + + assert!(cnt.unwrap() == expected_cnt, "after update count"); Ok(()) } diff --git a/timescale_vector/src/access_method/builder_graph.rs b/timescale_vector/src/access_method/builder_graph.rs deleted file mode 100644 index 8a050b03..00000000 --- a/timescale_vector/src/access_method/builder_graph.rs +++ /dev/null @@ -1,174 +0,0 @@ -use std::collections::HashMap; -use std::time::Instant; - -use ndarray::Array1; -use pgrx::*; -use reductive::pq::{Pq, QuantizeVector}; - -use crate::util::{IndexPointer, ItemPointer}; - -use super::graph::{Graph, VectorProvider}; -use super::meta_page::MetaPage; -use super::model::*; - -/// A builderGraph is a graph that keep the 
neighbors in-memory in the neighbor_map below -/// The idea is that during the index build, you don't want to update the actual Postgres -/// pages every time you change the neighbors. Instead you change the neighbors in memory -/// until the build is done. Afterwards, calling the `write` method, will write out all -/// the neighbors to the right pages. -pub struct BuilderGraph<'a> { - //maps node's pointer to the representation on disk - neighbor_map: HashMap>, - meta_page: MetaPage, - vector_provider: VectorProvider<'a>, -} - -impl<'a> BuilderGraph<'a> { - pub fn new(meta_page: MetaPage, vp: VectorProvider<'a>) -> Self { - Self { - neighbor_map: HashMap::with_capacity(200), - meta_page, - vector_provider: vp, - } - } - - unsafe fn get_pq_vector( - &self, - index: &PgRelation, - index_pointer: ItemPointer, - pq: &Pq, - ) -> Vec { - let vp = self.get_vector_provider(); - let copy = vp.get_full_vector_copy_from_heap(index, index_pointer); - let og_vec = Array1::from(copy); - pq.quantize_vector(og_vec).to_vec() - } - - pub unsafe fn write(&self, index: &PgRelation, pq: &Option>) -> WriteStats { - let mut stats = WriteStats::new(); - - //TODO: OPT: do this in order of item pointers - for (index_pointer, neighbors) in &self.neighbor_map { - stats.num_nodes += 1; - let prune_neighbors; - let neighbors = if neighbors.len() > self.meta_page.get_num_neighbors() as _ { - stats.num_prunes += 1; - stats.num_neighbors_before_prune += neighbors.len(); - (prune_neighbors, _) = self.prune_neighbors(index, *index_pointer, vec![]); - stats.num_neighbors_after_prune += prune_neighbors.len(); - &prune_neighbors - } else { - neighbors - }; - stats.num_neighbors += neighbors.len(); - - let pqv = match pq { - Some(pq) => Some(self.get_pq_vector(index, *index_pointer, pq)), - None => None, - }; - Node::update_neighbors_and_pq( - index, - *index_pointer, - neighbors, - self.get_meta_page(index), - pqv, - ); - } - stats - } -} - -impl<'a> Graph for BuilderGraph<'a> { - fn read<'b>(&self, index: &'b PgRelation, index_pointer: ItemPointer) -> ReadableNode<'b> { - unsafe { Node::read(index, index_pointer) } - } - - fn get_init_ids(&mut self) -> Option> { - //returns a vector for generality - self.meta_page.get_init_ids() - } - - fn get_neighbors( - &self, - _index: &PgRelation, - neighbors_of: ItemPointer, - result: &mut Vec, - ) -> bool { - let neighbors = self.neighbor_map.get(&neighbors_of); - match neighbors { - Some(n) => { - for nwd in n { - result.push(nwd.get_index_pointer_to_neighbor()); - } - true - } - None => false, - } - } - - fn get_neighbors_with_distances( - &self, - _index: &PgRelation, - neighbors_of: ItemPointer, - result: &mut Vec, - ) -> bool { - let neighbors = self.neighbor_map.get(&neighbors_of); - match neighbors { - Some(n) => { - for nwd in n { - result.push(nwd.clone()); - } - true - } - None => false, - } - } - - fn is_empty(&self) -> bool { - self.neighbor_map.len() == 0 - } - - fn get_vector_provider(&self) -> VectorProvider { - return self.vector_provider.clone(); - } - - fn get_meta_page(&self, _index: &PgRelation) -> &MetaPage { - &self.meta_page - } - - fn set_neighbors( - &mut self, - index: &PgRelation, - neighbors_of: ItemPointer, - new_neighbors: Vec, - ) { - if self.meta_page.get_init_ids().is_none() { - //TODO probably better set off of centeroids - MetaPage::update_init_ids(index, vec![neighbors_of]); - self.meta_page = MetaPage::read(index); - } - self.neighbor_map.insert(neighbors_of, new_neighbors); - } -} - -pub struct WriteStats { - pub started: Instant, - pub num_nodes: 
usize, - pub num_prunes: usize, - pub num_neighbors_before_prune: usize, - pub num_neighbors_after_prune: usize, - pub num_neighbors: usize, -} - -impl WriteStats { - pub fn new() -> Self { - Self { - started: Instant::now(), - num_nodes: 0, - num_prunes: 0, - num_neighbors_before_prune: 0, - num_neighbors_after_prune: 0, - num_neighbors: 0, - } - } -} diff --git a/timescale_vector/src/access_method/debugging.rs b/timescale_vector/src/access_method/debugging.rs index 5be1674e..40a86392 100644 --- a/timescale_vector/src/access_method/debugging.rs +++ b/timescale_vector/src/access_method/debugging.rs @@ -3,11 +3,10 @@ use std::collections::HashMap; use pgrx::PgRelation; -use rkyv::Deserialize; use crate::util::ItemPointer; -use super::model::Node; +use super::{plain_node::Node, stats::GreedySearchStats}; #[allow(dead_code)] pub fn print_graph_from_disk(index: &PgRelation, init_id: ItemPointer) { @@ -26,7 +25,8 @@ unsafe fn print_graph_from_disk_visitor( sb: &mut String, level: usize, ) { - let data_node = Node::read(&index, index_pointer); + let mut stats = GreedySearchStats::new(); + let data_node = Node::read(&index, index_pointer, &mut stats); let node = data_node.get_archived_node(); let v = node.vector.as_slice(); let copy: Vec = v.iter().map(|f| *f).collect(); @@ -34,20 +34,18 @@ unsafe fn print_graph_from_disk_visitor( map.insert(index_pointer, copy); - node.apply_to_neighbors(|neighbor_pointer| { - let p = neighbor_pointer.deserialize_item_pointer(); + for neighbor_pointer in node.iter_neighbors() { + let p = neighbor_pointer; if !map.contains_key(&p) { print_graph_from_disk_visitor(index, p, map, sb, level + 1); } - }); + } sb.push_str(&name); sb.push_str("\n"); - node.apply_to_neighbors(|neighbor_pointer| { - let ip: ItemPointer = (neighbor_pointer) - .deserialize(&mut rkyv::Infallible) - .unwrap(); - let neighbor = map.get(&ip).unwrap(); + + for neighbor_pointer in node.iter_neighbors() { + let neighbor = map.get(&neighbor_pointer).unwrap(); sb.push_str(&format!("->{:?}\n", neighbor)) - }); + } sb.push_str("\n") } diff --git a/timescale_vector/src/access_method/disk_index_graph.rs b/timescale_vector/src/access_method/disk_index_graph.rs deleted file mode 100644 index c3bd6eea..00000000 --- a/timescale_vector/src/access_method/disk_index_graph.rs +++ /dev/null @@ -1,99 +0,0 @@ -use pgrx::PgRelation; - -use crate::util::{IndexPointer, ItemPointer}; - -use super::{ - graph::{Graph, VectorProvider}, - meta_page::MetaPage, - model::{NeighborWithDistance, Node, ReadableNode}, -}; - -pub struct DiskIndexGraph<'a> { - meta_page: MetaPage, - vector_provider: VectorProvider<'a>, -} - -impl<'a> DiskIndexGraph<'a> { - pub fn new(index: &PgRelation, vp: VectorProvider<'a>) -> Self { - let meta = MetaPage::read(index); - Self { - meta_page: meta, - vector_provider: vp, - } - } -} - -impl<'h> Graph for DiskIndexGraph<'h> { - fn get_vector_provider(&self) -> VectorProvider { - return self.vector_provider.clone(); - } - - fn read<'a>(&self, index: &'a PgRelation, index_pointer: ItemPointer) -> ReadableNode<'a> { - unsafe { Node::read(index, index_pointer) } - } - - fn get_init_ids(&mut self) -> Option> { - self.meta_page.get_init_ids() - } - - fn get_neighbors( - &self, - index: &PgRelation, - neighbors_of: ItemPointer, - result: &mut Vec, - ) -> bool { - let rn = self.read(index, neighbors_of); - rn.get_archived_node().apply_to_neighbors(|n| { - let n = n.deserialize_item_pointer(); - result.push(n) - }); - true - } - - fn get_neighbors_with_distances( - &self, - index: &PgRelation, - 
neighbors_of: ItemPointer, - result: &mut Vec, - ) -> bool { - let rn = self.read(index, neighbors_of); - let vp = self.get_vector_provider(); - let dist_state = unsafe { vp.get_full_vector_distance_state(index, neighbors_of) }; - rn.get_archived_node().apply_to_neighbors(|n| { - let n = n.deserialize_item_pointer(); - let dist = - unsafe { vp.get_distance_pair_for_full_vectors_from_state(&dist_state, index, n) }; - result.push(NeighborWithDistance::new(n, dist)) - }); - true - } - - fn is_empty(&self) -> bool { - self.meta_page.get_init_ids().is_none() - } - - fn get_meta_page(&self, _index: &PgRelation) -> &MetaPage { - &self.meta_page - } - - fn set_neighbors( - &mut self, - index: &PgRelation, - neighbors_of: ItemPointer, - new_neighbors: Vec, - ) { - if self.meta_page.get_init_ids().is_none() { - MetaPage::update_init_ids(index, vec![neighbors_of]); - self.meta_page = MetaPage::read(index); - } - unsafe { - Node::update_neighbors_and_pq( - index, - neighbors_of, - &new_neighbors, - self.get_meta_page(index), - None, - ); - } - } -} diff --git a/timescale_vector/src/access_method/distance.rs b/timescale_vector/src/access_method/distance.rs new file mode 100644 index 00000000..eb13b821 --- /dev/null +++ b/timescale_vector/src/access_method/distance.rs @@ -0,0 +1,210 @@ +/* we use the avx2 version of x86 functions. This verifies that's kosher */ +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[cfg(not(target_feature = "avx2"))] +compile_error!( + "On x86, the AVX2 feature must be enabled. Set RUSTFLAGS=\"-C target-feature=+avx2,+fma\"" +); + +#[inline] +pub fn distance_l2(a: &[f32], b: &[f32]) -> f32 { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + //note safety is guraranteed by compile_error above + unsafe { + return super::distance_x86::distance_l2_x86_avx2(a, b); + } + + #[allow(unreachable_code)] + { + return distance_l2_unoptimized(a, b); + } +} + +#[inline(always)] +pub fn distance_l2_unoptimized(a: &[f32], b: &[f32]) -> f32 { + assert_eq!(a.len(), b.len()); + let norm: f32 = a + .iter() + .zip(b.iter()) + .map(|t| (*t.0 as f32 - *t.1 as f32) * (*t.0 as f32 - *t.1 as f32)) + .sum(); + assert!(norm >= 0.); + //don't sqrt for performance. These are only used for ordering so sqrt not needed + norm +} + +/* PQ computes distances on subsegments that have few dimensions (e.g. 6). This function optimizes that. +* We optimize by telling the compiler exactly how long the slices are. This allows the compiler to figure +* out SIMD optimizations. Look at the benchmark results. 
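+* As a concrete (illustrative) example: for a subsegment of length 6, a call such as
+* distance_l2_optimized_for_few_dimensions(&a[..6], &b[..6]) takes the `6 =>` arm below,
+* where the slice bounds are compile-time constants, so the iterator chain can be fully
+* unrolled and vectorized instead of running a length-dependent loop as the generic
+* distance_l2 path would.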
*/
+#[inline]
+pub fn distance_l2_optimized_for_few_dimensions(a: &[f32], b: &[f32]) -> f32 {
+    let norm: f32 = match a.len() {
+        0 => 0.,
+        1 => a[..1]
+            .iter()
+            .zip(b[..1].iter())
+            .map(|t| (*t.0 as f32 - *t.1 as f32) * (*t.0 as f32 - *t.1 as f32))
+            .sum(),
+        2 => a[..2]
+            .iter()
+            .zip(b[..2].iter())
+            .map(|t| (*t.0 as f32 - *t.1 as f32) * (*t.0 as f32 - *t.1 as f32))
+            .sum(),
+        3 => a[..3]
+            .iter()
+            .zip(b[..3].iter())
+            .map(|t| (*t.0 as f32 - *t.1 as f32) * (*t.0 as f32 - *t.1 as f32))
+            .sum(),
+        4 => a[..4]
+            .iter()
+            .zip(b[..4].iter())
+            .map(|t| (*t.0 as f32 - *t.1 as f32) * (*t.0 as f32 - *t.1 as f32))
+            .sum(),
+        5 => a[..5]
+            .iter()
+            .zip(b[..5].iter())
+            .map(|t| (*t.0 as f32 - *t.1 as f32) * (*t.0 as f32 - *t.1 as f32))
+            .sum(),
+        6 => a[..6]
+            .iter()
+            .zip(b[..6].iter())
+            .map(|t| (*t.0 as f32 - *t.1 as f32) * (*t.0 as f32 - *t.1 as f32))
+            .sum(),
+        7 => a[..7]
+            .iter()
+            .zip(b[..7].iter())
+            .map(|t| (*t.0 as f32 - *t.1 as f32) * (*t.0 as f32 - *t.1 as f32))
+            .sum(),
+        8 => a[..8]
+            .iter()
+            .zip(b[..8].iter())
+            .map(|t| (*t.0 as f32 - *t.1 as f32) * (*t.0 as f32 - *t.1 as f32))
+            .sum(),
+        _ => distance_l2(a, b),
+    };
+    assert!(norm >= 0.);
+    //don't sqrt for performance. These are only used for ordering so sqrt not needed
+    norm
+}
+
+#[inline]
+pub fn distance_cosine(a: &[f32], b: &[f32]) -> f32 {
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    //note safety is guaranteed by compile_error above
+    unsafe {
+        return super::distance_x86::distance_cosine_x86_avx2(a, b);
+    }
+
+    #[allow(unreachable_code)]
+    {
+        return distance_cosine_unoptimized(a, b);
+    }
+}
+
+#[inline(always)]
+pub fn distance_cosine_unoptimized(a: &[f32], b: &[f32]) -> f32 {
+    assert_eq!(a.len(), b.len());
+    debug_assert!(preprocess_cosine_get_norm(a).is_none());
+    debug_assert!(preprocess_cosine_get_norm(b).is_none());
+    let res: f32 = a.iter().zip(b).map(|(a, b)| *a * *b).sum();
+    (1.0 - res).max(0.0)
+}
+
+pub fn preprocess_cosine_get_norm(a: &[f32]) -> Option<f32> {
+    let norm = a.iter().map(|v| v * v).sum::<f32>();
+    //adjust the epsilon to the length of the vector
+    let adj_epsilon = f32::EPSILON * a.len() as f32;
+
+    /* this mainly handles the zero-vector case */
+    if norm < f32::EPSILON {
+        return None;
+    }
+    /* no need to renormalize if norm around 1.0 */
+    if norm >= 1.0 - adj_epsilon && norm <= 1.0 + adj_epsilon {
+        return None;
+    }
+    return Some(norm.sqrt());
+}
+
+pub fn preprocess_cosine(a: &mut [f32]) {
+    let norm = preprocess_cosine_get_norm(a);
+    match norm {
+        None => (),
+        Some(norm) => {
+            a.iter_mut().for_each(|v| *v /= norm);
+            debug_assert!(
+                preprocess_cosine_get_norm(a).is_none(),
+                "preprocess_cosine isn't idempotent",
+            );
+        }
+    }
+}
+
+macro_rules!
xor_arm { + ($a: expr, $b: expr, $sz: expr) => { + $a[..$sz] + .iter() + .zip($b[..$sz].iter()) + .map(|(&l, &r)| (l ^ r).count_ones() as usize) + .sum() + }; +} + +#[inline(always)] +pub fn distance_xor_optimized(a: &[u64], b: &[u64]) -> usize { + match a.len() { + 1 => xor_arm!(a, b, 1), + 2 => xor_arm!(a, b, 2), + 3 => xor_arm!(a, b, 3), + 4 => xor_arm!(a, b, 4), + 5 => xor_arm!(a, b, 5), + 6 => xor_arm!(a, b, 6), + 7 => xor_arm!(a, b, 7), + 8 => xor_arm!(a, b, 8), + 9 => xor_arm!(a, b, 9), + 10 => xor_arm!(a, b, 10), + 11 => xor_arm!(a, b, 11), + 12 => xor_arm!(a, b, 12), + 13 => xor_arm!(a, b, 13), + 14 => xor_arm!(a, b, 14), + 15 => xor_arm!(a, b, 15), + 16 => xor_arm!(a, b, 16), + 17 => xor_arm!(a, b, 17), + 18 => xor_arm!(a, b, 18), + 19 => xor_arm!(a, b, 19), + 20 => xor_arm!(a, b, 20), + 21 => xor_arm!(a, b, 21), + 22 => xor_arm!(a, b, 22), + 23 => xor_arm!(a, b, 23), + 24 => xor_arm!(a, b, 24), + 25 => xor_arm!(a, b, 25), + 26 => xor_arm!(a, b, 26), + 27 => xor_arm!(a, b, 27), + 28 => xor_arm!(a, b, 28), + 29 => xor_arm!(a, b, 29), + 30 => xor_arm!(a, b, 30), + 31 => xor_arm!(a, b, 31), + 32 => xor_arm!(a, b, 32), + 33 => xor_arm!(a, b, 33), + 34 => xor_arm!(a, b, 34), + 35 => xor_arm!(a, b, 35), + 36 => xor_arm!(a, b, 36), + 37 => xor_arm!(a, b, 37), + 38 => xor_arm!(a, b, 38), + 39 => xor_arm!(a, b, 39), + 40 => xor_arm!(a, b, 40), + 41 => xor_arm!(a, b, 41), + 42 => xor_arm!(a, b, 42), + 43 => xor_arm!(a, b, 43), + 44 => xor_arm!(a, b, 44), + 45 => xor_arm!(a, b, 45), + 46 => xor_arm!(a, b, 46), + 47 => xor_arm!(a, b, 47), + 48 => xor_arm!(a, b, 48), + 49 => xor_arm!(a, b, 49), + _ => a + .iter() + .zip(b.iter()) + .map(|(&l, &r)| (l ^ r).count_ones() as usize) + .sum(), + } +} diff --git a/timescale_vector/src/access_method/distance_x86.rs b/timescale_vector/src/access_method/distance_x86.rs index d12de428..36f78537 100644 --- a/timescale_vector/src/access_method/distance_x86.rs +++ b/timescale_vector/src/access_method/distance_x86.rs @@ -6,11 +6,25 @@ use simdeez::sse41::*; //use simdeez::avx::*; use simdeez::avx2::*; +#[cfg(not(target_feature = "avx2"))] +compile_error!( + "On x86, the AVX2 feature must be enabled. Set RUSTFLAGS=\"-C target-feature=+avx2,+fma\"" +); + +//note: without fmadd, the performance degrades pretty badly. Benchmark before disbaling +#[cfg(not(target_feature = "fma"))] +compile_error!( + "On x86, the fma feature must be enabled. Set RUSTFLAGS=\"-C target-feature=+avx2,+fma\"" +); + simdeez::simd_runtime_generate!( - pub fn distance_opt(x: &[f32], y: &[f32]) -> f32 { - let mut res = S::setzero_ps(); + pub fn distance_l2_x86(x: &[f32], y: &[f32]) -> f32 { + let mut accum0 = S::setzero_ps(); + let mut accum1 = S::setzero_ps(); + let mut accum2 = S::setzero_ps(); + let mut accum3 = S::setzero_ps(); - assert!(x.len() == y.len()); + //assert!(x.len() == y.len()); let mut x = &x[..]; let mut y = &y[..]; @@ -19,22 +33,30 @@ simdeez::simd_runtime_generate!( // the width of a vector type is provided as a constant // so the compiler is free to optimize it more. 
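        // The rewritten loop below keeps four independent accumulators and consumes
        // S::VF32_WIDTH * 4 lanes per iteration; independent accumulators break the
        // dependency chain between iterations so the CPU can overlap the arithmetic,
        // and the four partial sums are combined by horizontal adds at the end.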
// S::VF32_WIDTH is a constant, 4 when using SSE, 8 when using AVX2, etc - while x.len() >= S::VF32_WIDTH { + while x.len() >= S::VF32_WIDTH * 4 { //load data from your vec into an SIMD value - let xv = S::loadu_ps(&x[0]); - let yv = S::loadu_ps(&y[0]); - - let mut diff = S::sub_ps(xv, yv); - diff *= diff; - - res = res + diff; + accum0 = accum0 + + ((S::loadu_ps(&x[S::VF32_WIDTH * 0]) - S::loadu_ps(&y[S::VF32_WIDTH * 0])) + * (S::loadu_ps(&x[S::VF32_WIDTH * 0]) - S::loadu_ps(&y[S::VF32_WIDTH * 0]))); + accum1 = accum1 + + ((S::loadu_ps(&x[S::VF32_WIDTH * 1]) - S::loadu_ps(&y[S::VF32_WIDTH * 1])) + * (S::loadu_ps(&x[S::VF32_WIDTH * 1]) - S::loadu_ps(&y[S::VF32_WIDTH * 1]))); + accum2 = accum2 + + ((S::loadu_ps(&x[S::VF32_WIDTH * 2]) - S::loadu_ps(&y[S::VF32_WIDTH * 2])) + * (S::loadu_ps(&x[S::VF32_WIDTH * 2]) - S::loadu_ps(&y[S::VF32_WIDTH * 2]))); + accum3 = accum3 + + ((S::loadu_ps(&x[S::VF32_WIDTH * 3]) - S::loadu_ps(&y[S::VF32_WIDTH * 3])) + * (S::loadu_ps(&x[S::VF32_WIDTH * 3]) - S::loadu_ps(&y[S::VF32_WIDTH * 3]))); // Move each slice to the next position - x = &x[S::VF32_WIDTH..]; - y = &y[S::VF32_WIDTH..]; + x = &x[S::VF32_WIDTH * 4..]; + y = &y[S::VF32_WIDTH * 4..]; } - let mut dist = S::horizontal_add_ps(res); + let mut dist = S::horizontal_add_ps(accum0) + + S::horizontal_add_ps(accum1) + + S::horizontal_add_ps(accum2) + + S::horizontal_add_ps(accum3); // compute for the remaining elements for i in 0..x.len() { @@ -43,6 +65,120 @@ simdeez::simd_runtime_generate!( } assert!(dist >= 0.); - dist.sqrt() + //dist.sqrt() + dist } ); + +simdeez::simd_runtime_generate!( + pub fn distance_cosine_x86(x: &[f32], y: &[f32]) -> f32 { + let mut accum0 = S::setzero_ps(); + let mut accum1 = S::setzero_ps(); + let mut accum2 = S::setzero_ps(); + let mut accum3 = S::setzero_ps(); + + let mut x = &x[..]; + let mut y = &y[..]; + + //assert!(x.len() == y.len()); + + // Operations have to be done in terms of the vector width + // so that it will work with any size vector. + // the width of a vector type is provided as a constant + // so the compiler is free to optimize it more. 
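+        // Cosine distance here is computed as 1 - dot(x, y): inputs are expected to be
+        // pre-normalized by preprocess_cosine(), so the fused multiply-adds below
+        // accumulate the dot product and no norms need to be computed in this loop.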
+ // S::VF32_WIDTH is a constant, 4 when using SSE, 8 when using AVX2, etc + while x.len() >= S::VF32_WIDTH * 4 { + accum0 = S::fmadd_ps( + S::loadu_ps(&x[S::VF32_WIDTH * 0]), + S::loadu_ps(&y[S::VF32_WIDTH * 0]), + accum0, + ); + accum1 = S::fmadd_ps( + S::loadu_ps(&x[S::VF32_WIDTH * 1]), + S::loadu_ps(&y[S::VF32_WIDTH * 1]), + accum1, + ); + accum2 = S::fmadd_ps( + S::loadu_ps(&x[S::VF32_WIDTH * 2]), + S::loadu_ps(&y[S::VF32_WIDTH * 2]), + accum2, + ); + accum3 = S::fmadd_ps( + S::loadu_ps(&x[S::VF32_WIDTH * 3]), + S::loadu_ps(&y[S::VF32_WIDTH * 3]), + accum3, + ); + + // Move each slice to the next position + x = &x[S::VF32_WIDTH * 4..]; + y = &y[S::VF32_WIDTH * 4..]; + } + + let mut dist = S::horizontal_add_ps(accum0) + + S::horizontal_add_ps(accum1) + + S::horizontal_add_ps(accum2) + + S::horizontal_add_ps(accum3); + + // compute for the remaining elements + for i in 0..x.len() { + dist += x[i] * y[i]; + } + + (1.0 - dist).max(0.0) + } +); + +#[cfg(test)] +mod tests { + #[test] + fn distances_equal() { + let r: Vec = (0..2000).map(|_| 1.0).collect(); + let l: Vec = (0..2000).map(|_| 2.0).collect(); + + assert_eq!( + unsafe { super::distance_cosine_x86_avx2(&r, &l) }, + super::super::distance::distance_cosine_unoptimized(&r, &l) + ); + + assert_eq!( + unsafe { super::distance_l2_x86_avx2(&r, &l) }, + super::super::distance::distance_l2_unoptimized(&r, &l) + ); + + //don't use too many dimensions to avoid overflow + let r: Vec = (0..20).map(|v| v as f32).collect(); + let l: Vec = (0..20).map(|v| v as f32).collect(); + + assert_eq!( + unsafe { super::distance_cosine_x86_avx2(&r, &l) }, + super::super::distance::distance_cosine_unoptimized(&r, &l) + ); + assert_eq!( + unsafe { super::distance_l2_x86_avx2(&r, &l) }, + super::super::distance::distance_l2_unoptimized(&r, &l) + ); + + //many dimensions but normalized + let r: Vec = (0..2000).map(|v| v as f32 + 1.0).collect(); + let l: Vec = (0..2000).map(|v| v as f32 + 2.0).collect(); + + let r_size = r.iter().map(|v| v * v).sum::().sqrt(); + let l_size = l.iter().map(|v| v * v).sum::().sqrt(); + + let r: Vec = r.iter().map(|v| v / r_size).collect(); + let l: Vec = l.iter().map(|v| v / l_size).collect(); + + assert!( + (unsafe { super::distance_cosine_x86_avx2(&r, &l) } + - super::super::distance::distance_cosine_unoptimized(&r, &l)) + .abs() + < 0.000001 + ); + assert!( + (unsafe { super::distance_l2_x86_avx2(&r, &l) } + - super::super::distance::distance_l2_unoptimized(&r, &l)) + .abs() + < 0.000001 + ); + } +} diff --git a/timescale_vector/src/access_method/graph.rs b/timescale_vector/src/access_method/graph.rs index 05894324..b939a969 100644 --- a/timescale_vector/src/access_method/graph.rs +++ b/timescale_vector/src/access_method/graph.rs @@ -1,445 +1,238 @@ +use std::cmp::Reverse; +use std::collections::BinaryHeap; use std::{cmp::Ordering, collections::HashSet}; -use pgrx::pg_sys::{Datum, TupleTableSlot}; -use pgrx::{pg_sys, PgBox, PgRelation}; +use pgrx::PgRelation; -use crate::access_method::model::Node; -use crate::access_method::pq::{DistanceCalculator, PgPq}; -use crate::util::ports::slot_getattr; -use crate::util::{HeapPointer, IndexPointer, ItemPointer}; - -use super::model::PgVector; -use super::{ - meta_page::MetaPage, - model::{NeighborWithDistance, ReadableNode}, -}; - -#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] -fn distance(a: &[f32], b: &[f32]) -> f32 { - super::distance_x86::distance_opt_runtime_select(a, b) -} - -//TODO: use slow L2 for now. 
Make pluggable and simd -#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] -fn distance(a: &[f32], b: &[f32]) -> f32 { - assert_eq!(a.len(), b.len()); - - let norm: f32 = a - .iter() - .zip(b.iter()) - .map(|t| (*t.0 as f32 - *t.1 as f32) * (*t.0 as f32 - *t.1 as f32)) - .sum(); - assert!(norm >= 0.); - norm.sqrt() -} - -struct TableSlot { - slot: PgBox, -} - -impl TableSlot { - unsafe fn new(relation: &PgRelation) -> Self { - let slot = PgBox::from_pg(pg_sys::table_slot_create( - relation.as_ptr(), - std::ptr::null_mut(), - )); - Self { slot } - } - - unsafe fn get_attribute(&self, attribute_number: pg_sys::AttrNumber) -> Option { - slot_getattr(&self.slot, attribute_number) - } -} - -impl Drop for TableSlot { - fn drop(&mut self) { - unsafe { pg_sys::ExecDropSingleTupleTableSlot(self.slot.as_ptr()) }; - } -} - -#[derive(Clone)] -pub struct VectorProvider<'a> { - pq_enabled: bool, - calc_distance_with_pq: bool, - heap_rel: Option<&'a PgRelation>, - heap_attr_number: Option, - distance_fn: fn(&[f32], &[f32]) -> f32, -} - -impl<'a> VectorProvider<'a> { - pub fn new( - heap_rel: Option<&'a PgRelation>, - heap_attr_number: Option, - pq_enabled: bool, - calc_distance_with_pq: bool, - ) -> Self { - Self { - pq_enabled, - calc_distance_with_pq, - heap_rel, - heap_attr_number, - distance_fn: distance, - } - } +use crate::access_method::storage::NodeDistanceMeasure; - pub unsafe fn get_full_vector_copy_from_heap( - &self, - index: &PgRelation, - index_pointer: ItemPointer, - ) -> Vec { - let heap_pointer = self.get_heap_pointer(index, index_pointer); - let slot = TableSlot::new(self.heap_rel.unwrap()); - self.init_slot(&slot, heap_pointer); - let slice = self.get_slice(&slot); - slice.to_vec() - } - - unsafe fn init_slot(&self, slot: &TableSlot, heap_pointer: HeapPointer) { - let table_am = self.heap_rel.unwrap().rd_tableam; - let fetch_row_version = (*table_am).tuple_fetch_row_version.unwrap(); - let mut ctid: pg_sys::ItemPointerData = pg_sys::ItemPointerData { - ..Default::default() - }; - heap_pointer.to_item_pointer_data(&mut ctid); - fetch_row_version( - self.heap_rel.unwrap().as_ptr(), - &mut ctid, - &mut pg_sys::SnapshotAnyData, - slot.slot.as_ptr(), - ); - } - - unsafe fn get_slice<'s>(&self, slot: &'s TableSlot) -> &'s [f32] { - let vector = - PgVector::from_datum(slot.get_attribute(self.heap_attr_number.unwrap()).unwrap()); - - //note pgvector slice is only valid as long as the slot is valid that's why the lifetime is tied to it. 
- (*vector).to_slice() - } - - fn get_heap_pointer(&self, index: &PgRelation, index_pointer: IndexPointer) -> HeapPointer { - let rn = unsafe { Node::read(index, index_pointer) }; - let node = rn.get_archived_node(); - let heap_pointer = node.heap_item_pointer.deserialize_item_pointer(); - heap_pointer - } - - unsafe fn get_distance( - &self, - index: &PgRelation, - index_pointer: IndexPointer, - query: &[f32], - dm: &DistanceMeasure, - stats: &mut GreedySearchStats, - ) -> (f32, HeapPointer) { - if self.calc_distance_with_pq { - let rn = unsafe { Node::read(index, index_pointer) }; - stats.node_reads += 1; - let node = rn.get_archived_node(); - assert!(node.pq_vector.len() > 0); - let vec = node.pq_vector.as_slice(); - let distance = dm.get_pq_distance(vec); - stats.pq_distance_comparisons += 1; - stats.distance_comparisons += 1; - return (distance, node.heap_item_pointer.deserialize_item_pointer()); - } - - //now we know we're doing a distance calc on the full-sized vector - if self.pq_enabled { - //have to get it from the heap - let heap_pointer = self.get_heap_pointer(index, index_pointer); - stats.node_reads += 1; - let slot = TableSlot::new(self.heap_rel.unwrap()); - self.init_slot(&slot, heap_pointer); - let slice = self.get_slice(&slot); - stats.distance_comparisons += 1; - return (dm.get_full_vector_distance(slice, query), heap_pointer); - } else { - //have to get it from the index - let rn = unsafe { Node::read(index, index_pointer) }; - stats.node_reads += 1; - let node = rn.get_archived_node(); - assert!(node.vector.len() > 0); - let vec = node.vector.as_slice(); - let distance = dm.get_full_vector_distance(vec, query); - stats.distance_comparisons += 1; - return (distance, node.heap_item_pointer.deserialize_item_pointer()); - } - } - - pub unsafe fn get_full_vector_distance_state<'i>( - &self, - index: &'i PgRelation, - index_pointer: IndexPointer, - ) -> FullVectorDistanceState<'i> { - if self.pq_enabled { - let heap_pointer = self.get_heap_pointer(index, index_pointer); - let slot = TableSlot::new(self.heap_rel.unwrap()); - self.init_slot(&slot, heap_pointer); - FullVectorDistanceState { - table_slot: Some(slot), - readable_node: None, - } - } else { - let rn = Node::read(index, index_pointer); - FullVectorDistanceState { - table_slot: None, - readable_node: Some(rn), - } - } - } - - pub unsafe fn get_distance_pair_for_full_vectors_from_state( - &self, - state: &FullVectorDistanceState, - index: &PgRelation, - index_pointer: IndexPointer, - ) -> f32 { - if self.pq_enabled { - let heap_pointer = self.get_heap_pointer(index, index_pointer); - let slot = TableSlot::new(self.heap_rel.unwrap()); - self.init_slot(&slot, heap_pointer); - let slice1 = self.get_slice(&slot); - let slice2 = self.get_slice(state.table_slot.as_ref().unwrap()); - (self.distance_fn)(slice1, slice2) - } else { - let rn1 = Node::read(index, index_pointer); - let rn2 = state.readable_node.as_ref().unwrap(); - let node1 = rn1.get_archived_node(); - let node2 = rn2.get_archived_node(); - assert!(node1.vector.len() > 0); - assert!(node1.vector.len() == node2.vector.len()); - let vec1 = node1.vector.as_slice(); - let vec2 = node2.vector.as_slice(); - (self.distance_fn)(vec1, vec2) - } - } -} - -pub struct FullVectorDistanceState<'a> { - table_slot: Option, - readable_node: Option>, -} - -pub struct DistanceMeasure { - distance_calculator: Option, - //query: Option<&[f32]> -} - -impl DistanceMeasure { - pub fn new( - index: &PgRelation, - meta_page: &MetaPage, - query: &[f32], - calc_distance_with_pq: bool, - ) 
-> Self { - let use_pq = meta_page.get_use_pq(); - if calc_distance_with_pq { - assert!(use_pq); - let pq = PgPq::new(meta_page, index); - let dc = pq.unwrap().distance_calculator(query, distance); - return Self { - distance_calculator: Some(dc), - }; - } - - return Self { - distance_calculator: None, - }; - } +use crate::util::{HeapPointer, IndexPointer, ItemPointer}; - fn get_pq_distance(&self, vec: &[u8]) -> f32 { - let dc = self.distance_calculator.as_ref().unwrap(); - let distance = dc.distance(vec); - distance - } +use super::graph_neighbor_store::GraphNeighborStore; - fn get_full_vector_distance(&self, vec: &[f32], query: &[f32]) -> f32 { - assert!(self.distance_calculator.is_none()); - distance(vec, query) - } -} +use super::pg_vector::PgVector; +use super::stats::{GreedySearchStats, InsertStats, PruneNeighborStats, StatsNodeVisit}; +use super::storage::Storage; +use super::{meta_page::MetaPage, neighbor_with_distance::NeighborWithDistance}; -struct ListSearchNeighbor { - index_pointer: IndexPointer, - heap_pointer: HeapPointer, +pub struct ListSearchNeighbor { + pub index_pointer: IndexPointer, distance: f32, - visited: bool, + private_data: PD, } -impl PartialOrd for ListSearchNeighbor { +impl PartialOrd for ListSearchNeighbor { fn partial_cmp(&self, other: &Self) -> Option { self.distance.partial_cmp(&other.distance) } } -impl PartialEq for ListSearchNeighbor { +impl PartialEq for ListSearchNeighbor { fn eq(&self, other: &Self) -> bool { self.index_pointer == other.index_pointer } } -impl ListSearchNeighbor { - pub fn new(index_pointer: IndexPointer, heap_pointer: HeapPointer, distance: f32) -> Self { +impl Eq for ListSearchNeighbor {} + +impl Ord for ListSearchNeighbor { + fn cmp(&self, other: &Self) -> Ordering { + self.distance.partial_cmp(&other.distance).unwrap() + } +} + +impl ListSearchNeighbor { + pub fn new(index_pointer: IndexPointer, distance: f32, private_data: PD) -> Self { + assert!(!distance.is_nan()); + debug_assert!(distance >= 0.0); Self { index_pointer, - heap_pointer, + private_data, distance, - visited: false, } } + + pub fn get_private_data(&self) -> &PD { + &self.private_data + } } -pub struct ListSearchResult { - best_candidate: Vec, //keep sorted by distanced +pub struct ListSearchResult { + candidates: BinaryHeap>>, + visited: Vec>, inserted: HashSet, - max_history_size: Option, - dm: DistanceMeasure, + pub sdm: Option, pub stats: GreedySearchStats, } -impl ListSearchResult { +impl ListSearchResult { fn empty() -> Self { Self { - best_candidate: vec![], + candidates: BinaryHeap::new(), + visited: vec![], inserted: HashSet::new(), - max_history_size: None, - dm: DistanceMeasure { - distance_calculator: None, - }, + sdm: None, stats: GreedySearchStats::new(), } } - fn new( - index: &PgRelation, - max_history_size: Option, - graph: &G, + fn new>( init_ids: Vec, - query: &[f32], - dm: DistanceMeasure, - ) -> Self - where - G: Graph + ?Sized, - { + sdm: S::QueryDistanceMeasure, + search_list_size: usize, + meta_page: &MetaPage, + gns: &GraphNeighborStore, + storage: &S, + ) -> Self { + let neigbors = meta_page.get_num_neighbors() as usize; let mut res = Self { - best_candidate: Vec::new(), - inserted: HashSet::new(), - max_history_size, + candidates: BinaryHeap::with_capacity(search_list_size * neigbors), + visited: Vec::with_capacity(search_list_size * 2), + //candidate_storage: Vec::with_capacity(search_list_size * neigbors), + //best_candidate: Vec::with_capacity(search_list_size * neigbors), + inserted: HashSet::with_capacity(search_list_size * 
neigbors), stats: GreedySearchStats::new(), - dm: dm, + sdm: Some(sdm), }; - res.stats.calls += 1; + res.stats.record_call(); for index_pointer in init_ids { - res.insert(index, graph, index_pointer, query); + let lsn = storage.create_lsn_for_init_id(&mut res, index_pointer, gns); + res.insert_neighbor(lsn); } res } - fn insert( - &mut self, - index: &PgRelation, - graph: &G, - index_pointer: ItemPointer, - query: &[f32], - ) where - G: Graph + ?Sized, - { - //no point reprocessing a point. Distance calcs are expensive. - if !self.inserted.insert(index_pointer) { - return; - } + pub fn prepare_insert(&mut self, ip: ItemPointer) -> bool { + return self.inserted.insert(ip); + } - let vp = graph.get_vector_provider(); - let (dist, heap_pointer) = - unsafe { vp.get_distance(index, index_pointer, query, &self.dm, &mut self.stats) }; + /// Internal function + pub fn insert_neighbor(&mut self, n: ListSearchNeighbor) { + self.stats.record_candidate(); + self.candidates.push(Reverse(n)); + } - let neighbor = ListSearchNeighbor::new(index_pointer, heap_pointer, dist); - self._insert_neighbor(neighbor); + pub fn get_lsn_by_idx(&self, idx: usize) -> &ListSearchNeighbor { + &self.visited[idx] } - /// Internal function - fn _insert_neighbor(&mut self, n: ListSearchNeighbor) { - if let Some(max_size) = self.max_history_size { - if self.best_candidate.len() >= max_size { - let last = self.best_candidate.last().unwrap(); - if n >= *last { - //n is too far in the list to be the best candidate. - return; - } - self.best_candidate.pop(); - } + fn visit_closest(&mut self, pos_limit: usize) -> Option { + if self.candidates.len() == 0 { + return None; } - //insert while preserving sort order. - let idx = self.best_candidate.partition_point(|x| *x < n); - self.best_candidate.insert(idx, n) - } - fn visit_closest(&mut self, pos_limit: usize) -> Option<(ItemPointer, f32)> { - //OPT: should we optimize this not to do a linear search each time? - let neighbor_position = self.best_candidate.iter().position(|n| !n.visited); - match neighbor_position { - Some(pos) => { - if pos > pos_limit { - return None; - } - let n = &mut self.best_candidate[pos]; - n.visited = true; - Some((n.index_pointer, n.distance)) + if self.visited.len() > pos_limit { + let node_at_pos = &self.visited[pos_limit - 1]; + let head = self.candidates.peek().unwrap(); + if head.0.distance >= node_at_pos.distance { + return None; } - None => None, } + + let head = self.candidates.pop().unwrap(); + let idx = self + .visited + .partition_point(|x| x.distance < head.0.distance); + self.visited.insert(idx, head.0); + Some(idx) } //removes and returns the first element. Given that the element remains in self.inserted, that means the element will never again be insereted //into the best_candidate list, so it will never again be returned. 
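    // A minimal sketch of the intended streaming pattern (hypothetical caller code;
    // the names come from this file and the Storage trait in this change):
    //
    //   let mut lsr = graph.greedy_search_streaming_init(query, search_list_size, &storage);
    //   loop {
    //       graph.greedy_search_iterate(&mut lsr, search_list_size, None, &storage);
    //       match lsr.consume(&storage) {
    //           Some((heap_pointer, _index_pointer)) => { /* hand the tuple to the caller */ }
    //           None => break,
    //       }
    //   }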
- pub fn consume(&mut self) -> Option<(HeapPointer, IndexPointer)> { - if self.best_candidate.is_empty() { + pub fn consume>( + &mut self, + storage: &S, + ) -> Option<(HeapPointer, IndexPointer)> { + if self.visited.len() == 0 { return None; } - let f = self.best_candidate.remove(0); - return Some((f.heap_pointer, f.index_pointer)); + let lsn = self.visited.remove(0); + let heap_pointer = storage.return_lsn(&lsn, &mut self.stats); + return Some((heap_pointer, lsn.index_pointer)); } } -pub trait Graph { - fn read<'a>(&self, index: &'a PgRelation, index_pointer: ItemPointer) -> ReadableNode<'a>; - fn get_init_ids(&mut self) -> Option>; - fn get_neighbors( - &self, - index: &PgRelation, - neighbors_of: ItemPointer, - result: &mut Vec, - ) -> bool; - fn get_neighbors_with_distances( - &self, - index: &PgRelation, - neighbors_of: ItemPointer, - result: &mut Vec, - ) -> bool; +pub struct Graph<'a> { + neighbor_store: GraphNeighborStore, + meta_page: &'a mut MetaPage, +} - fn is_empty(&self) -> bool; +impl<'a> Graph<'a> { + pub fn new(neighbor_store: GraphNeighborStore, meta_page: &'a mut MetaPage) -> Self { + Self { + neighbor_store, + meta_page, + } + } - fn get_vector_provider(&self) -> VectorProvider; + pub fn get_neighbor_store(&self) -> &GraphNeighborStore { + &self.neighbor_store + } - fn get_distance_measure( - &self, - index: &PgRelation, - query: &[f32], - calc_distance_with_pq: bool, - ) -> DistanceMeasure { - let meta_page = self.get_meta_page(index); - return DistanceMeasure::new(index, meta_page, query, calc_distance_with_pq); + fn get_init_ids(&self) -> Option> { + self.meta_page.get_init_ids() } - fn set_neighbors( + fn add_neighbors( &mut self, - index: &PgRelation, + storage: &S, neighbors_of: ItemPointer, - new_neighbors: Vec, - ); + additional_neighbors: Vec, + stats: &mut PruneNeighborStats, + ) -> (bool, Vec) { + let mut candidates = Vec::::with_capacity( + (self.neighbor_store.max_neighbors(self.get_meta_page()) as usize) + + additional_neighbors.len(), + ); + self.neighbor_store + .get_neighbors_with_full_vector_distances( + neighbors_of, + storage, + &mut candidates, + stats, + ); + + let mut hash: HashSet = candidates + .iter() + .map(|c| c.get_index_pointer_to_neighbor()) + .collect(); + for n in additional_neighbors { + if hash.insert(n.get_index_pointer_to_neighbor()) { + candidates.push(n); + } + } + //remove myself + if !hash.insert(neighbors_of) { + //prevent self-loops + let index = candidates + .iter() + .position(|x| x.get_index_pointer_to_neighbor() == neighbors_of) + .unwrap(); + candidates.remove(index); + } + + let (pruned, new_neighbors) = + if candidates.len() > self.neighbor_store.max_neighbors(self.get_meta_page()) { + let new_list = self.prune_neighbors(candidates, storage, stats); + (true, new_list) + } else { + (false, candidates) + }; + + //OPT: remove clone + self.neighbor_store.set_neighbors( + storage, + self.meta_page, + neighbors_of, + new_neighbors.clone(), + stats, + ); + (pruned, new_neighbors) + } - fn get_meta_page(&self, index: &PgRelation) -> &MetaPage; + pub fn get_meta_page(&self) -> &MetaPage { + &self.meta_page + } /// greedy search looks for the closest neighbors to a query vector /// You may think that this needs the "K" parameter but it does not, @@ -453,88 +246,82 @@ pub trait Graph { /// /// Note this is the one-shot implementation that keeps only the closest `search_list_size` results in /// the returned ListSearchResult elements. 
It shouldn't be used with self.greedy_search_iterate - fn greedy_search( - &mut self, - index: &PgRelation, - query: &[f32], - search_list_size: usize, - ) -> (ListSearchResult, Option>) - where - Self: Graph, - { + fn greedy_search_for_build( + &self, + query: PgVector, + meta_page: &MetaPage, + storage: &S, + stats: &mut GreedySearchStats, + ) -> HashSet { let init_ids = self.get_init_ids(); if let None = init_ids { //no nodes in the graph - return (ListSearchResult::empty(), None); + return HashSet::with_capacity(0); } - let dm = { - self.get_distance_measure( - index, - query, - self.get_vector_provider().calc_distance_with_pq, - ) - }; + let dm = storage.get_query_distance_measure(query); + let search_list_size = meta_page.get_search_list_size_for_build() as usize; + let mut l = ListSearchResult::new( - index, - Some(search_list_size), - self, init_ids.unwrap(), - query, dm, + search_list_size, + meta_page, + self.get_neighbor_store(), + storage, ); - let v = self.greedy_search_iterate(&mut l, index, query, search_list_size); - return (l, v); + let mut visited_nodes = HashSet::with_capacity(search_list_size); + self.greedy_search_iterate(&mut l, search_list_size, Some(&mut visited_nodes), storage); + stats.combine(&l.stats); + return visited_nodes; } /// Returns a ListSearchResult initialized for streaming. The output should be used with greedy_search_iterate to obtain /// the next elements. - fn greedy_search_streaming_init( - &mut self, - index: &PgRelation, - query: &[f32], - ) -> ListSearchResult { + pub fn greedy_search_streaming_init( + &self, + query: PgVector, + search_list_size: usize, + storage: &S, + ) -> ListSearchResult { let init_ids = self.get_init_ids(); if let None = init_ids { //no nodes in the graph return ListSearchResult::empty(); } - let dm = self.get_distance_measure( - index, - query, - self.get_vector_provider().calc_distance_with_pq, - ); - ListSearchResult::new(index, None, self, init_ids.unwrap(), query, dm) + let dm = storage.get_query_distance_measure(query); + + ListSearchResult::new( + init_ids.unwrap(), + dm, + search_list_size, + &self.meta_page, + self.get_neighbor_store(), + storage, + ) } /// Advance the state of the lsr until the closest `visit_n_closest` elements have been visited. - fn greedy_search_iterate( - &mut self, - lsr: &mut ListSearchResult, - index: &PgRelation, - query: &[f32], + pub fn greedy_search_iterate( + &self, + lsr: &mut ListSearchResult, visit_n_closest: usize, - ) -> Option> - where - Self: Graph, - { - //OPT: Only build v when needed. 
- let mut v: HashSet<_> = HashSet::::with_capacity(visit_n_closest); - let mut neighbors = - Vec::::with_capacity(self.get_meta_page(index).get_num_neighbors() as _); - while let Some((index_pointer, distance)) = lsr.visit_closest(visit_n_closest) { - neighbors.clear(); - let neighbors_existed = self.get_neighbors(index, index_pointer, &mut neighbors); - if !neighbors_existed { - panic!("Nodes in the list search results that aren't in the builder"); - } - - for neighbor_index_pointer in &neighbors { - lsr.insert(index, self, *neighbor_index_pointer, query) + mut visited_nodes: Option<&mut HashSet>, + storage: &S, + ) { + while let Some(list_search_entry_idx) = lsr.visit_closest(visit_n_closest) { + match visited_nodes { + None => {} + Some(ref mut visited_nodes) => { + let list_search_entry = &lsr.visited[list_search_entry_idx]; + visited_nodes.insert(NeighborWithDistance::new( + list_search_entry.index_pointer, + list_search_entry.distance, + )); + } } - v.insert(NeighborWithDistance::new(index_pointer, distance)); + lsr.stats.record_visit(); + storage.visit_lsn(lsr, list_search_entry_idx, &self.neighbor_store); } - - Some(v) } /// Prune neigbors by prefering neighbors closer to the point in question @@ -542,40 +329,17 @@ pub trait Graph { /// /// TODO: this is the ann-disk implementation. There may be better implementations /// if we save the factors or the distances and add incrementally. Not sure. - fn prune_neighbors( + pub fn prune_neighbors( &self, - index: &PgRelation, - index_pointer: ItemPointer, - new_neigbors: Vec, - ) -> (Vec, PruneNeighborStats) { - let mut stats = PruneNeighborStats::new(); + mut candidates: Vec, + storage: &S, + stats: &mut PruneNeighborStats, + ) -> Vec { stats.calls += 1; //TODO make configurable? - let max_alpha = self.get_meta_page(index).get_max_alpha(); - //get a unique candidate pool - let mut candidates = Vec::::with_capacity( - (self.get_meta_page(index).get_num_neighbors() as usize) + new_neigbors.len(), - ); - self.get_neighbors_with_distances(index, index_pointer, &mut candidates); + let max_alpha = self.get_meta_page().get_max_alpha(); - let mut hash: HashSet = candidates - .iter() - .map(|c| c.get_index_pointer_to_neighbor()) - .collect(); - for n in new_neigbors { - if hash.insert(n.get_index_pointer_to_neighbor()) { - candidates.push(n); - } - } - //remove myself - if !hash.insert(index_pointer) { - //prevent self-loops - let index = candidates - .iter() - .position(|x| x.get_index_pointer_to_neighbor() == index_pointer) - .unwrap(); - candidates.remove(index); - } + stats.num_neighbors_before_prune += candidates.len(); //TODO remove deleted nodes //TODO diskann has something called max_occlusion_size/max_candidate_size(default:750). Do we need to implement? @@ -583,20 +347,19 @@ pub trait Graph { //sort by distance candidates.sort(); let mut results = Vec::::with_capacity( - self.get_meta_page(index).get_max_neighbors_during_build(), + self.get_meta_page().get_num_neighbors() as _, ); let mut max_factors: Vec = vec![0.0; candidates.len()]; let mut alpha = 1.0; + let dimension_epsilon = self.get_meta_page().get_num_dimensions() as f32 * f32::EPSILON; //first we add nodes that "pass" a small alpha. Then, if there //is still room we loop again with a larger alpha. 
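    //For example: with alpha = 1.0, a candidate is occluded whenever it is closer to an
    //already-kept neighbor than to the point being pruned (factor > 1.0). Each pass
    //multiplies alpha by 1.2 (1.0, 1.2, 1.44, ...) up to max_alpha, progressively
    //re-admitting occluded candidates until the num_neighbors slots are filled.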
- while alpha <= max_alpha - && results.len() < self.get_meta_page(index).get_num_neighbors() as _ - { + while alpha <= max_alpha && results.len() < self.get_meta_page().get_num_neighbors() as _ { for (i, neighbor) in candidates.iter().enumerate() { - if results.len() >= self.get_meta_page(index).get_num_neighbors() as _ { - return (results, stats); + if results.len() >= self.get_meta_page().get_num_neighbors() as _ { + return results; } if max_factors[i] > alpha { continue; @@ -610,11 +373,10 @@ pub trait Graph { //rename for clarity. let existing_neighbor = neighbor; - let vp = self.get_vector_provider(); let dist_state = unsafe { - vp.get_full_vector_distance_state( - index, + storage.get_node_distance_measure( existing_neighbor.get_index_pointer_to_neighbor(), + stats, ) }; @@ -625,179 +387,121 @@ pub trait Graph { continue; } - //todo handle the non-pq case - let distance_between_candidate_and_existing_neighbor = unsafe { - vp.get_distance_pair_for_full_vectors_from_state( - &dist_state, - index, - candidate_neighbor.get_index_pointer_to_neighbor(), - ) + let mut distance_between_candidate_and_existing_neighbor = unsafe { + dist_state + .get_distance(candidate_neighbor.get_index_pointer_to_neighbor(), stats) }; - stats.node_reads += 2; - stats.distance_comparisons += 1; - let distance_between_candidate_and_point = candidate_neighbor.get_distance(); + let mut distance_between_candidate_and_point = + candidate_neighbor.get_distance(); + + //We need both values to be positive. + //Otherwise, the case where distance_between_candidate_and_point > 0 and distance_between_candidate_and_existing_neighbor < 0 is totally wrong. + //If we implement inner product distance we'll have to figure something else out. + if distance_between_candidate_and_point < 0.0 + && distance_between_candidate_and_point >= 0.0 - dimension_epsilon + { + distance_between_candidate_and_point = 0.0; + } + + if distance_between_candidate_and_existing_neighbor < 0.0 + && distance_between_candidate_and_existing_neighbor + >= 0.0 - dimension_epsilon + { + distance_between_candidate_and_existing_neighbor = 0.0; + } + + debug_assert!( + distance_between_candidate_and_point >= 0.0, + "distance_between_candidate_and_point is negative: {}, {}", + distance_between_candidate_and_point, + f32::EPSILON + ); + debug_assert!(distance_between_candidate_and_existing_neighbor >= 0.0); + //factor is high if the candidate is closer to an existing neighbor than the point it's being considered for - let factor = if distance_between_candidate_and_existing_neighbor == 0.0 { - f64::MAX //avoid division by 0 - } else { - distance_between_candidate_and_point as f64 - / distance_between_candidate_and_existing_neighbor as f64 - }; + let factor = + if distance_between_candidate_and_existing_neighbor < 0.0 + f32::EPSILON { + if distance_between_candidate_and_point < 0.0 + f32::EPSILON { + 1.0 + } else { + f64::MAX + } + } else { + distance_between_candidate_and_point as f64 + / distance_between_candidate_and_existing_neighbor as f64 + }; max_factors[j] = max_factors[j].max(factor) } } alpha = alpha * 1.2 } - (results, stats) + stats.num_neighbors_after_prune += results.len(); + results } - fn insert( + pub fn insert( &mut self, index: &PgRelation, index_pointer: IndexPointer, - vec: &[f32], - ) -> InsertStats { - let mut prune_neighbor_stats: PruneNeighborStats = PruneNeighborStats::new(); - let mut greedy_search_stats = GreedySearchStats::new(); - let meta_page = self.get_meta_page(index); - if self.is_empty() { - self.set_neighbors( - index, + vec: 
PgVector, + storage: &S, + stats: &mut InsertStats, + ) { + if self.meta_page.get_init_ids().is_none() { + //TODO probably better set off of centeroids + MetaPage::update_init_ids(index, vec![index_pointer], stats); + *self.meta_page = MetaPage::fetch(index); + + self.neighbor_store.set_neighbors( + storage, + self.meta_page, index_pointer, Vec::::with_capacity( - meta_page.get_max_neighbors_during_build() as _, + self.neighbor_store.max_neighbors(self.meta_page) as _, ), + stats, ); - return InsertStats { - prune_neighbor_stats: prune_neighbor_stats, - greedy_search_stats: greedy_search_stats, - }; } + let meta_page = self.get_meta_page(); + //TODO: make configurable? - let (l, v) = - self.greedy_search(index, vec, meta_page.get_search_list_size_for_build() as _); - greedy_search_stats.combine(l.stats); - let (neighbor_list, forward_stats) = - self.prune_neighbors(index, index_pointer, v.unwrap().into_iter().collect()); - prune_neighbor_stats.combine(forward_stats); + let v = + self.greedy_search_for_build(vec, meta_page, storage, &mut stats.greedy_search_stats); - //set forward pointers - self.set_neighbors(index, index_pointer, neighbor_list.clone()); + let (_, neighbor_list) = self.add_neighbors( + storage, + index_pointer, + v.into_iter().collect(), + &mut stats.prune_neighbor_stats, + ); //update back pointers let mut cnt = 0; for neighbor in neighbor_list { - let (needed_prune, backpointer_stats) = self.update_back_pointer( - index, + let needed_prune = self.update_back_pointer( neighbor.get_index_pointer_to_neighbor(), index_pointer, neighbor.get_distance(), + storage, + &mut stats.prune_neighbor_stats, ); if needed_prune { cnt = cnt + 1; } - prune_neighbor_stats.combine(backpointer_stats); } - //info!("pruned {} neighbors", cnt); - return InsertStats { - prune_neighbor_stats, - greedy_search_stats, - }; } - fn update_back_pointer( + fn update_back_pointer( &mut self, - index: &PgRelation, from: IndexPointer, to: IndexPointer, distance: f32, - ) -> (bool, PruneNeighborStats) { - let mut current_links = Vec::::new(); - self.get_neighbors_with_distances(index, from, &mut current_links); - - if current_links.len() < current_links.capacity() as _ { - current_links.push(NeighborWithDistance::new(to, distance)); - self.set_neighbors(index, from, current_links); - (false, PruneNeighborStats::new()) - } else { - //info!("sizes {} {} {}", current_links.len() + 1, current_links.capacity(), self.meta_page.get_max_neighbors_during_build()); - //Note prune_neighbors will reduce to current_links.len() to num_neighbors while capacity is num_neighbors * 1.3 - //thus we are avoiding prunning every time - let (new_list, stats) = - self.prune_neighbors(index, from, vec![NeighborWithDistance::new(to, distance)]); - self.set_neighbors(index, from, new_list); - (true, stats) - } - } -} - -#[derive(Debug)] -pub struct PruneNeighborStats { - pub calls: usize, - pub distance_comparisons: usize, - pub node_reads: usize, -} - -impl PruneNeighborStats { - pub fn new() -> Self { - PruneNeighborStats { - calls: 0, - distance_comparisons: 0, - node_reads: 0, - } - } - - pub fn combine(&mut self, other: Self) { - self.calls += other.calls; - self.distance_comparisons += other.distance_comparisons; - self.node_reads += other.node_reads; - } -} - -#[derive(Debug)] -pub struct GreedySearchStats { - pub calls: usize, - pub distance_comparisons: usize, - pub node_reads: usize, - pub pq_distance_comparisons: usize, -} - -impl GreedySearchStats { - pub fn new() -> Self { - GreedySearchStats { - calls: 0, - 
distance_comparisons: 0,
-            node_reads: 0,
-            pq_distance_comparisons: 0,
-        }
-    }
-
-    pub fn combine(&mut self, other: Self) {
-        self.calls += other.calls;
-        self.distance_comparisons += other.distance_comparisons;
-        self.node_reads += other.node_reads;
-        self.pq_distance_comparisons += other.pq_distance_comparisons;
-    }
-}
-
-#[derive(Debug)]
-pub struct InsertStats {
-    pub prune_neighbor_stats: PruneNeighborStats,
-    pub greedy_search_stats: GreedySearchStats,
-}
-
-impl InsertStats {
-    pub fn new() -> Self {
-        return InsertStats {
-            prune_neighbor_stats: PruneNeighborStats::new(),
-            greedy_search_stats: GreedySearchStats::new(),
-        };
-    }
-
-    pub fn combine(&mut self, other: InsertStats) {
-        self.prune_neighbor_stats
-            .combine(other.prune_neighbor_stats);
-        self.greedy_search_stats.combine(other.greedy_search_stats);
+        storage: &S,
+        prune_stats: &mut PruneNeighborStats,
+    ) -> bool {
+        let new = vec![NeighborWithDistance::new(to, distance)];
+        let (pruned, _) = self.add_neighbors(storage, from, new, prune_stats);
+        pruned
     }
 }
diff --git a/timescale_vector/src/access_method/graph_neighbor_store.rs b/timescale_vector/src/access_method/graph_neighbor_store.rs
new file mode 100644
index 00000000..50933a71
--- /dev/null
+++ b/timescale_vector/src/access_method/graph_neighbor_store.rs
@@ -0,0 +1,124 @@
+use std::collections::BTreeMap;
+
+use crate::util::{IndexPointer, ItemPointer};
+
+use super::stats::{StatsDistanceComparison, StatsNodeModify, StatsNodeRead};
+
+use super::meta_page::MetaPage;
+use super::neighbor_with_distance::*;
+use super::storage::Storage;
+
+/// A BuilderNeighborCache keeps the neighbors in memory in the `neighbor_map` below.
+/// The idea is that during the index build, you don't want to update the actual Postgres
+/// pages every time you change the neighbors. Instead you change the neighbors in memory
+/// until the build is done. Afterwards, finalize_node_at_end_of_build() writes out all
+/// the neighbors to the right pages.
+pub struct BuilderNeighborCache {
+    //maps node's pointer to the representation on disk
+    //use a btree to provide ordering on the item pointers in iter().
+    //this ensures the write in finalize_node_at_end_of_build() is ordered, not random.
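+    //(trade-off: a BTreeMap costs O(log n) per access during build where a HashMap
+    //would be O(1), but the ordered iteration makes the final write pass sequential
+    //over index pages instead of random)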
+ neighbor_map: BTreeMap>, +} + +impl BuilderNeighborCache { + pub fn new() -> Self { + Self { + neighbor_map: BTreeMap::new(), + } + } + pub fn iter(&self) -> impl Iterator)> { + self.neighbor_map.iter() + } + + pub fn get_neighbors(&self, neighbors_of: ItemPointer) -> Vec { + let neighbors = self.neighbor_map.get(&neighbors_of); + match neighbors { + Some(n) => n + .iter() + .map(|n| n.get_index_pointer_to_neighbor()) + .collect(), + None => vec![], + } + } + + pub fn get_neighbors_with_full_vector_distances( + &self, + neighbors_of: ItemPointer, + result: &mut Vec, + ) { + let neighbors = self.neighbor_map.get(&neighbors_of); + match neighbors { + Some(n) => { + for nwd in n { + result.push(nwd.clone()); + } + } + None => (), + } + } + + pub fn set_neighbors( + &mut self, + neighbors_of: ItemPointer, + new_neighbors: Vec, + ) { + self.neighbor_map.insert(neighbors_of, new_neighbors); + } + + pub fn max_neighbors(&self, meta_page: &MetaPage) -> usize { + meta_page.get_max_neighbors_during_build() + } +} + +pub enum GraphNeighborStore { + Builder(BuilderNeighborCache), + Disk, +} + +impl GraphNeighborStore { + pub fn get_neighbors_with_full_vector_distances< + S: Storage, + T: StatsNodeRead + StatsDistanceComparison, + >( + &self, + neighbors_of: ItemPointer, + storage: &S, + result: &mut Vec, + stats: &mut T, + ) { + match self { + GraphNeighborStore::Builder(b) => { + b.get_neighbors_with_full_vector_distances(neighbors_of, result) + } + GraphNeighborStore::Disk => { + storage.get_neighbors_with_distances_from_disk(neighbors_of, result, stats) + } + }; + } + + pub fn set_neighbors( + &mut self, + storage: &S, + meta_page: &MetaPage, + neighbors_of: ItemPointer, + new_neighbors: Vec, + stats: &mut T, + ) { + match self { + GraphNeighborStore::Builder(b) => b.set_neighbors(neighbors_of, new_neighbors), + GraphNeighborStore::Disk => storage.set_neighbors_on_disk( + meta_page, + neighbors_of, + new_neighbors.as_slice(), + stats, + ), + } + } + + pub fn max_neighbors(&self, meta_page: &MetaPage) -> usize { + match self { + GraphNeighborStore::Builder(b) => b.max_neighbors(meta_page), + GraphNeighborStore::Disk => meta_page.get_num_neighbors() as _, + } + } +} diff --git a/timescale_vector/src/access_method/guc.rs b/timescale_vector/src/access_method/guc.rs index 5e9d8a54..3275d221 100644 --- a/timescale_vector/src/access_method/guc.rs +++ b/timescale_vector/src/access_method/guc.rs @@ -1,6 +1,7 @@ use pgrx::*; pub static TSV_QUERY_SEARCH_LIST_SIZE: GucSetting = GucSetting::::new(100); +pub static TSV_RESORT_SIZE: GucSetting = GucSetting::::new(50); pub fn init() { GucRegistry::define_int_guc( @@ -13,4 +14,15 @@ pub fn init() { GucContext::Userset, GucFlags::default(), ); + + GucRegistry::define_int_guc( + "tsv.query_rescore", + "The number of elements rescored (0 to disable rescoring)", + "Rescoring takes the query_rescore number of elements that have the smallest approximate distance, rescores them with the exact distance, returning the closest ones with the exact distance.", + &TSV_RESORT_SIZE, + 1, + 1000, + GucContext::Userset, + GucFlags::default(), + ); } diff --git a/timescale_vector/src/access_method/meta_page.rs b/timescale_vector/src/access_method/meta_page.rs index 2d68f8dc..1f03ef72 100644 --- a/timescale_vector/src/access_method/meta_page.rs +++ b/timescale_vector/src/access_method/meta_page.rs @@ -1,17 +1,33 @@ -use pgrx::pg_sys::{BufferGetBlockNumber, Pointer}; +use pgrx::pg_sys::{BufferGetBlockNumber, InvalidBlockNumber, InvalidOffsetNumber}; use pgrx::*; +use rkyv::{Archive, 
Deserialize, Serialize}; +use semver::Version; +use timescale_vector_derive::{Readable, Writeable}; use crate::access_method::options::TSVIndexOptions; use crate::util::page; use crate::util::*; +use super::distance; +use super::options::{ + NUM_DIMENSIONS_DEFAULT_SENTINEL, NUM_NEIGHBORS_DEFAULT_SENTINEL, + SBQ_NUM_BITS_PER_DIMENSION_DEFAULT_SENTINEL, +}; +use super::sbq::SbqNode; +use super::stats::StatsNodeModify; +use super::storage::StorageType; + const TSV_MAGIC_NUMBER: u32 = 768756476; //Magic number, random -const TSV_VERSION: u32 = 1; +const TSV_VERSION: u32 = 2; const GRAPH_SLACK_FACTOR: f64 = 1.3_f64; -/// This is metadata about the entire index. -/// Stored as the first page in the index relation. + +const META_BLOCK_NUMBER: pg_sys::BlockNumber = 0; +const META_HEADER_OFFSET: pgrx::pg_sys::OffsetNumber = 1; +const META_OFFSET: pgrx::pg_sys::OffsetNumber = 2; +/// This is old metadata version for extension versions <=0.0.2. +/// Note it is NOT repr(C) #[derive(Clone)] -pub struct MetaPage { +pub struct MetaPageV1 { /// random magic number for identifying the index magic_number: u32, /// version number for future-proofing @@ -25,9 +41,97 @@ pub struct MetaPage { init_ids_block_number: pg_sys::BlockNumber, init_ids_offset: pg_sys::OffsetNumber, use_pq: bool, - pq_vector_length: usize, - pq_block_number: pg_sys::BlockNumber, - pq_block_offset: pg_sys::OffsetNumber, + _pq_vector_length: usize, + _pq_block_number: pg_sys::BlockNumber, + _pq_block_offset: pg_sys::OffsetNumber, +} + +impl MetaPageV1 { + /// Returns the MetaPage from a page. + /// Should only be called from the very first page in a relation. + unsafe fn page_get_meta(page: pg_sys::Page, buffer: pg_sys::Buffer) -> *mut MetaPageV1 { + assert_eq!(BufferGetBlockNumber(buffer), 0); + let meta_page = ports::PageGetContents(page) as *mut MetaPageV1; + assert_eq!((*meta_page).magic_number, TSV_MAGIC_NUMBER); + assert_eq!((*meta_page).version, 1); + meta_page + } + + pub fn get_new_meta(&self) -> MetaPage { + if self.use_pq { + pgrx::error!("PQ is no longer supported. Please rebuild the TSV index."); + } + + MetaPage { + magic_number: TSV_MAGIC_NUMBER, + version: TSV_VERSION, + extension_version_when_built: "0.0.2".to_string(), + distance_type: DistanceType::L2 as u16, + num_dimensions: self.num_dimensions, + num_dimensions_to_index: self.num_dimensions, + bq_num_bits_per_dimension: 1, + num_neighbors: self.num_neighbors, + storage_type: StorageType::Plain as u8, + search_list_size: self.search_list_size, + max_alpha: self.max_alpha, + init_ids: ItemPointer::new(self.init_ids_block_number, self.init_ids_offset), + quantizer_metadata: ItemPointer::new(InvalidBlockNumber, InvalidOffsetNumber), + } + } +} + +/// This is metadata header. It contains just the magic number and version number. +/// Stored as the first page (offset 1) in the index relation. +/// The header is separate from the actual metadata to allow for future-proofing. +/// In particular, if the metadata format changes, we can still read the header to check the version. 
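+/// For example, an upgrade path can read this header first: if `version` is 1, the
+/// metadata is interpreted through the old MetaPageV1 layout (and converted via
+/// get_new_meta()); otherwise the current MetaPage format below is deserialized.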
+#[derive(Clone, PartialEq, Archive, Deserialize, Serialize, Readable, Writeable)]
+#[archive(check_bytes)]
+pub struct MetaPageHeader {
+    /// random magic number for identifying the index
+    magic_number: u32,
+    /// version number for future-proofing
+    version: u32,
+}
+
+enum DistanceType {
+    Cosine = 0,
+    L2 = 1,
+}
+
+impl DistanceType {
+    fn from_u16(value: u16) -> Self {
+        match value {
+            0 => DistanceType::Cosine,
+            1 => DistanceType::L2,
+            _ => panic!("Unknown DistanceType number {}", value),
+        }
+    }
+}
+
+/// This is metadata about the entire index.
+/// Stored as the first page (offset 2) in the index relation.
+#[derive(Clone, PartialEq, Archive, Deserialize, Serialize, Readable, Writeable)]
+#[archive(check_bytes)]
+pub struct MetaPage {
+    /// repeat the magic number and version from MetaPageHeader for sanity checks
+    magic_number: u32,
+    version: u32,
+    extension_version_when_built: String,
+    /// The value of the DistanceType enum
+    distance_type: u16,
+    /// total number of dimensions in the vector
+    num_dimensions: u32,
+    //number of dimensions in the vectors stored in the index
+    num_dimensions_to_index: u32,
+    bq_num_bits_per_dimension: u8,
+    /// the value of the TSVStorageLayout enum
+    storage_type: u8,
+    /// max number of outgoing edges a node in the graph can have (R in the papers)
+    num_neighbors: u32,
+    search_list_size: u32,
+    max_alpha: f64,
+    init_ids: ItemPointer,
+    quantizer_metadata: ItemPointer,
 }
 
 impl MetaPage {
@@ -37,16 +141,30 @@ impl MetaPage {
         self.num_dimensions
     }
 
+    pub fn get_num_dimensions_to_index(&self) -> u32 {
+        self.num_dimensions_to_index
+    }
+
+    pub fn get_bq_num_bits_per_dimension(&self) -> u8 {
+        self.bq_num_bits_per_dimension
+    }
+
+    pub fn get_num_dimensions_for_neighbors(&self) -> u32 {
+        match StorageType::from_u8(self.storage_type) {
+            StorageType::Plain => {
+                error!("get_num_dimensions_for_neighbors should not be called for Plain storage")
+            }
+            StorageType::SbqSpeedup => self.num_dimensions_to_index,
+            StorageType::SbqCompression => 0,
+        }
+    }
+
+    /// Maximum number of neighbors per node. Given that we pre-allocate
+    /// this many slots for each node, this cannot change after the graph is built.
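
The Readable/Writeable derives above build on rkyv's archive format. Assuming rkyv 0.7 with its validation feature, whose to_bytes/check_archived_root/Infallible API matches the calls in this diff, a minimal round trip looks like this (Demo is an illustrative type, not one from the extension):

use rkyv::{Archive, Deserialize, Serialize};

#[derive(Archive, Deserialize, Serialize, Debug, PartialEq)]
#[archive(check_bytes)]
struct Demo {
    version: u32,
    name: String,
}

fn main() {
    let value = Demo { version: 2, name: "tsv".into() };

    // Serialize to a standalone byte buffer, the way write_to_page() does
    // before handing the bytes to page.add_item().
    let bytes = rkyv::to_bytes::<_, 256>(&value).unwrap();

    // Validate and view the archived form in place, without copying.
    let archived = rkyv::check_archived_root::<Demo>(&bytes[..]).unwrap();
    assert_eq!(archived.version, 2);

    // Deserialize back into an owned value, the way fetch() returns MetaPage.
    let owned: Demo = archived.deserialize(&mut rkyv::Infallible).unwrap();
    assert_eq!(owned, value);
}
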
pub fn get_num_neighbors(&self) -> u32 { self.num_neighbors } - pub fn get_pq_vector_length(&self) -> usize { - self.pq_vector_length - } - pub fn get_search_list_size_for_build(&self) -> u32 { self.search_list_size } @@ -55,8 +173,15 @@ impl MetaPage { self.max_alpha } - pub fn get_use_pq(&self) -> bool { - self.use_pq + pub fn get_distance_function(&self) -> fn(&[f32], &[f32]) -> f32 { + match DistanceType::from_u16(self.distance_type) { + DistanceType::Cosine => distance::distance_cosine, + DistanceType::L2 => distance::distance_l2, + } + } + + pub fn get_storage_type(&self) -> StorageType { + StorageType::from_u8(self.storage_type) } pub fn get_max_neighbors_during_build(&self) -> usize { @@ -64,36 +189,43 @@ impl MetaPage { } pub fn get_init_ids(&self) -> Option> { - if self.init_ids_block_number == 0 && self.init_ids_offset == 0 { + if !self.init_ids.is_valid() { return None; } - let ptr = HeapPointer::new(self.init_ids_block_number, self.init_ids_offset); - Some(vec![ptr]) + Some(vec![self.init_ids]) } - pub fn get_pq_pointer(&self) -> Option { - if !self.use_pq || (self.pq_block_number == 0 && self.pq_block_offset == 0) { + pub fn get_quantizer_metadata_pointer(&self) -> Option { + if !self.quantizer_metadata.is_valid() { return None; } - let ptr = IndexPointer::new(self.pq_block_number, self.pq_block_offset); - Some(ptr) + match self.get_storage_type() { + StorageType::Plain => None, + StorageType::SbqSpeedup | StorageType::SbqCompression => Some(self.quantizer_metadata), + } } - /// Returns the MetaPage from a page. - /// Should only be called from the very first page in a relation. - unsafe fn page_get_meta( - page: pg_sys::Page, - buffer: pg_sys::Buffer, - new: bool, - ) -> *mut MetaPage { - assert_eq!(BufferGetBlockNumber(buffer), 0); - let meta_page = ports::PageGetContents(page) as *mut MetaPage; - if !new { - assert_eq!((*meta_page).magic_number, TSV_MAGIC_NUMBER); + fn calculate_num_neighbors( + num_dimensions: u32, + num_bits_per_dimension: u8, + opt: &PgBox, + ) -> u32 { + let num_neighbors = (*opt).get_num_neighbors(); + if num_neighbors == NUM_NEIGHBORS_DEFAULT_SENTINEL { + match (*opt).get_storage_type() { + StorageType::Plain => 50, + StorageType::SbqSpeedup => SbqNode::get_default_num_neighbors( + num_dimensions as usize, + num_dimensions as usize, + num_bits_per_dimension, + ) as u32, + StorageType::SbqCompression => 50, + } + } else { + num_neighbors as u32 } - meta_page } /// Write out a new meta page. 
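
The hunk context above elides the body of get_max_neighbors_during_build(). Based on the GRAPH_SLACK_FACTOR constant declared at the top of this file, a plausible reconstruction of the arithmetic (a sketch, not the confirmed body) is:

const GRAPH_SLACK_FACTOR: f64 = 1.3_f64;

// During build, each node gets slack beyond num_neighbors so that pruning
// has extra candidates to choose from before trimming back down.
fn max_neighbors_during_build(num_neighbors: u32) -> usize {
    (num_neighbors as f64 * GRAPH_SLACK_FACTOR).ceil() as usize
}

// e.g. the default num_neighbors = 50 would reserve ceil(50 * 1.3) = 65 slots.
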
@@ -103,61 +235,171 @@ impl MetaPage {
         num_dimensions: u32,
         opt: PgBox<TSVIndexOptions>,
     ) -> MetaPage {
+        let version = Version::parse(env!("CARGO_PKG_VERSION")).unwrap();
+
+        let num_dimensions_to_index = if (*opt).num_dimensions == NUM_DIMENSIONS_DEFAULT_SENTINEL {
+            num_dimensions
+        } else {
+            (*opt).num_dimensions
+        };
+
+        let bq_num_bits_per_dimension =
+            if (*opt).bq_num_bits_per_dimension == SBQ_NUM_BITS_PER_DIMENSION_DEFAULT_SENTINEL {
+                if (*opt).get_storage_type() == StorageType::SbqCompression
+                    && num_dimensions_to_index < 900
+                {
+                    2
+                } else {
+                    1
+                }
+            } else {
+                (*opt).bq_num_bits_per_dimension as u8
+            };
+
+        if bq_num_bits_per_dimension > 1 && num_dimensions_to_index > 930 {
+            //limited by SbqMeans fitting on a page
+            pgrx::error!("SBQ with more than 1 bit per dimension is not supported for more than 930 dimensions");
+        }
+        if bq_num_bits_per_dimension > 1 && (*opt).get_storage_type() != StorageType::SbqCompression
+        {
+            pgrx::error!(
+                "SBQ with more than 1 bit per dimension is only supported with the memory_optimized storage layout"
+            );
+        }
+
+        let meta = MetaPage {
+            magic_number: TSV_MAGIC_NUMBER,
+            version: TSV_VERSION,
+            extension_version_when_built: version.to_string(),
+            distance_type: DistanceType::Cosine as u16,
+            num_dimensions,
+            num_dimensions_to_index,
+            storage_type: (*opt).get_storage_type() as u8,
+            num_neighbors: Self::calculate_num_neighbors(
+                num_dimensions,
+                bq_num_bits_per_dimension,
+                &opt,
+            ),
+            bq_num_bits_per_dimension,
+            search_list_size: (*opt).search_list_size,
+            max_alpha: (*opt).max_alpha,
+            init_ids: ItemPointer::new(InvalidBlockNumber, InvalidOffsetNumber),
+            quantizer_metadata: ItemPointer::new(InvalidBlockNumber, InvalidOffsetNumber),
+        };
         let page = page::WritablePage::new(index, crate::util::page::PageType::Meta);
-        let meta = Self::page_get_meta(*page, *(*(page.get_buffer())), true);
-        (*meta).magic_number = TSV_MAGIC_NUMBER;
-        (*meta).version = TSV_VERSION;
-        (*meta).num_dimensions = num_dimensions;
-        (*meta).num_neighbors = (*opt).num_neighbors;
-        (*meta).search_list_size = (*opt).search_list_size;
-        (*meta).max_alpha = (*opt).max_alpha;
-        (*meta).use_pq = (*opt).use_pq;
-        (*meta).pq_vector_length = (*opt).pq_vector_length;
-        (*meta).pq_block_number = 0;
-        (*meta).pq_block_offset = 0;
-        (*meta).init_ids_block_number = 0;
-        (*meta).init_ids_offset = 0;
-        let header = page.cast::<pg_sys::PageHeaderData>();
-
-        let meta_end = (meta as Pointer).add(std::mem::size_of::<MetaPage>());
-        let page_start = (*page) as Pointer;
-        (*header).pd_lower = meta_end.offset_from(page_start) as _;
-
-        let mp = (*meta).clone();
+        meta.write_to_page(page);
+        meta
+    }
+
+    unsafe fn write_to_page(&self, mut page: page::WritablePage) {
+        let header = MetaPageHeader {
+            magic_number: self.magic_number,
+            version: self.version,
+        };
+
+        assert!(header.magic_number == TSV_MAGIC_NUMBER);
+        assert!(header.version == TSV_VERSION);
+
+        //serialize the header
+        let bytes = header.serialize_to_vec();
+        let off = page.add_item(&bytes);
+        assert!(off == META_HEADER_OFFSET);
+
+        //serialize the meta
+        let bytes = self.serialize_to_vec();
+        let off = page.add_item(&bytes);
+        assert!(off == META_OFFSET);
         page.commit();
-        mp
+    }
+
+    unsafe fn overwrite(index: &PgRelation, new_meta: &MetaPage) {
+        let mut page = page::WritablePage::modify(index, META_BLOCK_NUMBER);
+        page.reinit(crate::util::page::PageType::Meta);
+        new_meta.write_to_page(page);
+
+        let page = page::ReadablePage::read(index, META_BLOCK_NUMBER);
+        let page_type = page.get_type();
+        if page_type != crate::util::page::PageType::Meta {
+            pgrx::error!(
+
"Problem upgrading meta page: wrong page type: {:?}", + page_type + ); + } + let meta = Self::get_meta_from_page(page); + if meta != *new_meta { + pgrx::error!("Problem upgrading meta page: meta mismatch"); + } } /// Read the meta page for an index - pub fn read(index: &PgRelation) -> MetaPage { + pub fn fetch(index: &PgRelation) -> MetaPage { unsafe { - let page = page::ReadablePage::read(index, 0); - let meta = Self::page_get_meta(*page, *(*(page.get_buffer())), false); - (*meta).clone() + let page = page::ReadablePage::read(index, META_BLOCK_NUMBER); + let page_type = page.get_type(); + if page_type == crate::util::page::PageType::MetaV1 { + let old_meta = MetaPageV1::page_get_meta(*page, *(*(page.get_buffer()))); + let new_meta = (*old_meta).get_new_meta(); + + //release the page + std::mem::drop(page); + + Self::overwrite(index, &new_meta); + return new_meta; + } + Self::get_meta_from_page(page) } } + unsafe fn get_meta_from_page(page: page::ReadablePage) -> MetaPage { + //check the header. In the future, we can use this to check the version + let rb = page.get_item_unchecked(META_HEADER_OFFSET); + let meta = ReadableMetaPageHeader::with_readable_buffer(rb); + let archived = meta.get_archived_node(); + assert!(archived.magic_number == TSV_MAGIC_NUMBER); + assert!(archived.version == TSV_VERSION); + + let page = meta.get_owned_page(); + + //retrieve the MetaPage itself and deserialize it + let rb = page.get_item_unchecked(META_OFFSET); + let meta = ReadableMetaPage::with_readable_buffer(rb); + let archived = meta.get_archived_node(); + assert!(archived.magic_number == TSV_MAGIC_NUMBER); + assert!(archived.version == TSV_VERSION); + + archived.deserialize(&mut rkyv::Infallible).unwrap() + } + /// Change the init ids for an index. - pub fn update_init_ids(index: &PgRelation, init_ids: Vec) { + pub fn update_init_ids( + index: &PgRelation, + init_ids: Vec, + stats: &mut S, + ) { assert_eq!(init_ids.len(), 1); //change this if we support multiple let id = init_ids[0]; + let mut meta = Self::fetch(index); + meta.init_ids = id; + unsafe { - let page = page::WritablePage::modify(index, 0); - let meta = Self::page_get_meta(*page, *(*(page.get_buffer())), false); - (*meta).init_ids_block_number = id.block_number; - (*meta).init_ids_offset = id.offset; - page.commit() - } + Self::overwrite(index, &meta); + stats.record_modify(); + }; } - pub fn update_pq_pointer(index: &PgRelation, pq_pointer: IndexPointer) { + pub fn update_quantizer_metadata_pointer( + index: &PgRelation, + quantizer_pointer: IndexPointer, + stats: &mut S, + ) { + let mut meta = Self::fetch(index); + meta.quantizer_metadata = quantizer_pointer; + unsafe { - let page = page::WritablePage::modify(index, 0); - let meta = Self::page_get_meta(*page, *(*(page.get_buffer())), false); - (*meta).pq_block_number = pq_pointer.block_number; - (*meta).pq_block_offset = pq_pointer.offset; - page.commit() - } + Self::overwrite(index, &meta); + stats.record_modify(); + }; } } diff --git a/timescale_vector/src/access_method/mod.rs b/timescale_vector/src/access_method/mod.rs index 77d8e607..8c23b77c 100644 --- a/timescale_vector/src/access_method/mod.rs +++ b/timescale_vector/src/access_method/mod.rs @@ -1,26 +1,47 @@ use pgrx::*; mod build; -mod builder_graph; mod cost_estimate; mod debugging; -mod disk_index_graph; mod graph; +mod graph_neighbor_store; pub mod guc; mod meta_page; -mod model; +mod neighbor_with_distance; pub mod options; +pub mod pg_vector; +mod plain_node; +mod plain_storage; mod scan; +pub mod stats; +mod storage; +mod 
storage_common; +mod upgrade_test; mod vacuum; extern crate blas_src; +pub mod distance; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] mod distance_x86; -mod pq; +mod sbq; #[pg_extern(sql = " - CREATE OR REPLACE FUNCTION tsv_amhandler(internal) RETURNS index_am_handler PARALLEL SAFE IMMUTABLE STRICT COST 0.0001 LANGUAGE c AS 'MODULE_PATHNAME', '@FUNCTION_NAME@'; - CREATE ACCESS METHOD tsv TYPE INDEX HANDLER tsv_amhandler; + CREATE OR REPLACE FUNCTION tsv_amhandler(internal) RETURNS index_am_handler PARALLEL SAFE IMMUTABLE STRICT COST 0.0001 LANGUAGE c AS '@MODULE_PATHNAME@', '@FUNCTION_NAME@'; + + DO $$ + DECLARE + c int; + BEGIN + SELECT count(*) + INTO c + FROM pg_catalog.pg_am a + WHERE a.amname = 'tsv'; + + IF c = 0 THEN + CREATE ACCESS METHOD tsv TYPE INDEX HANDLER tsv_amhandler; + END IF; + END; + $$; ")] fn amhandler(_fcinfo: pg_sys::FunctionCallInfo) -> PgBox { let mut amroutine = @@ -61,15 +82,32 @@ fn amhandler(_fcinfo: pg_sys::FunctionCallInfo) -> PgBox amroutine.amgetbitmap = None; amroutine.amendscan = Some(scan::amendscan); + amroutine.ambuildphasename = Some(build::ambuildphasename); + amroutine.into_pg_boxed() } +// This SQL is made idempotent so that we can use the same script for the installation and the upgrade. extension_sql!( r#" -CREATE OPERATOR CLASS vector_cosine_ops DEFAULT -FOR TYPE vector USING tsv AS - OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops -; +DO $$ +DECLARE + c int; +BEGIN + SELECT count(*) + INTO c + FROM pg_catalog.pg_opclass c + WHERE c.opcname = 'vector_cosine_ops' + AND c.opcmethod = (SELECT oid FROM pg_catalog.pg_am am WHERE am.amname = 'tsv'); + + IF c = 0 THEN + CREATE OPERATOR CLASS vector_cosine_ops DEFAULT + FOR TYPE vector USING tsv AS + OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops; + END IF; +END; +$$; + "#, name = "tsv_ops_operator" ); diff --git a/timescale_vector/src/access_method/model.rs b/timescale_vector/src/access_method/model.rs deleted file mode 100644 index 4de0b607..00000000 --- a/timescale_vector/src/access_method/model.rs +++ /dev/null @@ -1,420 +0,0 @@ -use std::cmp::Ordering; -use std::mem::size_of; -use std::pin::Pin; - -use ndarray::Array3; -use pgrx::pg_sys::{InvalidBlockNumber, InvalidOffsetNumber, BLCKSZ}; -use pgrx::*; -use reductive::pq::Pq; -use rkyv::vec::ArchivedVec; -use rkyv::{Archive, Archived, Deserialize, Serialize}; - -use crate::util::page::PageType; -use crate::util::tape::Tape; -use crate::util::{ - ArchivedItemPointer, HeapPointer, IndexPointer, ItemPointer, ReadableBuffer, WritableBuffer, -}; - -use super::meta_page::MetaPage; - -//Ported from pg_vector code -#[repr(C)] -#[derive(Debug)] -pub struct PgVector { - vl_len_: i32, /* varlena header (do not touch directly!) 
*/ - pub dim: i16, /* number of dimensions */ - unused: i16, - pub x: pg_sys::__IncompleteArrayField, -} - -impl PgVector { - pub unsafe fn from_pg_parts( - datum_parts: *mut pg_sys::Datum, - isnull_parts: *mut bool, - index: usize, - ) -> Option<*mut PgVector> { - let isnulls = std::slice::from_raw_parts(isnull_parts, index + 1); - if isnulls[index] { - return None; - } - let datums = std::slice::from_raw_parts(datum_parts, index + 1); - Some(Self::from_datum(datums[index])) - } - - pub unsafe fn from_datum(datum: pg_sys::Datum) -> *mut PgVector { - let detoasted = pg_sys::pg_detoast_datum(datum.cast_mut_ptr()); - let casted = detoasted.cast::(); - casted - } - - pub fn to_slice(&self) -> &[f32] { - let dim = (*self).dim; - unsafe { (*self).x.as_slice(dim as _) } - // unsafe { std::slice::from_raw_parts((*self).x, (*self).dim as _) } - } -} - -#[derive(Archive, Deserialize, Serialize)] -#[archive(check_bytes)] -pub struct Node { - pub vector: Vec, - pub pq_vector: Vec, - neighbor_index_pointers: Vec, - pub heap_item_pointer: HeapPointer, -} - -//ReadableNode ties an archive node to it's underlying buffer -pub struct ReadableNode<'a> { - _rb: ReadableBuffer<'a>, -} - -impl<'a> ReadableNode<'a> { - pub fn get_archived_node(&self) -> &ArchivedNode { - // checking the code here is expensive during build, so skip it. - // TODO: should we check the data during queries? - //rkyv::check_archived_root::(self._rb.get_data_slice()).unwrap() - unsafe { rkyv::archived_root::(self._rb.get_data_slice()) } - } -} - -//WritableNode ties an archive node to it's underlying buffer that can be modified -pub struct WritableNode<'a> { - wb: WritableBuffer<'a>, -} - -impl<'a> WritableNode<'a> { - pub fn get_archived_node(&self) -> Pin<&mut ArchivedNode> { - ArchivedNode::with_data(self.wb.get_data_slice()) - } - - pub fn commit(self) { - self.wb.commit() - } -} - -impl Node { - pub fn new(vector: Vec, heap_item_pointer: ItemPointer, meta_page: &MetaPage) -> Self { - let num_neighbors = meta_page.get_num_neighbors(); - let (vector, pq_vector) = if meta_page.get_use_pq() { - let pq_vec_len = meta_page.get_pq_vector_length(); - ( - Vec::with_capacity(0), - (0..pq_vec_len).map(|_| 0u8).collect(), - ) - } else { - (vector, Vec::with_capacity(0)) - }; - Self { - vector, - // always use vectors of num_clusters on length because we never want the serialized size of a Node to change - pq_vector, - // always use vectors of num_neighbors on length because we never want the serialized size of a Node to change - neighbor_index_pointers: (0..num_neighbors) - .map(|_| ItemPointer::new(InvalidBlockNumber, InvalidOffsetNumber)) - .collect(), - heap_item_pointer, - } - } - - pub unsafe fn read<'a>(index: &'a PgRelation, index_pointer: ItemPointer) -> ReadableNode<'a> { - let rb = index_pointer.read_bytes(index); - ReadableNode { _rb: rb } - } - - pub unsafe fn modify(index: &PgRelation, index_pointer: ItemPointer) -> WritableNode { - let wb = index_pointer.modify_bytes(index); - WritableNode { wb: wb } - } - - pub unsafe fn update_neighbors_and_pq( - index: &PgRelation, - index_pointer: ItemPointer, - neighbors: &Vec, - meta_page: &MetaPage, - vector: Option>, - ) { - let node = Node::modify(index, index_pointer); - let mut archived = node.get_archived_node(); - for (i, new_neighbor) in neighbors.iter().enumerate() { - //TODO: why do we need to recreate the archive? 
- let mut a_index_pointer = archived.as_mut().neighbor_index_pointer().index_pin(i); - //TODO hate that we have to set each field like this - a_index_pointer.block_number = - new_neighbor.get_index_pointer_to_neighbor().block_number; - a_index_pointer.offset = new_neighbor.get_index_pointer_to_neighbor().offset; - } - //set the marker that the list ended - if neighbors.len() < meta_page.get_num_neighbors() as _ { - //TODO: why do we need to recreate the archive? - let archived = node.get_archived_node(); - let mut past_last_index_pointers = - archived.neighbor_index_pointer().index_pin(neighbors.len()); - past_last_index_pointers.block_number = InvalidBlockNumber; - past_last_index_pointers.offset = InvalidOffsetNumber; - } - - match vector { - Some(v) => { - assert!(v.len() == archived.pq_vector.len()); - for i in 0..=v.len() - 1 { - let mut pgv = archived.as_mut().pq_vectors().index_pin(i); - *pgv = v[i]; - } - } - None => {} - } - - node.commit() - } - pub fn write(&self, tape: &mut Tape) -> ItemPointer { - let bytes = rkyv::to_bytes::<_, 256>(self).unwrap(); - unsafe { tape.write(&bytes) } - } -} - -/// contains helpers for mutate-in-place. See struct_mutable_refs in test_alloc.rs in rkyv -impl ArchivedNode { - pub fn with_data(data: &mut [u8]) -> Pin<&mut ArchivedNode> { - let pinned_bytes = Pin::new(data); - unsafe { rkyv::archived_root_mut::(pinned_bytes) } - } - - pub fn is_deleted(&self) -> bool { - self.heap_item_pointer.offset == InvalidOffsetNumber - } - - pub fn delete(self: Pin<&mut Self>) { - //TODO: actually optimize the deletes by removing index tuples. For now just mark it. - let mut heap_pointer = unsafe { self.map_unchecked_mut(|s| &mut s.heap_item_pointer) }; - heap_pointer.offset = InvalidOffsetNumber; - heap_pointer.block_number = InvalidBlockNumber; - } - - pub fn neighbor_index_pointer( - self: Pin<&mut Self>, - ) -> Pin<&mut ArchivedVec> { - unsafe { self.map_unchecked_mut(|s| &mut s.neighbor_index_pointers) } - } - - pub fn pq_vectors(self: Pin<&mut Self>) -> Pin<&mut Archived>> { - unsafe { self.map_unchecked_mut(|s| &mut s.pq_vector) } - } - - pub fn num_neighbors(&self) -> usize { - self.neighbor_index_pointers - .iter() - .position(|f| f.block_number == InvalidBlockNumber) - .unwrap_or(self.neighbor_index_pointers.len()) - } - - pub fn apply_to_neighbors(&self, mut f: F) - where - F: FnMut(&ArchivedItemPointer), - { - for i in 0..self.num_neighbors() { - let neighbor = &self.neighbor_index_pointers[i]; - f(neighbor); - } - } -} - -//TODO is this right? 
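
The mutate-in-place helpers used above and kept in plain_node.rs rely on rkyv's archived_root_mut plus pinned field projection. A minimal standalone illustration of that pattern, with an illustrative Slot type standing in for the archived node (assuming rkyv 0.7):

use std::pin::Pin;

use rkyv::{Archive, Deserialize, Serialize};

#[derive(Archive, Deserialize, Serialize)]
#[archive(check_bytes)]
struct Slot {
    block_number: u32,
    offset: u16,
}

fn main() {
    let mut buf = rkyv::to_bytes::<_, 64>(&Slot { block_number: 7, offset: 1 }).unwrap();

    // Reinterpret the serialized buffer as a mutable archived value and patch
    // one field in place -- no deserialize/serialize round trip, which is why
    // the node code pins the page buffer and writes through it.
    let mut archived = unsafe { rkyv::archived_root_mut::<Slot>(Pin::new(&mut buf[..])) };
    let field = unsafe { archived.as_mut().map_unchecked_mut(|s| &mut s.block_number) };
    *field.get_mut() = 9;

    // The change is visible on the next read of the same bytes.
    let check = unsafe { rkyv::archived_root::<Slot>(&buf[..]) };
    assert_eq!(check.block_number, 9);
}
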
-pub type Distance = f32; -#[derive(Clone)] -pub struct NeighborWithDistance { - index_pointer: IndexPointer, - distance: Distance, -} - -impl NeighborWithDistance { - pub fn new(neighbor_index_pointer: ItemPointer, distance: Distance) -> Self { - Self { - index_pointer: neighbor_index_pointer, - distance, - } - } - - pub fn get_index_pointer_to_neighbor(&self) -> ItemPointer { - return self.index_pointer; - } - pub fn get_distance(&self) -> Distance { - return self.distance; - } -} - -impl PartialOrd for NeighborWithDistance { - fn partial_cmp(&self, other: &Self) -> Option { - self.distance.partial_cmp(&other.distance) - } -} - -impl Ord for NeighborWithDistance { - fn cmp(&self, other: &Self) -> Ordering { - self.distance.total_cmp(&other.distance) - } -} - -impl PartialEq for NeighborWithDistance { - fn eq(&self, other: &Self) -> bool { - self.index_pointer == other.index_pointer - } -} - -//promise that PartialEq is reflexive -impl Eq for NeighborWithDistance {} - -impl std::hash::Hash for NeighborWithDistance { - fn hash(&self, state: &mut H) { - self.index_pointer.hash(state); - } -} - -#[derive(Archive, Deserialize, Serialize)] -#[archive(check_bytes)] -#[repr(C)] -pub struct PqQuantizerDef { - dim_0: usize, - dim_1: usize, - dim_2: usize, - vec_len: usize, - next_vector_pointer: ItemPointer, -} - -impl PqQuantizerDef { - pub fn new(dim_0: usize, dim_1: usize, dim_2: usize, vec_len: usize) -> PqQuantizerDef { - { - Self { - dim_0, - dim_1, - dim_2, - vec_len, - next_vector_pointer: ItemPointer { - block_number: 0, - offset: 0, - }, - } - } - } - - pub unsafe fn write(&self, tape: &mut Tape) -> ItemPointer { - let bytes = rkyv::to_bytes::<_, 256>(self).unwrap(); - tape.write(&bytes) - } - pub unsafe fn read<'a>( - index: &'a PgRelation, - index_pointer: &ItemPointer, - ) -> ReadablePqQuantizerDef<'a> { - let rb = index_pointer.read_bytes(index); - ReadablePqQuantizerDef { _rb: rb } - } -} - -pub struct ReadablePqQuantizerDef<'a> { - _rb: ReadableBuffer<'a>, -} - -impl<'a> ReadablePqQuantizerDef<'a> { - pub fn get_archived_node(&self) -> &ArchivedPqQuantizerDef { - // checking the code here is expensive during build, so skip it. - // TODO: should we check the data during queries? - //rkyv::check_archived_root::(self._rb.get_data_slice()).unwrap() - unsafe { rkyv::archived_root::(self._rb.get_data_slice()) } - } -} - -#[derive(Archive, Deserialize, Serialize)] -#[archive(check_bytes)] -#[repr(C)] -pub struct PqQuantizerVector { - vec: Vec, - next_vector_pointer: ItemPointer, -} - -impl PqQuantizerVector { - pub unsafe fn write(&self, tape: &mut Tape) -> ItemPointer { - let bytes = rkyv::to_bytes::<_, 8192>(self).unwrap(); - tape.write(&bytes) - } - pub unsafe fn read<'a>( - index: &'a PgRelation, - index_pointer: &ItemPointer, - ) -> ReadablePqVectorNode<'a> { - let rb = index_pointer.read_bytes(index); - ReadablePqVectorNode { _rb: rb } - } -} - -//ReadablePqNode ties an archive node to it's underlying buffer -pub struct ReadablePqVectorNode<'a> { - _rb: ReadableBuffer<'a>, -} - -impl<'a> ReadablePqVectorNode<'a> { - pub fn get_archived_node(&self) -> &ArchivedPqQuantizerVector { - // checking the code here is expensive during build, so skip it. - // TODO: should we check the data during queries? 
- //rkyv::check_archived_root::(self._rb.get_data_slice()).unwrap() - unsafe { rkyv::archived_root::(self._rb.get_data_slice()) } - } -} - -pub unsafe fn read_pq(index: &PgRelation, index_pointer: &IndexPointer) -> Pq { - let rpq = PqQuantizerDef::read(index, &index_pointer); - let rpn = rpq.get_archived_node(); - let mut result: Vec = Vec::new(); - let mut next = rpn.next_vector_pointer.deserialize_item_pointer(); - loop { - if next.offset == 0 && next.block_number == 0 { - break; - } - let qvn = PqQuantizerVector::read(index, &next); - let vn = qvn.get_archived_node(); - let vs = vn.vec.as_slice(); - result.extend_from_slice(vs); - next = vn.next_vector_pointer.deserialize_item_pointer(); - } - let sq = Array3::from_shape_vec( - (rpn.dim_0 as usize, rpn.dim_1 as usize, rpn.dim_2 as usize), - result, - ) - .unwrap(); - Pq::new(None, sq) -} - -pub unsafe fn write_pq(pq: Pq, index: &PgRelation) -> ItemPointer { - let vec = pq.subquantizers().to_slice_memory_order().unwrap().to_vec(); - let shape = pq.subquantizers().dim(); - let mut pq_node = PqQuantizerDef::new(shape.0, shape.1, shape.2, vec.len()); - - let mut pqt = Tape::new(index, PageType::PqQuantizerDef); - - // write out the large vector bits. - // we write "from the back" - let mut prev: IndexPointer = ItemPointer { - block_number: 0, - offset: 0, - }; - let mut prev_vec = vec; - - // get numbers that can fit in a page by subtracting the item pointer. - let block_fit = (BLCKSZ as usize / size_of::()) - size_of::() - 64; - let mut tape = Tape::new(index, PageType::PqQuantizerVector); - loop { - let l = prev_vec.len(); - if l == 0 { - pq_node.next_vector_pointer = prev; - return pq_node.write(&mut pqt); - } - let lv = prev_vec; - let ni = if l > block_fit { l - block_fit } else { 0 }; - let (b, a) = lv.split_at(ni); - - let pqv_node = PqQuantizerVector { - vec: a.to_vec(), - next_vector_pointer: prev, - }; - let index_pointer: IndexPointer = pqv_node.write(&mut tape); - prev = index_pointer; - prev_vec = b.clone().to_vec(); - } -} diff --git a/timescale_vector/src/access_method/neighbor_with_distance.rs b/timescale_vector/src/access_method/neighbor_with_distance.rs new file mode 100644 index 00000000..0a17c1ab --- /dev/null +++ b/timescale_vector/src/access_method/neighbor_with_distance.rs @@ -0,0 +1,56 @@ +use std::cmp::Ordering; + +use crate::util::{IndexPointer, ItemPointer}; + +//TODO is this right? 
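
The trait impls defined just below are deliberately asymmetric: ordering uses the distance (via f32::total_cmp) while equality and hashing use only the index pointer, so sorted structures rank by closeness while sets deduplicate edges to the same node. The following standalone sketch, with a simplified Neighbor type standing in for NeighborWithDistance, shows the effect:

use std::collections::HashSet;

#[derive(Clone, Debug)]
struct Neighbor {
    pointer: (u32, u16), // stand-in for the (block, offset) IndexPointer
    distance: f32,
}

fn main() {
    let mut candidates = vec![
        Neighbor { pointer: (1, 1), distance: 0.9 },
        Neighbor { pointer: (2, 1), distance: 0.1 },
        Neighbor { pointer: (1, 1), distance: 0.9 }, // duplicate edge
    ];

    // Order by distance, as the Ord impl does; total_cmp also gives NaN a
    // defined order, though new() asserts the distance is never NaN.
    candidates.sort_by(|a, b| a.distance.total_cmp(&b.distance));

    // Dedupe by pointer only, as the PartialEq/Hash impls do: a node is the
    // same neighbor even if its distance was recomputed slightly differently.
    let mut seen = HashSet::new();
    candidates.retain(|n| seen.insert(n.pointer));

    assert_eq!(candidates.len(), 2);
    assert_eq!(candidates[0].pointer, (2, 1));
}
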
+pub type Distance = f32; +#[derive(Clone, Debug)] +pub struct NeighborWithDistance { + index_pointer: IndexPointer, + distance: Distance, +} + +impl NeighborWithDistance { + pub fn new(neighbor_index_pointer: ItemPointer, distance: Distance) -> Self { + assert!(!distance.is_nan()); + assert!(distance >= 0.0); + Self { + index_pointer: neighbor_index_pointer, + distance, + } + } + + pub fn get_index_pointer_to_neighbor(&self) -> ItemPointer { + return self.index_pointer; + } + pub fn get_distance(&self) -> Distance { + return self.distance; + } +} + +impl PartialOrd for NeighborWithDistance { + fn partial_cmp(&self, other: &Self) -> Option { + self.distance.partial_cmp(&other.distance) + } +} + +impl Ord for NeighborWithDistance { + fn cmp(&self, other: &Self) -> Ordering { + self.distance.total_cmp(&other.distance) + } +} + +impl PartialEq for NeighborWithDistance { + fn eq(&self, other: &Self) -> bool { + self.index_pointer == other.index_pointer + } +} + +//promise that PartialEq is reflexive +impl Eq for NeighborWithDistance {} + +impl std::hash::Hash for NeighborWithDistance { + fn hash(&self, state: &mut H) { + self.index_pointer.hash(state); + } +} diff --git a/timescale_vector/src/access_method/options.rs b/timescale_vector/src/access_method/options.rs index b225295d..892df0d7 100644 --- a/timescale_vector/src/access_method/options.rs +++ b/timescale_vector/src/access_method/options.rs @@ -1,33 +1,45 @@ use memoffset::*; -use pgrx::{pg_sys::AsPgCStr, prelude::*, set_varsize, PgRelation}; -use std::fmt::Debug; +use pgrx::{pg_sys::AsPgCStr, prelude::*, set_varsize, void_ptr, PgRelation}; +use std::{ffi::CStr, fmt::Debug}; -#[derive(Copy, Clone, Debug, PartialEq)] +use super::storage::StorageType; + +//DO NOT derive Clone for this struct. The storage layout string comes at the end and wouldn't be copied properly. +#[derive(Debug, PartialEq)] #[repr(C)] pub struct TSVIndexOptions { /* varlena header (do not touch directly!) */ #[allow(dead_code)] vl_len_: i32, - pub num_neighbors: u32, + pub storage_layout_offset: i32, + num_neighbors: i32, pub search_list_size: u32, + pub num_dimensions: u32, pub max_alpha: f64, - pub use_pq: bool, - pub pq_vector_length: usize, + pub bq_num_bits_per_dimension: u32, } +pub const NUM_NEIGHBORS_DEFAULT_SENTINEL: i32 = -1; +pub const NUM_DIMENSIONS_DEFAULT_SENTINEL: u32 = 0; +pub const SBQ_NUM_BITS_PER_DIMENSION_DEFAULT_SENTINEL: u32 = 0; +const DEFAULT_MAX_ALPHA: f64 = 1.2; + impl TSVIndexOptions { + //note: this should only be used when building a new index. The options aren't really versioned. + //therefore, we should move all the options to the meta page when building the index (meta pages are properly versioned). 
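
Several options below store a sentinel instead of a real default because the effective default depends on information (storage layout, dimension count) that is only known when the index is built, at which point the resolved value is persisted in the versioned meta page. A sketch of the resolution that calculate_num_neighbors in meta_page.rs applies at build time, with simplified names (Layout, resolve_num_neighbors are illustrative):

const NUM_NEIGHBORS_DEFAULT_SENTINEL: i32 = -1;

#[derive(Clone, Copy, PartialEq)]
enum Layout {
    Plain,
    SbqSpeedup,
    SbqCompression,
}

fn resolve_num_neighbors(stored: i32, layout: Layout, default_for_dims: u32) -> u32 {
    if stored != NUM_NEIGHBORS_DEFAULT_SENTINEL {
        return stored as u32;
    }
    match layout {
        // fixed default when the layout does not depend on vector width
        Layout::Plain | Layout::SbqCompression => 50,
        // derived from the dimension count and bits per dimension
        Layout::SbqSpeedup => default_for_dims,
    }
}
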
    pub fn from_relation(relation: &PgRelation) -> PgBox<TSVIndexOptions> {
        if relation.rd_index.is_null() {
            panic!("'{}' is not a TSV index", relation.name())
        } else if relation.rd_options.is_null() {
            // use defaults
            let mut ops = unsafe { PgBox::<TSVIndexOptions>::alloc0() };
-            ops.num_neighbors = 50;
-            ops.search_list_size = 65;
-            ops.max_alpha = 1.0;
-            ops.use_pq = false;
-            ops.pq_vector_length = 64;
+            ops.storage_layout_offset = 0;
+            ops.num_neighbors = NUM_NEIGHBORS_DEFAULT_SENTINEL;
+            ops.search_list_size = 100;
+            ops.max_alpha = DEFAULT_MAX_ALPHA;
+            ops.num_dimensions = NUM_DIMENSIONS_DEFAULT_SENTINEL;
+            ops.bq_num_bits_per_dimension = SBQ_NUM_BITS_PER_DIMENSION_DEFAULT_SENTINEL;
             unsafe {
                 set_varsize(
                     ops.as_ptr().cast(),
@@ -39,11 +51,53 @@ impl TSVIndexOptions {
             unsafe { PgBox::from_pg(relation.rd_options as *mut TSVIndexOptions) }
         }
     }
+
+    pub fn get_num_neighbors(&self) -> i32 {
+        if self.num_neighbors == NUM_NEIGHBORS_DEFAULT_SENTINEL {
+            //return the sentinel to signal that the default should be used.
+            //we can't derive the default at this point in the code because, in the io_optimized case, it depends on the number of dimensions in the vector.
+            NUM_NEIGHBORS_DEFAULT_SENTINEL
+        } else {
+            if self.num_neighbors < 10 {
+                panic!("num_neighbors must be 10 or greater, or -1 for the default")
+            }
+            self.num_neighbors
+        }
+    }
+
+    pub fn get_storage_type(&self) -> StorageType {
+        let s = self.get_str(self.storage_layout_offset, || {
+            super::storage::DEFAULT_STORAGE_TYPE_STR.to_owned()
+        });
+
+        StorageType::from_str(s.as_str())
+    }
+
+    fn get_str<F: Fn() -> String>(&self, offset: i32, default: F) -> String {
+        if offset == 0 {
+            default()
+        } else {
+            let opts = self as *const _ as void_ptr as usize;
+            let value =
+                unsafe { CStr::from_ptr((opts + offset as usize) as *const std::os::raw::c_char) };

+            value.to_str().unwrap().to_owned()
+        }
+    }
 }
 
-const NUM_REL_OPTS: usize = 5;
+const NUM_REL_OPTS: usize = 6;
 static mut RELOPT_KIND_TSV: pg_sys::relopt_kind = 0;
 
+// amoptions is a function that gets a datum of text[] data from pg_class.reloptions (which contains text in the format "key=value") and returns a bytea for the struct of the parsed options.
+// this is used to fill the rd_options field in the index relation.
+// except during build, the validate parameter should be false.
+// any option that is no longer recognized but still exists in the reloptions will simply be ignored when validate is false.
+// therefore, it is safe to change the options struct and add/remove options without breaking existing indexes.
+// but note that the standard parsing has no way to put "migration" logic in here. So all new options have to have default values when reading old indexes.
+// we could do additional logic to fix this here, but instead we just move the option values to the meta page when building the index, and do versioning there.
+// side note: this logic is not used in \d+ and similar psql commands to get description info. Those commands use the text array in pg_class.reloptions directly.
+// so when displaying the info, they'll show the old options and their values as set when the index was created.
 #[allow(clippy::unneeded_field_pattern)] // b/c of offset_of!()
 #[pg_guard]
 pub unsafe extern "C" fn amoptions(
@@ -52,6 +106,11 @@ pub unsafe extern "C" fn amoptions(
 ) -> *mut pg_sys::bytea {
     // TODO: how to make this const?
we can't use offset_of!() macro in const definitions, apparently let tab: [pg_sys::relopt_parse_elt; NUM_REL_OPTS] = [ + pg_sys::relopt_parse_elt { + optname: "storage_layout".as_pg_cstr(), + opttype: pg_sys::relopt_type_RELOPT_TYPE_STRING, + offset: offset_of!(TSVIndexOptions, storage_layout_offset) as i32, + }, pg_sys::relopt_parse_elt { optname: "num_neighbors".as_pg_cstr(), opttype: pg_sys::relopt_type_RELOPT_TYPE_INT, @@ -63,19 +122,19 @@ pub unsafe extern "C" fn amoptions( offset: offset_of!(TSVIndexOptions, search_list_size) as i32, }, pg_sys::relopt_parse_elt { - optname: "max_alpha".as_pg_cstr(), - opttype: pg_sys::relopt_type_RELOPT_TYPE_REAL, - offset: offset_of!(TSVIndexOptions, max_alpha) as i32, + optname: "num_dimensions".as_pg_cstr(), + opttype: pg_sys::relopt_type_RELOPT_TYPE_INT, + offset: offset_of!(TSVIndexOptions, num_dimensions) as i32, }, pg_sys::relopt_parse_elt { - optname: "use_pq".as_pg_cstr(), - opttype: pg_sys::relopt_type_RELOPT_TYPE_BOOL, - offset: offset_of!(TSVIndexOptions, use_pq) as i32, + optname: "num_bits_per_dimension".as_pg_cstr(), + opttype: pg_sys::relopt_type_RELOPT_TYPE_INT, + offset: offset_of!(TSVIndexOptions, bq_num_bits_per_dimension) as i32, }, pg_sys::relopt_parse_elt { - optname: "pq_vector_length".as_pg_cstr(), - opttype: pg_sys::relopt_type_RELOPT_TYPE_INT, - offset: offset_of!(TSVIndexOptions, pq_vector_length) as i32, + optname: "max_alpha".as_pg_cstr(), + opttype: pg_sys::relopt_type_RELOPT_TYPE_REAL, + offset: offset_of!(TSVIndexOptions, max_alpha) as i32, }, ]; @@ -103,15 +162,37 @@ unsafe fn build_relopts( rdopts as *mut pg_sys::bytea } +#[pg_guard] +extern "C" fn validate_storage_layout(value: *const std::os::raw::c_char) { + if value.is_null() { + // use a default value + return; + } + + let value = unsafe { CStr::from_ptr(value) } + .to_str() + .expect("failed to parse storage_layout value"); + _ = StorageType::from_str(value); +} + pub unsafe fn init() { RELOPT_KIND_TSV = pg_sys::add_reloption_kind(); + pg_sys::add_string_reloption( + RELOPT_KIND_TSV, + "storage_layout".as_pg_cstr(), + "Storage layout: either memory_optimized, io_optimized, or plain".as_pg_cstr(), + super::storage::DEFAULT_STORAGE_TYPE_STR.as_pg_cstr(), + Some(validate_storage_layout), + pg_sys::AccessExclusiveLock as pg_sys::LOCKMODE, + ); + pg_sys::add_int_reloption( RELOPT_KIND_TSV, "num_neighbors".as_pg_cstr(), "Maximum number of neighbors in the graph".as_pg_cstr(), - 50, - 10, + NUM_NEIGHBORS_DEFAULT_SENTINEL, + -1, 1000, pg_sys::AccessExclusiveLock as pg_sys::LOCKMODE, ); @@ -130,25 +211,29 @@ pub unsafe fn init() { RELOPT_KIND_TSV, "max_alpha".as_pg_cstr(), "The maximum alpha used in pruning".as_pg_cstr(), - 1.0, + DEFAULT_MAX_ALPHA, 1.0, 5.0, pg_sys::AccessExclusiveLock as pg_sys::LOCKMODE, ); - pg_sys::add_bool_reloption( + + pg_sys::add_int_reloption( RELOPT_KIND_TSV, - "use_pq".as_pg_cstr(), - "Enable product quantization".as_pg_cstr(), - false, + "num_dimensions".as_pg_cstr(), + "The number of dimensions to index (0 to index all dimensions)".as_pg_cstr(), + 0, + 0, + 5000, pg_sys::AccessExclusiveLock as pg_sys::LOCKMODE, ); + pg_sys::add_int_reloption( RELOPT_KIND_TSV, - "pq_vector_length".as_pg_cstr(), - "Length of the quantized vector representation".as_pg_cstr(), - 256, - 8, - 256, + "num_bits_per_dimension".as_pg_cstr(), + "The number of bits to use per dimension for compressed storage".as_pg_cstr(), + SBQ_NUM_BITS_PER_DIMENSION_DEFAULT_SENTINEL as _, + 0, + 32, pg_sys::AccessExclusiveLock as pg_sys::LOCKMODE, ); } @@ -156,7 +241,13 @@ pub 
unsafe fn init() { #[cfg(any(test, feature = "pg_test"))] #[pgrx::pg_schema] mod tests { - use crate::access_method::options::TSVIndexOptions; + use crate::access_method::{ + options::{ + TSVIndexOptions, DEFAULT_MAX_ALPHA, NUM_DIMENSIONS_DEFAULT_SENTINEL, + NUM_NEIGHBORS_DEFAULT_SENTINEL, SBQ_NUM_BITS_PER_DIMENSION_DEFAULT_SENTINEL, + }, + storage::StorageType, + }; use pgrx::*; #[pg_test] @@ -174,6 +265,11 @@ mod tests { let indexrel = PgRelation::from_pg(pg_sys::RelationIdGetRelation(index_oid)); let options = TSVIndexOptions::from_relation(&indexrel); assert_eq!(options.num_neighbors, 30); + assert_eq!(options.num_dimensions, NUM_DIMENSIONS_DEFAULT_SENTINEL); + assert_eq!( + options.bq_num_bits_per_dimension, + SBQ_NUM_BITS_PER_DIMENSION_DEFAULT_SENTINEL, + ); Ok(()) } @@ -190,11 +286,107 @@ mod tests { Spi::get_one::("SELECT 'idxtest'::regclass::oid")?.expect("oid was null"); let indexrel = PgRelation::from_pg(pg_sys::RelationIdGetRelation(index_oid)); let options = TSVIndexOptions::from_relation(&indexrel); - assert_eq!(options.num_neighbors, 50); - assert_eq!(options.search_list_size, 65); - assert_eq!(options.max_alpha, 1.0); - assert_eq!(options.use_pq, false); - assert_eq!(options.pq_vector_length, 64); + assert_eq!(options.get_num_neighbors(), NUM_NEIGHBORS_DEFAULT_SENTINEL); + assert_eq!(options.search_list_size, 100); + assert_eq!(options.max_alpha, DEFAULT_MAX_ALPHA); + assert_eq!(options.num_dimensions, NUM_DIMENSIONS_DEFAULT_SENTINEL); + assert_eq!(options.get_storage_type(), StorageType::SbqCompression); + assert_eq!( + options.bq_num_bits_per_dimension, + SBQ_NUM_BITS_PER_DIMENSION_DEFAULT_SENTINEL, + ); + Ok(()) + } + + #[pg_test] + unsafe fn test_index_options_bq() -> spi::Result<()> { + Spi::run(&format!( + "CREATE TABLE test(encoding vector(3)); + CREATE INDEX idxtest + ON test + USING tsv(encoding) + WITH (storage_layout = io_optimized);", + ))?; + + let index_oid = + Spi::get_one::("SELECT 'idxtest'::regclass::oid")?.expect("oid was null"); + let indexrel = PgRelation::from_pg(pg_sys::RelationIdGetRelation(index_oid)); + let options = TSVIndexOptions::from_relation(&indexrel); + assert_eq!(options.get_num_neighbors(), NUM_NEIGHBORS_DEFAULT_SENTINEL); + assert_eq!(options.search_list_size, 100); + assert_eq!(options.max_alpha, DEFAULT_MAX_ALPHA); + assert_eq!(options.num_dimensions, NUM_DIMENSIONS_DEFAULT_SENTINEL); + assert_eq!(options.get_storage_type(), StorageType::SbqSpeedup); + Ok(()) + } + + #[pg_test] + unsafe fn test_index_options_plain() -> spi::Result<()> { + Spi::run(&format!( + "CREATE TABLE test(encoding vector(3)); + CREATE INDEX idxtest + ON test + USING tsv(encoding) + WITH (storage_layout = plain);", + ))?; + + let index_oid = + Spi::get_one::("SELECT 'idxtest'::regclass::oid")?.expect("oid was null"); + let indexrel = PgRelation::from_pg(pg_sys::RelationIdGetRelation(index_oid)); + let options = TSVIndexOptions::from_relation(&indexrel); + assert_eq!(options.get_num_neighbors(), NUM_NEIGHBORS_DEFAULT_SENTINEL); + assert_eq!(options.search_list_size, 100); + assert_eq!(options.max_alpha, DEFAULT_MAX_ALPHA); + assert_eq!(options.get_storage_type(), StorageType::Plain); + Ok(()) + } + + #[pg_test] + unsafe fn test_index_options_custom() -> spi::Result<()> { + Spi::run(&format!( + "CREATE TABLE test(encoding vector(3)); + CREATE INDEX idxtest + ON test + USING tsv(encoding) + WITH (storage_layout = plain, num_neighbors=40, search_list_size=18, num_dimensions=20, max_alpha=1.4);", + ))?; + + let index_oid = + Spi::get_one::("SELECT 
'idxtest'::regclass::oid")?.expect("oid was null"); + let indexrel = PgRelation::from_pg(pg_sys::RelationIdGetRelation(index_oid)); + let options = TSVIndexOptions::from_relation(&indexrel); + assert_eq!(options.get_num_neighbors(), 40); + assert_eq!(options.search_list_size, 18); + assert_eq!(options.max_alpha, 1.4); + assert_eq!(options.get_storage_type(), StorageType::Plain); + assert_eq!(options.num_dimensions, 20); + assert_eq!( + options.bq_num_bits_per_dimension, + SBQ_NUM_BITS_PER_DIMENSION_DEFAULT_SENTINEL + ); + Ok(()) + } + + #[pg_test] + unsafe fn test_index_options_custom_mem_optimized() -> spi::Result<()> { + Spi::run(&format!( + "CREATE TABLE test(encoding vector(3)); + CREATE INDEX idxtest + ON test + USING tsv(encoding) + WITH (storage_layout = memory_optimized, num_neighbors=40, search_list_size=18, num_dimensions=20, max_alpha=1.4, num_bits_per_dimension=5);", + ))?; + + let index_oid = + Spi::get_one::("SELECT 'idxtest'::regclass::oid")?.expect("oid was null"); + let indexrel = PgRelation::from_pg(pg_sys::RelationIdGetRelation(index_oid)); + let options = TSVIndexOptions::from_relation(&indexrel); + assert_eq!(options.get_num_neighbors(), 40); + assert_eq!(options.search_list_size, 18); + assert_eq!(options.max_alpha, 1.4); + assert_eq!(options.get_storage_type(), StorageType::SbqCompression); + assert_eq!(options.num_dimensions, 20); + assert_eq!(options.bq_num_bits_per_dimension, 5); Ok(()) } } diff --git a/timescale_vector/src/access_method/pg_vector.rs b/timescale_vector/src/access_method/pg_vector.rs new file mode 100644 index 00000000..66abba0f --- /dev/null +++ b/timescale_vector/src/access_method/pg_vector.rs @@ -0,0 +1,147 @@ +use pgrx::*; + +use super::{distance::preprocess_cosine, meta_page}; + +//Ported from pg_vector code +#[repr(C)] +#[derive(Debug)] +pub struct PgVectorInternal { + vl_len_: i32, /* varlena header (do not touch directly!) */ + pub dim: i16, /* number of dimensions */ + unused: i16, + pub x: pg_sys::__IncompleteArrayField, +} + +impl PgVectorInternal { + pub fn to_slice(&self) -> &[f32] { + let dim = (*self).dim; + let raw_slice = unsafe { (*self).x.as_slice(dim as _) }; + raw_slice + } +} + +pub struct PgVector { + index_distance: Option<*mut PgVectorInternal>, + index_distance_needs_pfree: bool, + full_distance: Option<*mut PgVectorInternal>, + full_distance_needs_pfree: bool, +} + +impl Drop for PgVector { + fn drop(&mut self) { + if self.index_distance_needs_pfree { + unsafe { + if self.index_distance.is_some() { + pg_sys::pfree(self.index_distance.unwrap().cast()); + } + } + } + if self.full_distance_needs_pfree { + unsafe { + if self.full_distance.is_some() { + pg_sys::pfree(self.full_distance.unwrap().cast()); + } + } + } + } +} + +impl PgVector { + pub unsafe fn from_pg_parts( + datum_parts: *mut pg_sys::Datum, + isnull_parts: *mut bool, + index: usize, + meta_page: &meta_page::MetaPage, + index_distance: bool, + full_distance: bool, + ) -> Option { + let isnulls = std::slice::from_raw_parts(isnull_parts, index + 1); + if isnulls[index] { + return None; + } + let datums = std::slice::from_raw_parts(datum_parts, index + 1); + Some(Self::from_datum( + datums[index], + meta_page, + index_distance, + full_distance, + )) + } + + unsafe fn create_inner( + datum: pg_sys::Datum, + meta_page: &meta_page::MetaPage, + is_index_distance: bool, + ) -> *mut PgVectorInternal { + //TODO: we are using a copy here to avoid lifetime issues and because in some cases we have to + //modify the datum in preprocess_cosine. 
We should find a way to avoid the copy if the vector is
+        //normalized and preprocess_cosine is a no-op.
+        let detoasted = pg_sys::pg_detoast_datum_copy(datum.cast_mut_ptr());
+        let is_copy = !std::ptr::eq(
+            detoasted.cast::<PgVectorInternal>(),
+            datum.cast_mut_ptr::<PgVectorInternal>(),
+        );
+
+        /* if is_copy ever changes, need to change needs_pfree */
+        assert!(is_copy, "Datum should be a copy");
+        let casted = detoasted.cast::<PgVectorInternal>();
+
+        if is_index_distance
+            && meta_page.get_num_dimensions() != meta_page.get_num_dimensions_to_index()
+        {
+            assert!((*casted).dim > meta_page.get_num_dimensions_to_index() as _);
+            (*casted).dim = meta_page.get_num_dimensions_to_index() as _;
+        }
+
+        let dim = (*casted).dim;
+        let raw_slice = unsafe { (*casted).x.as_mut_slice(dim as _) };
+
+        preprocess_cosine(raw_slice);
+        casted
+    }
+
+    pub unsafe fn from_datum(
+        datum: pg_sys::Datum,
+        meta_page: &meta_page::MetaPage,
+        index_distance: bool,
+        full_distance: bool,
+    ) -> PgVector {
+        if meta_page.get_num_dimensions() == meta_page.get_num_dimensions_to_index() {
+            /* optimization if the num dimensions are the same */
+            let inner = Self::create_inner(datum, meta_page, true);
+            return PgVector {
+                index_distance: Some(inner),
+                index_distance_needs_pfree: true,
+                full_distance: Some(inner),
+                full_distance_needs_pfree: false,
+            };
+        }
+
+        let idx = if index_distance {
+            Some(Self::create_inner(datum, meta_page, true))
+        } else {
+            None
+        };
+
+        let full = if full_distance {
+            Some(Self::create_inner(datum, meta_page, false))
+        } else {
+            None
+        };
+
+        PgVector {
+            index_distance: idx,
+            index_distance_needs_pfree: true,
+            full_distance: full,
+            full_distance_needs_pfree: true,
+        }
+    }
+
+    pub fn to_index_slice(&self) -> &[f32] {
+        unsafe { (*self.index_distance.unwrap()).to_slice() }
+    }
+
+    pub fn to_full_slice(&self) -> &[f32] {
+        unsafe { (*self.full_distance.unwrap()).to_slice() }
+    }
+}
diff --git a/timescale_vector/src/access_method/plain_node.rs b/timescale_vector/src/access_method/plain_node.rs
new file mode 100644
index 00000000..b63096ea
--- /dev/null
+++ b/timescale_vector/src/access_method/plain_node.rs
@@ -0,0 +1,132 @@
+use std::pin::Pin;
+
+use pgrx::pg_sys::{InvalidBlockNumber, InvalidOffsetNumber};
+use pgrx::*;
+use rkyv::vec::ArchivedVec;
+use rkyv::{Archive, Deserialize, Serialize};
+use timescale_vector_derive::{Readable, Writeable};
+
+use super::neighbor_with_distance::NeighborWithDistance;
+use super::storage::ArchivedData;
+use crate::util::{ArchivedItemPointer, HeapPointer, ItemPointer, ReadableBuffer, WritableBuffer};
+
+use super::meta_page::MetaPage;
+
+#[derive(Archive, Deserialize, Serialize, Readable, Writeable)]
+#[archive(check_bytes)]
+pub struct Node {
+    pub vector: Vec<f32>,
+    pub pq_vector: Vec<u8>,
+    neighbor_index_pointers: Vec<ItemPointer>,
+    pub heap_item_pointer: HeapPointer,
+}
+
+impl Node {
+    fn new_internal(
+        vector: Vec<f32>,
+        pq_vector: Vec<u8>,
+        heap_item_pointer: ItemPointer,
+        meta_page: &MetaPage,
+    ) -> Self {
+        let num_neighbors = meta_page.get_num_neighbors();
+        Self {
+            vector,
+            // always use vectors of num_clusters in length because we never want the serialized size of a Node to change
+            pq_vector,
+            // always use vectors of num_neighbors in length because we never want the serialized size of a Node to change
+            neighbor_index_pointers: (0..num_neighbors)
+                .map(|_| ItemPointer::new(InvalidBlockNumber, InvalidOffsetNumber))
+                .collect(),
+            heap_item_pointer,
+        }
+    }
+
+    pub fn new_for_full_vector(
+        vector: Vec<f32>,
+        heap_item_pointer: ItemPointer,
+        meta_page: &MetaPage,
+    ) -> Self {
+        let pq_vector =
Vec::with_capacity(0); + Self::new_internal(vector, pq_vector, heap_item_pointer, meta_page) + } +} + +/// contains helpers for mutate-in-place. See struct_mutable_refs in test_alloc.rs in rkyv +impl ArchivedNode { + pub fn is_deleted(&self) -> bool { + self.heap_item_pointer.offset == InvalidOffsetNumber + } + + pub fn delete(self: Pin<&mut Self>) { + //TODO: actually optimize the deletes by removing index tuples. For now just mark it. + let mut heap_pointer = unsafe { self.map_unchecked_mut(|s| &mut s.heap_item_pointer) }; + heap_pointer.offset = InvalidOffsetNumber; + heap_pointer.block_number = InvalidBlockNumber; + } + + pub fn neighbor_index_pointer( + self: Pin<&mut Self>, + ) -> Pin<&mut ArchivedVec> { + unsafe { self.map_unchecked_mut(|s| &mut s.neighbor_index_pointers) } + } + + pub fn num_neighbors(&self) -> usize { + self.neighbor_index_pointers + .iter() + .position(|f| f.block_number == InvalidBlockNumber) + .unwrap_or(self.neighbor_index_pointers.len()) + } + + pub fn iter_neighbors(&self) -> impl Iterator + '_ { + self.neighbor_index_pointers + .iter() + .take(self.num_neighbors()) + .map(|ip| ip.deserialize_item_pointer()) + } + + pub fn set_neighbors( + mut self: Pin<&mut Self>, + neighbors: &[NeighborWithDistance], + meta_page: &MetaPage, + ) { + for (i, new_neighbor) in neighbors.iter().enumerate() { + let mut a_index_pointer = self.as_mut().neighbor_index_pointer().index_pin(i); + //TODO hate that we have to set each field like this + a_index_pointer.block_number = + new_neighbor.get_index_pointer_to_neighbor().block_number; + a_index_pointer.offset = new_neighbor.get_index_pointer_to_neighbor().offset; + } + //set the marker that the list ended + if neighbors.len() < meta_page.get_num_neighbors() as _ { + let mut past_last_index_pointers = + self.neighbor_index_pointer().index_pin(neighbors.len()); + past_last_index_pointers.block_number = InvalidBlockNumber; + past_last_index_pointers.offset = InvalidOffsetNumber; + } + } +} + +impl ArchivedData for ArchivedNode { + fn with_data(data: &mut [u8]) -> Pin<&mut ArchivedNode> { + ArchivedNode::with_data(data) + } + + fn get_index_pointer_to_neighbors(&self) -> Vec { + self.iter_neighbors().collect() + } + + fn is_deleted(&self) -> bool { + self.heap_item_pointer.offset == InvalidOffsetNumber + } + + fn delete(self: Pin<&mut Self>) { + //TODO: actually optimize the deletes by removing index tuples. For now just mark it. 
+ let mut heap_pointer = unsafe { self.map_unchecked_mut(|s| &mut s.heap_item_pointer) }; + heap_pointer.offset = InvalidOffsetNumber; + heap_pointer.block_number = InvalidBlockNumber; + } + + fn get_heap_item_pointer(&self) -> HeapPointer { + self.heap_item_pointer.deserialize_item_pointer() + } +} diff --git a/timescale_vector/src/access_method/plain_storage.rs b/timescale_vector/src/access_method/plain_storage.rs new file mode 100644 index 00000000..d5872173 --- /dev/null +++ b/timescale_vector/src/access_method/plain_storage.rs @@ -0,0 +1,411 @@ +use super::{ + graph::{ListSearchNeighbor, ListSearchResult}, + graph_neighbor_store::GraphNeighborStore, + pg_vector::PgVector, + plain_node::{ArchivedNode, Node, ReadableNode}, + stats::{ + GreedySearchStats, StatsDistanceComparison, StatsHeapNodeRead, StatsNodeModify, + StatsNodeRead, StatsNodeWrite, WriteStats, + }, + storage::{ArchivedData, NodeDistanceMeasure, Storage}, + storage_common::get_attribute_number_from_index, +}; + +use pgrx::PgRelation; + +use crate::util::{ + page::PageType, table_slot::TableSlot, tape::Tape, HeapPointer, IndexPointer, ItemPointer, +}; + +use super::{meta_page::MetaPage, neighbor_with_distance::NeighborWithDistance}; + +pub struct PlainStorage<'a> { + pub index: &'a PgRelation, + pub distance_fn: fn(&[f32], &[f32]) -> f32, + heap_rel: &'a PgRelation, + heap_attr: pgrx::pg_sys::AttrNumber, +} + +impl<'a> PlainStorage<'a> { + pub fn new_for_build( + index: &'a PgRelation, + heap_rel: &'a PgRelation, + distance_fn: fn(&[f32], &[f32]) -> f32, + ) -> PlainStorage<'a> { + Self { + index: index, + distance_fn: distance_fn, + heap_rel: heap_rel, + heap_attr: get_attribute_number_from_index(index), + } + } + + pub fn load_for_insert( + index_relation: &'a PgRelation, + heap_rel: &'a PgRelation, + distance_fn: fn(&[f32], &[f32]) -> f32, + ) -> PlainStorage<'a> { + Self { + index: index_relation, + distance_fn: distance_fn, + heap_rel: heap_rel, + heap_attr: get_attribute_number_from_index(&index_relation), + } + } + + pub fn load_for_search( + index_relation: &'a PgRelation, + heap_rel: &'a PgRelation, + distance_fn: fn(&[f32], &[f32]) -> f32, + ) -> PlainStorage<'a> { + Self { + index: index_relation, + distance_fn: distance_fn, + heap_rel: heap_rel, + heap_attr: get_attribute_number_from_index(&index_relation), + } + } +} + +pub enum PlainDistanceMeasure { + Full(PgVector), +} + +impl PlainDistanceMeasure { + pub fn calculate_distance( + distance_fn: fn(&[f32], &[f32]) -> f32, + query: &[f32], + vector: &[f32], + stats: &mut S, + ) -> f32 { + assert!(vector.len() > 0); + assert!(vector.len() == query.len()); + stats.record_full_distance_comparison(); + (distance_fn)(query, vector) + } +} + +/* This is only applicable to plain, so keep here not in storage_common */ +pub struct IndexFullDistanceMeasure<'a> { + readable_node: ReadableNode<'a>, + storage: &'a PlainStorage<'a>, +} + +impl<'a> IndexFullDistanceMeasure<'a> { + pub unsafe fn with_index_pointer( + storage: &'a PlainStorage<'a>, + index_pointer: IndexPointer, + stats: &mut T, + ) -> Self { + let rn = unsafe { Node::read(storage.index, index_pointer, stats) }; + Self { + readable_node: rn, + storage: storage, + } + } + + pub unsafe fn with_readable_node( + storage: &'a PlainStorage<'a>, + readable_node: ReadableNode<'a>, + ) -> Self { + Self { + readable_node: readable_node, + storage: storage, + } + } +} + +impl<'a> NodeDistanceMeasure for IndexFullDistanceMeasure<'a> { + unsafe fn get_distance( + &self, + index_pointer: IndexPointer, + stats: &mut T, + ) -> 
f32 { + let rn1 = Node::read(self.storage.index, index_pointer, stats); + let rn2 = &self.readable_node; + let node1 = rn1.get_archived_node(); + let node2 = rn2.get_archived_node(); + assert!(node1.vector.len() > 0); + assert!(node1.vector.len() == node2.vector.len()); + let vec1 = node1.vector.as_slice(); + let vec2 = node2.vector.as_slice(); + (self.storage.get_distance_function())(vec1, vec2) + } +} + +//todo move to storage_common +pub struct PlainStorageLsnPrivateData { + pub heap_pointer: HeapPointer, + pub neighbors: Vec, +} + +impl PlainStorageLsnPrivateData { + pub fn new( + index_pointer_to_node: IndexPointer, + node: &ArchivedNode, + gns: &GraphNeighborStore, + ) -> Self { + let heap_pointer = node.heap_item_pointer.deserialize_item_pointer(); + let neighbors = match gns { + GraphNeighborStore::Disk => node.get_index_pointer_to_neighbors(), + GraphNeighborStore::Builder(b) => b.get_neighbors(index_pointer_to_node), + }; + Self { + heap_pointer: heap_pointer, + neighbors: neighbors, + } + } +} + +impl<'a> Storage for PlainStorage<'a> { + type QueryDistanceMeasure = PlainDistanceMeasure; + type NodeDistanceMeasure<'b> = IndexFullDistanceMeasure<'b> where Self: 'b; + type ArchivedType = ArchivedNode; + type LSNPrivateData = PlainStorageLsnPrivateData; + + fn page_type() -> PageType { + PageType::Node + } + + fn create_node( + &self, + full_vector: &[f32], + heap_pointer: HeapPointer, + meta_page: &MetaPage, + tape: &mut Tape, + stats: &mut S, + ) -> ItemPointer { + //OPT: avoid the clone? + let node = Node::new_for_full_vector(full_vector.to_vec(), heap_pointer, meta_page); + let index_pointer: IndexPointer = node.write(tape, stats); + index_pointer + } + + fn start_training(&mut self, _meta_page: &super::meta_page::MetaPage) {} + + fn add_sample(&mut self, _sample: &[f32]) {} + + fn finish_training(&mut self, _stats: &mut WriteStats) {} + + fn finalize_node_at_end_of_build( + &mut self, + meta: &MetaPage, + index_pointer: IndexPointer, + neighbors: &Vec, + stats: &mut S, + ) { + let node = unsafe { Node::modify(self.index, index_pointer, stats) }; + let mut archived = node.get_archived_node(); + archived.as_mut().set_neighbors(neighbors, &meta); + node.commit(); + } + + unsafe fn get_node_distance_measure<'b, S: StatsNodeRead>( + &'b self, + index_pointer: IndexPointer, + stats: &mut S, + ) -> Self::NodeDistanceMeasure<'b> { + IndexFullDistanceMeasure::with_index_pointer(self, index_pointer, stats) + } + + fn get_query_distance_measure(&self, query: PgVector) -> PlainDistanceMeasure { + return PlainDistanceMeasure::Full(query); + } + fn get_full_distance_for_resort( + &self, + qdm: &Self::QueryDistanceMeasure, + _index_pointer: IndexPointer, + heap_pointer: HeapPointer, + meta_page: &MetaPage, + stats: &mut S, + ) -> f32 { + /* Plain storage only needs to resort when the index is using less dimensions than the underlying data. 
*/
+        assert!(meta_page.get_num_dimensions() > meta_page.get_num_dimensions_to_index());
+
+        let slot = unsafe { TableSlot::new(self.heap_rel, heap_pointer, stats) };
+        match qdm {
+            PlainDistanceMeasure::Full(query) => {
+                let datum = unsafe { slot.get_attribute(self.heap_attr).unwrap() };
+                let vec = unsafe { PgVector::from_datum(datum, meta_page, false, true) };
+                self.get_distance_function()(vec.to_full_slice(), query.to_full_slice())
+            }
+        }
+    }
+    fn get_neighbors_with_distances_from_disk<S: StatsNodeRead + StatsDistanceComparison>(
+        &self,
+        neighbors_of: ItemPointer,
+        result: &mut Vec<NeighborWithDistance>,
+        stats: &mut S,
+    ) {
+        let rn = unsafe { Node::read(self.index, neighbors_of, stats) };
+        //get a copy of the neighbors before giving ownership of rn to the distance state
+        let neighbors: Vec<_> = rn.get_archived_node().iter_neighbors().collect();
+        let dist_state = unsafe { IndexFullDistanceMeasure::with_readable_node(self, rn) };
+        for n in neighbors {
+            let dist = unsafe { dist_state.get_distance(n, stats) };
+            result.push(NeighborWithDistance::new(n, dist))
+        }
+    }
+
+    /* get_lsn and visit_lsn are different because the distance
+    comparisons for SBQ get the vector from different places */
+    fn create_lsn_for_init_id(
+        &self,
+        lsr: &mut ListSearchResult<Self::QueryDistanceMeasure, Self::LSNPrivateData>,
+        index_pointer: ItemPointer,
+        gns: &GraphNeighborStore,
+    ) -> ListSearchNeighbor<Self::LSNPrivateData> {
+        if !lsr.prepare_insert(index_pointer) {
+            panic!("should not have had an init id already inserted");
+        }
+
+        let rn = unsafe { Node::read(self.index, index_pointer, &mut lsr.stats) };
+        let node = rn.get_archived_node();
+
+        let distance = match lsr.sdm.as_ref().unwrap() {
+            PlainDistanceMeasure::Full(query) => PlainDistanceMeasure::calculate_distance(
+                self.distance_fn,
+                query.to_index_slice(),
+                node.vector.as_slice(),
+                &mut lsr.stats,
+            ),
+        };
+
+        ListSearchNeighbor::new(
+            index_pointer,
+            distance,
+            PlainStorageLsnPrivateData::new(index_pointer, node, gns),
+        )
+    }
+
+    fn visit_lsn(
+        &self,
+        lsr: &mut ListSearchResult<Self::QueryDistanceMeasure, Self::LSNPrivateData>,
+        lsn_idx: usize,
+        gns: &GraphNeighborStore,
+    ) {
+        let lsn = lsr.get_lsn_by_idx(lsn_idx);
+        //clone needed so we don't continue to borrow lsr
+        let neighbors = lsn.get_private_data().neighbors.clone();
+
+        for &neighbor_index_pointer in neighbors.iter() {
+            if !lsr.prepare_insert(neighbor_index_pointer) {
+                continue;
+            }
+
+            let rn_neighbor =
+                unsafe { Node::read(self.index, neighbor_index_pointer, &mut lsr.stats) };
+            let node_neighbor = rn_neighbor.get_archived_node();
+
+            let distance = match lsr.sdm.as_ref().unwrap() {
+                PlainDistanceMeasure::Full(query) => PlainDistanceMeasure::calculate_distance(
+                    self.distance_fn,
+                    query.to_index_slice(),
+                    node_neighbor.vector.as_slice(),
+                    &mut lsr.stats,
+                ),
+            };
+            let lsn = ListSearchNeighbor::new(
+                neighbor_index_pointer,
+                distance,
+                PlainStorageLsnPrivateData::new(neighbor_index_pointer, node_neighbor, gns),
+            );
+
+            lsr.insert_neighbor(lsn);
+        }
+    }
+
+    fn return_lsn(
+        &self,
+        lsn: &ListSearchNeighbor<Self::LSNPrivateData>,
+        _stats: &mut GreedySearchStats,
+    ) -> HeapPointer {
+        lsn.get_private_data().heap_pointer
+    }
+
+    fn set_neighbors_on_disk<S: StatsNodeModify>(
+        &self,
+        meta: &MetaPage,
+        index_pointer: IndexPointer,
+        neighbors: &[NeighborWithDistance],
+        stats: &mut S,
+    ) {
+        let node = unsafe { Node::modify(self.index, index_pointer, stats) };
+        let mut archived = node.get_archived_node();
+        archived.as_mut().set_neighbors(neighbors, &meta);
+        node.commit();
+    }
+
+    fn get_distance_function(&self) -> fn(&[f32], &[f32]) -> f32 {
+        self.distance_fn
+    }
+}
+
+#[cfg(any(test, feature = "pg_test"))]
+#[pgrx::pg_schema]
+mod tests {
+
+    use pgrx::*;
+
+
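
For reference, a hypothetical test in the same style as the scaffolds below, showing how the new storage_layout option and the tsv.query_rescore GUC are exercised from SQL. This test is illustrative only and not part of the diff:

    // Illustrative example, not from this diff: end-to-end SQL usage of the
    // plain layout plus the rescore GUC added in guc.rs.
    #[pg_test]
    unsafe fn example_plain_layout_query() -> spi::Result<()> {
        Spi::run(
            "CREATE TABLE items(embedding vector(3));
             INSERT INTO items VALUES ('[1,2,3]'), ('[2,3,4]');
             CREATE INDEX ON items USING tsv(embedding) WITH (storage_layout = plain);
             SET tsv.query_rescore = 50;",
        )?;
        let closest = Spi::get_one::<String>(
            "SELECT embedding::text FROM items ORDER BY embedding <=> '[1,2,3]' LIMIT 1",
        )?;
        assert_eq!(closest, Some("[1,2,3]".into()));
        Ok(())
    }
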
#[pg_test] + unsafe fn test_plain_storage_index_creation_many_neighbors() -> spi::Result<()> { + crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( + "num_neighbors=38, storage_layout = plain", + )?; + Ok(()) + } + + #[pg_test] + unsafe fn test_plain_storage_index_creation_few_neighbors() -> spi::Result<()> { + //a test with few neighbors tests the case that nodes share a page, which has caused deadlocks in the past. + crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( + "num_neighbors=10, storage_layout = plain", + )?; + Ok(()) + } + + #[test] + fn test_plain_storage_delete_vacuum_plain() { + crate::access_method::vacuum::tests::test_delete_vacuum_plain_scaffold( + "num_neighbors = 38, storage_layout = plain", + ); + } + + #[test] + fn test_plain_storage_delete_vacuum_full() { + crate::access_method::vacuum::tests::test_delete_vacuum_full_scaffold( + "num_neighbors = 38, storage_layout = plain", + ); + } + + #[pg_test] + unsafe fn test_plain_storage_empty_table_insert() -> spi::Result<()> { + crate::access_method::build::tests::test_empty_table_insert_scaffold( + "num_neighbors=38, storage_layout = plain", + ) + } + + #[pg_test] + unsafe fn test_plain_storage_insert_empty_insert() -> spi::Result<()> { + crate::access_method::build::tests::test_insert_empty_insert_scaffold( + "num_neighbors=38, storage_layout = plain", + ) + } + + #[pg_test] + unsafe fn test_plain_storage_num_dimensions() -> spi::Result<()> { + crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( + "num_neighbors=38, storage_layout = plain, num_dimensions=768", + )?; + Ok(()) + } + + #[pg_test] + unsafe fn test_plain_storage_index_updates() -> spi::Result<()> { + crate::access_method::build::tests::test_index_updates( + "storage_layout = plain, num_neighbors=30", + 50, + )?; + Ok(()) + } +} diff --git a/timescale_vector/src/access_method/pq.rs b/timescale_vector/src/access_method/pq.rs deleted file mode 100644 index 9e5cb8fc..00000000 --- a/timescale_vector/src/access_method/pq.rs +++ /dev/null @@ -1,189 +0,0 @@ -use ndarray::{Array1, Array2, Axis}; -use pgrx::{error, notice, PgRelation}; -use rand::Rng; -use reductive::pq::{Pq, QuantizeVector, TrainPq}; - -use crate::access_method::model::read_pq; - -/// pq aka Product quantization (PQ) is one of the most widely used algorithms for memory-efficient approximated nearest neighbor search, -/// This module encapsulates a vanilla implementation of PQ that we use for the vector index. -/// More details: https://lear.inrialpes.fr/pubs/2011/JDS11/jegou_searching_with_quantization.pdf - -/// PQ_TRAINING_ITERATIONS is the number of times we train each independent Kmeans cluster. -/// 20 - 40 iterations is considered an industry best practice -/// https://github.com/matsui528/nanopq/blob/main/nanopq/pq.py#L60 -const PQ_TRAINING_ITERATIONS: usize = 20; - -/// NUM_SUBQUANTIZER_BITS is the number of code words used for quantization. We pin it to 8 so we can -/// use u8 to represent a subspace. -const NUM_SUBQUANTIZER_BITS: u32 = 8; - -/// NUM_TRAINING_ATTEMPTS is the number of times we'll attempt to train the quantizer. -const NUM_TRAINING_ATTEMPTS: usize = 1; - -/// NUM_TRAINING_SET_SIZE is the maximum number of vectors we want to consider for the quantizer training set. -/// We pick a value used by DiskANN implementations. -const NUM_TRAINING_SET_SIZE: usize = 256000; - -/// PqTrainer is a utility that produces a product quantizer from training with sample vectors. 
-pub struct PqTrainer { - /// training_set contains the vectors we'll use to train PQ. - training_set: Vec>, - /// considered_samples is the number of samples we considered for the training set. - /// It is useful for reservoir sampling as we add samples. - considered_samples: usize, - /// num_subquantizers is the number of independent kmeans we want to partition the vectors into. - /// the more we have the more accurate the PQ, but the more space we use in memory. - num_subquantizers: usize, - // rng is the random number generator for reservoir sampling - //rng: ThreadRng, -} - -impl PqTrainer { - pub fn new(meta_page: &super::meta_page::MetaPage) -> PqTrainer { - PqTrainer { - training_set: Vec::with_capacity(NUM_TRAINING_SET_SIZE), - num_subquantizers: meta_page.get_pq_vector_length(), - considered_samples: 0, - } - } - - /// add_sample adds vectors to the training set via uniform reservoir sampling to keep the - /// number of vectors within a reasonable memory limit. - pub fn add_sample(&mut self, sample: Vec) { - if self.training_set.len() >= NUM_TRAINING_SET_SIZE { - // TODO: Cache this somehow. - let mut rng = rand::thread_rng(); - let index = rng.gen_range(0..self.considered_samples + 1); - if index < NUM_TRAINING_SET_SIZE { - self.training_set[index] = sample; - } - } else { - self.training_set.push(sample); - } - self.considered_samples += 1; - } - - pub fn train_pq(self) -> Pq { - notice!( - "Training Product Quantization with {} vectors", - self.training_set.len() - ); - if (self.training_set.len() as i32) < (2_i32.pow(NUM_SUBQUANTIZER_BITS)) { - error!("training set is too small, please run with use_pq as false.") - } - let training_set = self - .training_set - .iter() - .map(|x| x.to_vec()) - .flatten() - .collect(); - let shape = (self.training_set.len(), self.training_set[0].len()); - let instances = Array2::::from_shape_vec(shape, training_set).unwrap(); - Pq::train_pq( - self.num_subquantizers, - NUM_SUBQUANTIZER_BITS, - PQ_TRAINING_ITERATIONS, - NUM_TRAINING_ATTEMPTS, - instances, - ) - .unwrap() - } -} - -/// PgPq encapsulates functions to work with PQ. -pub struct PgPq { - pq: Pq, -} - -impl PgPq { - pub fn new( - meta_page: &super::meta_page::MetaPage, - index_relation: &PgRelation, - ) -> Option { - if !meta_page.get_use_pq() { - return None; - } - let pq_id = meta_page.get_pq_pointer(); - match pq_id { - None => None, - Some(pq_id) => { - let pq = unsafe { read_pq(&index_relation, &pq_id) }; - Some(PgPq { pq }) - } - } - } - /// quantize produces a quantized vector from the raw pg vector. - pub fn quantize(self, vector: Vec) -> Vec { - let og_vec = Array1::from(vector.to_vec()); - self.pq.quantize_vector(og_vec).to_vec() - } - pub fn distance_calculator( - self, - query: &[f32], - distance_fn: fn(&[f32], &[f32]) -> f32, - ) -> DistanceCalculator { - DistanceCalculator::new(&self.pq, distance_fn, query) - } -} - -/// build_distance_table produces an Asymmetric Distance Table to quickly compute distances. -/// We compute the distance from every centroid and cache that so actual distance calculations -/// can be fast. -// TODO: This function could return a table that fits in SIMD registers. 
-fn build_distance_table( - pq: &Pq, - query: &[f32], - distance_fn: fn(&[f32], &[f32]) -> f32, -) -> Vec { - let sq = pq.subquantizers(); - let num_centroids = pq.n_quantizer_centroids(); - let num_subquantizers = sq.len_of(Axis(0)); - let dt_size = num_subquantizers * num_centroids; - let mut distance_table = vec![0.0; dt_size]; - - let ds = query.len() / num_subquantizers; - let mut elements_for_assert = 0; - for (subquantizer_index, subquantizer) in sq.outer_iter().enumerate() { - let sl = &query[subquantizer_index * ds..(subquantizer_index + 1) * ds]; - for (centroid_index, c) in subquantizer.outer_iter().enumerate() { - let dist = distance_fn(sl, c.to_slice().unwrap()); - assert!(subquantizer_index < num_subquantizers); - assert!(centroid_index * num_subquantizers + subquantizer_index < dt_size); - distance_table[centroid_index * num_subquantizers + subquantizer_index] = dist; - elements_for_assert += 1; - } - } - assert_eq!(dt_size, elements_for_assert); - distance_table -} - -/// DistanceCalculator encapsulates the code to generate distances between a PQ vector and a query. -pub struct DistanceCalculator { - distance_table: Vec, -} - -impl DistanceCalculator { - pub fn new( - pq: &Pq, - distance_fn: fn(&[f32], &[f32]) -> f32, - query: &[f32], - ) -> DistanceCalculator { - DistanceCalculator { - distance_table: build_distance_table(pq, query, distance_fn), - } - } - - /// distance emits the sum of distances between each centroid in the quantized vector. - pub fn distance(&self, pq_vector: &[u8]) -> f32 { - let mut d = 0.0; - let num_subquantizers = pq_vector.len(); - // maybe we should unroll this loop? - for subquantizer_index in 0..num_subquantizers { - let centroid_index = pq_vector[subquantizer_index] as usize; - d += self.distance_table[centroid_index * num_subquantizers + subquantizer_index] - //d += self.distance_table[m][pq_vector[m] as usize]; - } - d - } -} diff --git a/timescale_vector/src/access_method/sbq.rs b/timescale_vector/src/access_method/sbq.rs new file mode 100644 index 00000000..1fd4cd26 --- /dev/null +++ b/timescale_vector/src/access_method/sbq.rs @@ -0,0 +1,1078 @@ +use super::{ + distance::distance_xor_optimized, + graph::{ListSearchNeighbor, ListSearchResult}, + graph_neighbor_store::GraphNeighborStore, + pg_vector::PgVector, + stats::{ + GreedySearchStats, StatsDistanceComparison, StatsHeapNodeRead, StatsNodeModify, + StatsNodeRead, StatsNodeWrite, WriteStats, + }, + storage::{ArchivedData, NodeDistanceMeasure, Storage}, + storage_common::get_attribute_number_from_index, +}; +use std::{cell::RefCell, collections::HashMap, iter::once, marker::PhantomData, pin::Pin}; + +use pgrx::{ + pg_sys::{InvalidBlockNumber, InvalidOffsetNumber, BLCKSZ}, + PgRelation, +}; +use rkyv::{vec::ArchivedVec, Archive, Deserialize, Serialize}; + +use crate::util::{ + page::PageType, table_slot::TableSlot, tape::Tape, ArchivedItemPointer, HeapPointer, + IndexPointer, ItemPointer, ReadableBuffer, +}; + +use super::{meta_page::MetaPage, neighbor_with_distance::NeighborWithDistance}; +use crate::util::WritableBuffer; + +type SbqVectorElement = u64; +const BITS_STORE_TYPE_SIZE: usize = 64; + +#[derive(Archive, Deserialize, Serialize, Readable, Writeable)] +#[archive(check_bytes)] +#[repr(C)] +pub struct SbqMeans { + count: u64, + means: Vec, + m2: Vec, +} + +impl SbqMeans { + pub unsafe fn load( + index: &PgRelation, + meta_page: &super::meta_page::MetaPage, + stats: &mut S, + ) -> SbqQuantizer { + let mut quantizer = SbqQuantizer::new(meta_page); + if quantizer.use_mean { + if 
meta_page.get_quantizer_metadata_pointer().is_none() { + pgrx::error!("No SBQ pointer found in meta page"); + } + let quantizer_item_pointer = meta_page.get_quantizer_metadata_pointer().unwrap(); + let bq = SbqMeans::read(index, quantizer_item_pointer, stats); + let archived = bq.get_archived_node(); + + quantizer.load( + archived.count, + archived.means.to_vec(), + archived.m2.to_vec(), + ); + } + quantizer + } + + pub unsafe fn store( + index: &PgRelation, + quantizer: &SbqQuantizer, + stats: &mut S, + ) -> ItemPointer { + let mut tape = Tape::new(index, PageType::SbqMeans); + let node = SbqMeans { + count: quantizer.count, + means: quantizer.mean.to_vec(), + m2: quantizer.m2.to_vec(), + }; + let ptr = node.write(&mut tape, stats); + tape.close(); + ptr + } +} + +#[derive(Clone)] +pub struct SbqQuantizer { + pub use_mean: bool, + training: bool, + pub count: u64, + pub mean: Vec, + pub m2: Vec, + pub num_bits_per_dimension: u8, +} + +impl SbqQuantizer { + fn new(meta_page: &super::meta_page::MetaPage) -> SbqQuantizer { + Self { + use_mean: true, + training: false, + count: 0, + mean: vec![], + m2: vec![], + num_bits_per_dimension: meta_page.get_bq_num_bits_per_dimension(), + } + } + + fn load(&mut self, count: u64, mean: Vec, m2: Vec) { + self.count = count; + self.mean = mean; + self.m2 = m2 + } + + fn quantized_size(&self, full_vector_size: usize) -> usize { + Self::quantized_size_internal(full_vector_size, self.num_bits_per_dimension) + } + + fn quantized_size_internal(full_vector_size: usize, num_bits_per_dimension: u8) -> usize { + let num_bits = full_vector_size * num_bits_per_dimension as usize; + + if num_bits % BITS_STORE_TYPE_SIZE == 0 { + num_bits / BITS_STORE_TYPE_SIZE + } else { + (num_bits / BITS_STORE_TYPE_SIZE) + 1 + } + } + + fn quantized_size_bytes(num_dimensions: usize, num_bits_per_dimension: u8) -> usize { + Self::quantized_size_internal(num_dimensions, num_bits_per_dimension) + * std::mem::size_of::() + } + + fn quantize(&self, full_vector: &[f32]) -> Vec { + assert!(!self.training); + if self.use_mean { + let mut res_vector = vec![0; self.quantized_size(full_vector.len())]; + + if self.num_bits_per_dimension == 1 { + for (i, &v) in full_vector.iter().enumerate() { + if v > self.mean[i] { + res_vector[i / BITS_STORE_TYPE_SIZE] |= 1 << (i % BITS_STORE_TYPE_SIZE); + } + } + } else { + for (i, &v) in full_vector.iter().enumerate() { + let mean = self.mean[i]; + let variance = self.m2[i] / self.count as f32; + let std_dev = variance.sqrt(); + let ranges = self.num_bits_per_dimension + 1; + + let v_z_score = (v - mean) / std_dev; + let index = (v_z_score + 2.0) / (4.0 / ranges as f32); //we consider z scores between -2 and 2 and divide them into {ranges} ranges + + let bit_position = i * self.num_bits_per_dimension as usize; + if index < 1.0 { + //all zeros + } else { + let count_ones = + (index.floor() as usize).min(self.num_bits_per_dimension as usize); + //fill in count_ones bits from the left + // ex count_ones=1: 100 + // ex count_ones=2: 110 + // ex count_ones=3: 111 + for j in 0..count_ones { + res_vector[(bit_position + j) / BITS_STORE_TYPE_SIZE] |= + 1 << ((bit_position + j) % BITS_STORE_TYPE_SIZE); + } + } + } + } + res_vector + } else { + let mut res_vector = vec![0; self.quantized_size(full_vector.len())]; + + for (i, &v) in full_vector.iter().enumerate() { + if v > 0.0 { + res_vector[i / BITS_STORE_TYPE_SIZE] |= 1 << (i % BITS_STORE_TYPE_SIZE); + } + } + + res_vector + } + } + + fn start_training(&mut self, meta_page: &super::meta_page::MetaPage) { + 
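+        // Training accumulates a running per-dimension mean (and, for multi-bit
+        // quantization, the sum of squared deltas m2) using Welford's online
+        // algorithm; see add_sample below.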
+        self.training = true;
+        if self.use_mean {
+            self.count = 0;
+            self.mean = vec![0.0; meta_page.get_num_dimensions_to_index() as _];
+            if self.num_bits_per_dimension > 1 {
+                self.m2 = vec![0.0; meta_page.get_num_dimensions_to_index() as _];
+            }
+        }
+    }
+
+    fn add_sample(&mut self, sample: &[f32]) {
+        if self.use_mean {
+            self.count += 1;
+            assert!(self.mean.len() == sample.len());
+
+            if self.num_bits_per_dimension > 1 {
+                assert!(self.m2.len() == sample.len());
+                let delta: Vec<_> = self
+                    .mean
+                    .iter()
+                    .zip(sample.iter())
+                    .map(|(m, s)| s - *m)
+                    .collect();
+
+                self.mean
+                    .iter_mut()
+                    .zip(sample.iter())
+                    .for_each(|(m, s)| *m += (s - *m) / self.count as f32);
+
+                let delta2 = self.mean.iter().zip(sample.iter()).map(|(m, s)| s - *m);
+
+                self.m2
+                    .iter_mut()
+                    .zip(delta.iter())
+                    .zip(delta2)
+                    .for_each(|((m2, d), d2)| *m2 += d * d2);
+            } else {
+                self.mean
+                    .iter_mut()
+                    .zip(sample.iter())
+                    .for_each(|(m, s)| *m += (s - *m) / self.count as f32);
+            }
+        }
+    }
+
+    fn finish_training(&mut self) {
+        self.training = false;
+    }
+
+    fn vector_for_new_node(
+        &self,
+        _meta_page: &super::meta_page::MetaPage,
+        full_vector: &[f32],
+    ) -> Vec<SbqVectorElement> {
+        self.quantize(&full_vector)
+    }
+}
+
+pub struct SbqSearchDistanceMeasure {
+    quantized_vector: Vec<SbqVectorElement>,
+    query: PgVector,
+    num_dimensions_for_neighbors: usize,
+    quantized_dimensions: usize,
+}
+
+impl SbqSearchDistanceMeasure {
+    pub fn new(
+        quantizer: &SbqQuantizer,
+        query: PgVector,
+        num_dimensions_for_neighbors: usize,
+    ) -> SbqSearchDistanceMeasure {
+        SbqSearchDistanceMeasure {
+            quantized_vector: quantizer.quantize(query.to_index_slice()),
+            query,
+            num_dimensions_for_neighbors,
+            quantized_dimensions: quantizer.quantized_size(num_dimensions_for_neighbors),
+        }
+    }
+
+    pub fn calculate_bq_distance<S: StatsDistanceComparison>(
+        &self,
+        bq_vector: &[SbqVectorElement],
+        gns: &GraphNeighborStore,
+        stats: &mut S,
+    ) -> f32 {
+        assert!(bq_vector.len() > 0);
+        stats.record_quantized_distance_comparison();
+        let (a, b) = match gns {
+            GraphNeighborStore::Disk => {
+                if self.num_dimensions_for_neighbors > 0 {
+                    debug_assert!(self.quantized_vector.len() >= self.quantized_dimensions);
+                    debug_assert!(bq_vector.len() >= self.quantized_dimensions);
+                    (
+                        &self.quantized_vector.as_slice()[..self.quantized_dimensions],
+                        &bq_vector[..self.quantized_dimensions],
+                    )
+                } else {
+                    debug_assert!(self.quantized_vector.len() == bq_vector.len());
+                    (self.quantized_vector.as_slice(), bq_vector)
+                }
+            }
+            GraphNeighborStore::Builder(_b) => {
+                debug_assert!(self.quantized_vector.len() == bq_vector.len());
+                (self.quantized_vector.as_slice(), bq_vector)
+            }
+        };
+
+        let count_ones = distance_xor_optimized(a, b);
+        //dot product is LOWER the more xors that lead to 1 because that means a negative times a positive = a negative component
+        //but the distance is 1 - dot product, so the more count_ones the higher the distance.
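+        // worked example (illustrative): a=0b0110, b=0b1100 -> xor=0b1010,
+        // count_ones=2, so the reported distance is 2.0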
+ // one other check for distance(a,a), xor=0, count_ones=0, distance=0 + count_ones as f32 + } +} + +pub struct SbqNodeDistanceMeasure<'a> { + vec: Vec, + storage: &'a SbqSpeedupStorage<'a>, +} + +impl<'a> SbqNodeDistanceMeasure<'a> { + pub unsafe fn with_index_pointer( + storage: &'a SbqSpeedupStorage<'a>, + index_pointer: IndexPointer, + stats: &mut T, + ) -> Self { + let cache = &mut storage.qv_cache.borrow_mut(); + Self { + vec: cache.get(index_pointer, storage, stats).to_vec(), + storage: storage, + } + } +} + +impl<'a> NodeDistanceMeasure for SbqNodeDistanceMeasure<'a> { + unsafe fn get_distance( + &self, + index_pointer: IndexPointer, + stats: &mut T, + ) -> f32 { + let cache = &mut self.storage.qv_cache.borrow_mut(); + let vec1 = cache.get(index_pointer, self.storage, stats); + distance_xor_optimized(vec1, self.vec.as_slice()) as f32 + } +} + +struct QuantizedVectorCache { + quantized_vector_map: HashMap>, +} + +/* should be a LRU cache for quantized vector. For now cheat and never evict + TODO: implement LRU cache +*/ +impl QuantizedVectorCache { + fn new(capacity: usize) -> Self { + Self { + quantized_vector_map: HashMap::with_capacity(capacity), + } + } + + fn get( + &mut self, + index_pointer: IndexPointer, + storage: &SbqSpeedupStorage, + stats: &mut S, + ) -> &[SbqVectorElement] { + self.quantized_vector_map + .entry(index_pointer) + .or_insert_with(|| { + storage.get_quantized_vector_from_index_pointer(index_pointer, stats) + }) + } + + fn must_get(&self, index_pointer: IndexPointer) -> &[SbqVectorElement] { + self.quantized_vector_map.get(&index_pointer).unwrap() + } + + /* Ensure that all these elements are in the cache. If the capacity isn't big enough throw an error. + must_get must succeed on all the elements after this call prior to another get or preload call */ + + fn preload, S: StatsNodeRead>( + &mut self, + index_pointers: I, + storage: &SbqSpeedupStorage, + stats: &mut S, + ) { + for index_pointer in index_pointers { + self.get(index_pointer, storage, stats); + } + } +} + +pub struct SbqSpeedupStorage<'a> { + pub index: &'a PgRelation, + pub distance_fn: fn(&[f32], &[f32]) -> f32, + quantizer: SbqQuantizer, + heap_rel: &'a PgRelation, + heap_attr: pgrx::pg_sys::AttrNumber, + qv_cache: RefCell, + num_dimensions_for_neighbors: usize, +} + +impl<'a> SbqSpeedupStorage<'a> { + pub fn new_for_build( + index: &'a PgRelation, + heap_rel: &'a PgRelation, + meta_page: &super::meta_page::MetaPage, + ) -> SbqSpeedupStorage<'a> { + Self { + index: index, + distance_fn: meta_page.get_distance_function(), + quantizer: SbqQuantizer::new(meta_page), + heap_rel: heap_rel, + heap_attr: get_attribute_number_from_index(index), + qv_cache: RefCell::new(QuantizedVectorCache::new(1000)), + num_dimensions_for_neighbors: meta_page.get_num_dimensions_for_neighbors() as usize, + } + } + + fn load_quantizer( + index_relation: &PgRelation, + meta_page: &super::meta_page::MetaPage, + stats: &mut S, + ) -> SbqQuantizer { + unsafe { SbqMeans::load(&index_relation, meta_page, stats) } + } + + pub fn load_for_insert( + heap_rel: &'a PgRelation, + index_relation: &'a PgRelation, + meta_page: &super::meta_page::MetaPage, + stats: &mut S, + ) -> SbqSpeedupStorage<'a> { + Self { + index: index_relation, + distance_fn: meta_page.get_distance_function(), + quantizer: Self::load_quantizer(index_relation, meta_page, stats), + heap_rel: heap_rel, + heap_attr: get_attribute_number_from_index(index_relation), + qv_cache: RefCell::new(QuantizedVectorCache::new(1000)), + num_dimensions_for_neighbors: 
meta_page.get_num_dimensions_for_neighbors() as usize, + } + } + + pub fn load_for_search( + index_relation: &'a PgRelation, + heap_relation: &'a PgRelation, + quantizer: &SbqQuantizer, + meta_page: &super::meta_page::MetaPage, + ) -> SbqSpeedupStorage<'a> { + Self { + index: index_relation, + distance_fn: meta_page.get_distance_function(), + //OPT: get rid of clone + quantizer: quantizer.clone(), + heap_rel: heap_relation, + heap_attr: get_attribute_number_from_index(index_relation), + qv_cache: RefCell::new(QuantizedVectorCache::new(1000)), + num_dimensions_for_neighbors: meta_page.get_num_dimensions_for_neighbors() as usize, + } + } + + fn get_quantized_vector_from_index_pointer( + &self, + index_pointer: IndexPointer, + stats: &mut S, + ) -> Vec { + let rn = unsafe { SbqNode::read(self.index, index_pointer, stats) }; + let node = rn.get_archived_node(); + node.bq_vector.as_slice().to_vec() + } + + fn write_quantizer_metadata(&self, stats: &mut S) { + if self.quantizer.use_mean { + let index_pointer = unsafe { SbqMeans::store(&self.index, &self.quantizer, stats) }; + super::meta_page::MetaPage::update_quantizer_metadata_pointer( + &self.index, + index_pointer, + stats, + ); + } + } + + fn visit_lsn_internal( + &self, + lsr: &mut ListSearchResult< + as Storage>::QueryDistanceMeasure, + as Storage>::LSNPrivateData, + >, + lsn_index_pointer: IndexPointer, + gns: &GraphNeighborStore, + ) { + match gns { + GraphNeighborStore::Disk => { + let rn_visiting = + unsafe { SbqNode::read(self.index, lsn_index_pointer, &mut lsr.stats) }; + let node_visiting = rn_visiting.get_archived_node(); + //OPT: get neighbors from private data just like plain storage in the self.num_dimensions_for_neighbors == 0 case + let neighbors = node_visiting.get_index_pointer_to_neighbors(); + + for (i, &neighbor_index_pointer) in neighbors.iter().enumerate() { + if !lsr.prepare_insert(neighbor_index_pointer) { + continue; + } + + let distance = if self.num_dimensions_for_neighbors > 0 { + let bq_vector = node_visiting.neighbor_vectors[i].as_slice(); + lsr.sdm.as_ref().unwrap().calculate_bq_distance( + bq_vector, + gns, + &mut lsr.stats, + ) + } else { + let rn_neighbor = unsafe { + SbqNode::read(self.index, neighbor_index_pointer, &mut lsr.stats) + }; + let node_neighbor = rn_neighbor.get_archived_node(); + let bq_vector = node_neighbor.bq_vector.as_slice(); + lsr.sdm.as_ref().unwrap().calculate_bq_distance( + bq_vector, + gns, + &mut lsr.stats, + ) + }; + + let lsn = ListSearchNeighbor::new( + neighbor_index_pointer, + distance, + PhantomData::, + ); + + lsr.insert_neighbor(lsn); + } + } + GraphNeighborStore::Builder(b) => { + let neighbors = b.get_neighbors(lsn_index_pointer); + for &neighbor_index_pointer in neighbors.iter() { + if !lsr.prepare_insert(neighbor_index_pointer) { + continue; + } + let mut cache = self.qv_cache.borrow_mut(); + let bq_vector = cache.get(neighbor_index_pointer, self, &mut lsr.stats); + let distance = lsr.sdm.as_ref().unwrap().calculate_bq_distance( + bq_vector, + gns, + &mut lsr.stats, + ); + + let lsn = ListSearchNeighbor::new( + neighbor_index_pointer, + distance, + PhantomData::, + ); + + lsr.insert_neighbor(lsn); + } + } + } + } + + unsafe fn get_heap_table_slot_from_heap_pointer( + &self, + heap_pointer: HeapPointer, + stats: &mut T, + ) -> TableSlot { + TableSlot::new(self.heap_rel, heap_pointer, stats) + } +} + +pub type SbqSpeedupStorageLsnPrivateData = PhantomData; //no data stored + +impl<'a> Storage for SbqSpeedupStorage<'a> { + type QueryDistanceMeasure = 
SbqSearchDistanceMeasure; + type NodeDistanceMeasure<'b> = SbqNodeDistanceMeasure<'b> where Self: 'b; + type ArchivedType = ArchivedSbqNode; + type LSNPrivateData = SbqSpeedupStorageLsnPrivateData; //no data stored + + fn page_type() -> PageType { + PageType::SbqNode + } + + fn create_node( + &self, + full_vector: &[f32], + heap_pointer: HeapPointer, + meta_page: &MetaPage, + tape: &mut Tape, + stats: &mut S, + ) -> ItemPointer { + let bq_vector = self.quantizer.vector_for_new_node(meta_page, full_vector); + + let node = SbqNode::with_meta( + &self.quantizer, + heap_pointer, + &meta_page, + bq_vector.as_slice(), + ); + + let index_pointer: IndexPointer = node.write(tape, stats); + index_pointer + } + + fn start_training(&mut self, meta_page: &super::meta_page::MetaPage) { + self.quantizer.start_training(meta_page); + } + + fn add_sample(&mut self, sample: &[f32]) { + self.quantizer.add_sample(sample); + } + + fn finish_training(&mut self, stats: &mut WriteStats) { + self.quantizer.finish_training(); + self.write_quantizer_metadata(stats); + } + + fn finalize_node_at_end_of_build( + &mut self, + meta: &MetaPage, + index_pointer: IndexPointer, + neighbors: &Vec, + stats: &mut S, + ) { + let mut cache = self.qv_cache.borrow_mut(); + /* It's important to preload cache with all the items since you can run into deadlocks + if you try to fetch a quantized vector while holding the SbqNode::modify lock */ + let iter = neighbors + .iter() + .map(|n| n.get_index_pointer_to_neighbor()) + .chain(once(index_pointer)); + cache.preload(iter, self, stats); + + let node = unsafe { SbqNode::modify(self.index, index_pointer, stats) }; + let mut archived = node.get_archived_node(); + archived.as_mut().set_neighbors(neighbors, &meta, &cache); + + node.commit(); + } + + unsafe fn get_node_distance_measure<'b, S: StatsNodeRead>( + &'b self, + index_pointer: IndexPointer, + stats: &mut S, + ) -> SbqNodeDistanceMeasure<'b> { + SbqNodeDistanceMeasure::with_index_pointer(self, index_pointer, stats) + } + + fn get_query_distance_measure(&self, query: PgVector) -> SbqSearchDistanceMeasure { + return SbqSearchDistanceMeasure::new( + &self.quantizer, + query, + self.num_dimensions_for_neighbors, + ); + } + + fn get_full_distance_for_resort( + &self, + qdm: &Self::QueryDistanceMeasure, + _index_pointer: IndexPointer, + heap_pointer: HeapPointer, + meta_page: &MetaPage, + stats: &mut S, + ) -> f32 { + let slot = unsafe { self.get_heap_table_slot_from_heap_pointer(heap_pointer, stats) }; + + let datum = unsafe { slot.get_attribute(self.heap_attr).unwrap() }; + let vec = unsafe { PgVector::from_datum(datum, meta_page, false, true) }; + self.get_distance_function()(vec.to_full_slice(), qdm.query.to_full_slice()) + } + + fn get_neighbors_with_distances_from_disk( + &self, + neighbors_of: ItemPointer, + result: &mut Vec, + stats: &mut S, + ) { + let rn = unsafe { SbqNode::read(self.index, neighbors_of, stats) }; + let archived = rn.get_archived_node(); + let q = archived.bq_vector.as_slice(); + + for n in rn.get_archived_node().iter_neighbors() { + //OPT: we can optimize this if num_dimensions_for_neighbors == num_dimensions_to_index + let rn1 = unsafe { SbqNode::read(self.index, n, stats) }; + stats.record_quantized_distance_comparison(); + let dist = distance_xor_optimized(q, rn1.get_archived_node().bq_vector.as_slice()); + result.push(NeighborWithDistance::new(n, dist as f32)) + } + } + + /* get_lsn and visit_lsn are different because the distance + comparisons for SBQ get the vector from different places */ + fn 
create_lsn_for_init_id( + &self, + lsr: &mut ListSearchResult, + index_pointer: ItemPointer, + gns: &GraphNeighborStore, + ) -> ListSearchNeighbor { + if !lsr.prepare_insert(index_pointer) { + panic!("should not have had an init id already inserted"); + } + + let rn = unsafe { SbqNode::read(self.index, index_pointer, &mut lsr.stats) }; + let node = rn.get_archived_node(); + + let distance = lsr.sdm.as_ref().unwrap().calculate_bq_distance( + node.bq_vector.as_slice(), + gns, + &mut lsr.stats, + ); + + ListSearchNeighbor::new(index_pointer, distance, PhantomData::) + } + + fn visit_lsn( + &self, + lsr: &mut ListSearchResult, + lsn_idx: usize, + gns: &GraphNeighborStore, + ) { + let lsn_index_pointer = lsr.get_lsn_by_idx(lsn_idx).index_pointer; + self.visit_lsn_internal(lsr, lsn_index_pointer, gns); + } + + fn return_lsn( + &self, + lsn: &ListSearchNeighbor, + stats: &mut GreedySearchStats, + ) -> HeapPointer { + let lsn_index_pointer = lsn.index_pointer; + let rn = unsafe { SbqNode::read(self.index, lsn_index_pointer, stats) }; + let node = rn.get_archived_node(); + let heap_pointer = node.heap_item_pointer.deserialize_item_pointer(); + heap_pointer + } + + fn set_neighbors_on_disk( + &self, + meta: &MetaPage, + index_pointer: IndexPointer, + neighbors: &[NeighborWithDistance], + stats: &mut S, + ) { + let mut cache = QuantizedVectorCache::new(neighbors.len() + 1); + + /* It's important to preload cache with all the items since you can run into deadlocks + if you try to fetch a quantized vector while holding the SbqNode::modify lock */ + let iter = neighbors + .iter() + .map(|n| n.get_index_pointer_to_neighbor()) + .chain(once(index_pointer)); + cache.preload(iter, self, stats); + + let node = unsafe { SbqNode::modify(self.index, index_pointer, stats) }; + let mut archived = node.get_archived_node(); + archived.as_mut().set_neighbors(neighbors, &meta, &cache); + node.commit(); + } + + fn get_distance_function(&self) -> fn(&[f32], &[f32]) -> f32 { + self.distance_fn + } +} + +use timescale_vector_derive::{Readable, Writeable}; + +#[derive(Archive, Deserialize, Serialize, Readable, Writeable)] +#[archive(check_bytes)] +pub struct SbqNode { + pub heap_item_pointer: HeapPointer, + pub bq_vector: Vec, //don't use SbqVectorElement because we don't want to change the size in on-disk format by accident + neighbor_index_pointers: Vec, + neighbor_vectors: Vec>, //don't use SbqVectorElement because we don't want to change the size in on-disk format by accident +} + +impl SbqNode { + pub fn with_meta( + quantizer: &SbqQuantizer, + heap_pointer: HeapPointer, + meta_page: &MetaPage, + bq_vector: &[SbqVectorElement], + ) -> Self { + Self::new( + heap_pointer, + meta_page.get_num_neighbors() as usize, + meta_page.get_num_dimensions_to_index() as usize, + meta_page.get_num_dimensions_for_neighbors() as usize, + quantizer.num_bits_per_dimension, + bq_vector, + ) + } + + fn new( + heap_pointer: HeapPointer, + num_neighbors: usize, + _num_dimensions: usize, + num_dimensions_for_neighbors: usize, + num_bits_per_dimension: u8, + bq_vector: &[SbqVectorElement], + ) -> Self { + // always use vectors of num_neighbors in length because we never want the serialized size of a Node to change + let neighbor_index_pointers: Vec<_> = (0..num_neighbors) + .map(|_| ItemPointer::new(InvalidBlockNumber, InvalidOffsetNumber)) + .collect(); + + let neighbor_vectors: Vec<_> = if num_dimensions_for_neighbors > 0 { + (0..num_neighbors) + .map(|_| { + vec![ + 0; + SbqQuantizer::quantized_size_internal( + num_dimensions_for_neighbors 
as _,
+                            num_bits_per_dimension
+                        )
+                    ]
+                })
+                .collect()
+        } else {
+            vec![]
+        };
+
+        Self {
+            heap_item_pointer: heap_pointer,
+            bq_vector: bq_vector.to_vec(),
+            neighbor_index_pointers: neighbor_index_pointers,
+            neighbor_vectors: neighbor_vectors,
+        }
+    }
+
+    fn test_size(
+        num_neighbors: usize,
+        num_dimensions: usize,
+        num_dimensions_for_neighbors: usize,
+        num_bits_per_dimension: u8,
+    ) -> usize {
+        let v: Vec<SbqVectorElement> =
+            vec![0; SbqQuantizer::quantized_size_internal(num_dimensions, num_bits_per_dimension)];
+        let hp = HeapPointer::new(InvalidBlockNumber, InvalidOffsetNumber);
+        let n = Self::new(
+            hp,
+            num_neighbors,
+            num_dimensions,
+            num_dimensions_for_neighbors,
+            num_bits_per_dimension,
+            &v,
+        );
+        n.serialize_to_vec().len()
+    }
+
+    pub fn get_default_num_neighbors(
+        num_dimensions: usize,
+        num_dimensions_for_neighbors: usize,
+        num_bits_per_dimension: u8,
+    ) -> usize {
+        //how many neighbors can fit on one page? That's what we choose.
+
+        //we first overapproximate the number of neighbors and then double check by actually calculating the size of the SbqNode.
+
+        //blocksize - 50 bytes for the padding/header/etc.
+        let page_size = BLCKSZ as usize - 50;
+        //one quantized_vector takes this many bytes
+        let vec_size =
+            SbqQuantizer::quantized_size_bytes(num_dimensions as usize, num_bits_per_dimension) + 1;
+        //start from the page size then subtract the heap_item_pointer and bq_vector elements of SbqNode.
+        let starting = BLCKSZ as usize - std::mem::size_of::<HeapPointer>() - vec_size;
+        //one neighbor's contribution to neighbor_index_pointers + neighbor_vectors in SbqNode.
+        let one_neighbor = vec_size + std::mem::size_of::<ItemPointer>();
+
+        let mut num_neighbors_overapproximate: usize = starting / one_neighbor;
+        while num_neighbors_overapproximate > 0 {
+            let serialized_size = SbqNode::test_size(
+                num_neighbors_overapproximate as usize,
+                num_dimensions as usize,
+                num_dimensions_for_neighbors as usize,
+                num_bits_per_dimension,
+            );
+            if serialized_size <= page_size {
+                return num_neighbors_overapproximate;
+            }
+            num_neighbors_overapproximate -= 1;
+        }
+        pgrx::error!(
+            "Could not find a valid number of neighbors for the default value. Please specify one."
+ ); + } +} + +impl ArchivedSbqNode { + fn neighbor_index_pointer(self: Pin<&mut Self>) -> Pin<&mut ArchivedVec> { + unsafe { self.map_unchecked_mut(|s| &mut s.neighbor_index_pointers) } + } + + fn neighbor_vector(self: Pin<&mut Self>) -> Pin<&mut ArchivedVec>> { + unsafe { self.map_unchecked_mut(|s| &mut s.neighbor_vectors) } + } + + fn set_neighbors( + mut self: Pin<&mut Self>, + neighbors: &[NeighborWithDistance], + meta_page: &MetaPage, + cache: &QuantizedVectorCache, + ) { + for (i, new_neighbor) in neighbors.iter().enumerate() { + let mut a_index_pointer = self.as_mut().neighbor_index_pointer().index_pin(i); + let ip = new_neighbor.get_index_pointer_to_neighbor(); + //TODO hate that we have to set each field like this + a_index_pointer.block_number = ip.block_number; + a_index_pointer.offset = ip.offset; + + if meta_page.get_num_dimensions_for_neighbors() > 0 { + let quantized = &cache.must_get(ip)[..SbqQuantizer::quantized_size_internal( + meta_page.get_num_dimensions_for_neighbors() as _, + meta_page.get_bq_num_bits_per_dimension(), + )]; + let mut neighbor_vector = self.as_mut().neighbor_vector().index_pin(i); + for (index_in_q_vec, val) in quantized.iter().enumerate() { + let mut x = neighbor_vector.as_mut().index_pin(index_in_q_vec); + *x = *val; + } + } + } + //set the marker that the list ended + if neighbors.len() < meta_page.get_num_neighbors() as _ { + let mut past_last_index_pointers = + self.neighbor_index_pointer().index_pin(neighbors.len()); + past_last_index_pointers.block_number = InvalidBlockNumber; + past_last_index_pointers.offset = InvalidOffsetNumber; + } + } + + pub fn num_neighbors(&self) -> usize { + self.neighbor_index_pointers + .iter() + .position(|f| f.block_number == InvalidBlockNumber) + .unwrap_or(self.neighbor_index_pointers.len()) + } + + pub fn iter_neighbors(&self) -> impl Iterator + '_ { + self.neighbor_index_pointers + .iter() + .take(self.num_neighbors()) + .map(|ip| ip.deserialize_item_pointer()) + } +} + +impl ArchivedData for ArchivedSbqNode { + fn with_data(data: &mut [u8]) -> Pin<&mut ArchivedSbqNode> { + ArchivedSbqNode::with_data(data) + } + + fn get_index_pointer_to_neighbors(&self) -> Vec { + self.iter_neighbors().collect() + } + + fn is_deleted(&self) -> bool { + self.heap_item_pointer.offset == InvalidOffsetNumber + } + + fn delete(self: Pin<&mut Self>) { + //TODO: actually optimize the deletes by removing index tuples. For now just mark it. + let mut heap_pointer = unsafe { self.map_unchecked_mut(|s| &mut s.heap_item_pointer) }; + heap_pointer.offset = InvalidOffsetNumber; + heap_pointer.block_number = InvalidBlockNumber; + } + + fn get_heap_item_pointer(&self) -> HeapPointer { + self.heap_item_pointer.deserialize_item_pointer() + } +} + +#[cfg(any(test, feature = "pg_test"))] +#[pgrx::pg_schema] +mod tests { + use pgrx::*; + + #[pg_test] + unsafe fn test_bq_speedup_storage_index_creation_default_neighbors() -> spi::Result<()> { + crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( + "storage_layout = io_optimized", + )?; + Ok(()) + } + + #[pg_test] + unsafe fn test_bq_speedup_storage_index_creation_few_neighbors() -> spi::Result<()> { + //a test with few neighbors tests the case that nodes share a page, which has caused deadlocks in the past. 
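+        // io_optimized maps to the SbqSpeedup storage layout (see StorageType::from_str)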
+ crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( + "num_neighbors=10, storage_layout = io_optimized", + )?; + Ok(()) + } + + #[test] + fn test_bq_speedup_storage_delete_vacuum_plain() { + crate::access_method::vacuum::tests::test_delete_vacuum_plain_scaffold( + "num_neighbors = 10, storage_layout = io_optimized", + ); + } + + #[test] + fn test_bq_speedup_storage_delete_vacuum_full() { + crate::access_method::vacuum::tests::test_delete_vacuum_full_scaffold( + "num_neighbors = 38, storage_layout = io_optimized", + ); + } + + #[pg_test] + unsafe fn test_bq_speedup_storage_empty_table_insert() -> spi::Result<()> { + crate::access_method::build::tests::test_empty_table_insert_scaffold( + "num_neighbors=38, storage_layout = io_optimized", + ) + } + + #[pg_test] + unsafe fn test_bq_speedup_storage_insert_empty_insert() -> spi::Result<()> { + crate::access_method::build::tests::test_insert_empty_insert_scaffold( + "num_neighbors=38, storage_layout = io_optimized", + ) + } + + #[pg_test] + unsafe fn test_bq_speedup_storage_index_creation_num_dimensions() -> spi::Result<()> { + crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( + "storage_layout = io_optimized, num_dimensions=768", + )?; + Ok(()) + } + + #[pg_test] + unsafe fn test_bq_speedup_storage_index_updates() -> spi::Result<()> { + crate::access_method::build::tests::test_index_updates( + "storage_layout = io_optimized, num_neighbors=10", + 300, + )?; + Ok(()) + } + + #[pg_test] + unsafe fn test_bq_speedup_compressed_index_creation_default_neighbors() -> spi::Result<()> { + crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( + "storage_layout = memory_optimized", + )?; + Ok(()) + } + + #[pg_test] + unsafe fn test_bq_compressed_storage_index_creation_few_neighbors() -> spi::Result<()> { + //a test with few neighbors tests the case that nodes share a page, which has caused deadlocks in the past. 
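+        // memory_optimized maps to the SbqCompression storage layout (see StorageType::from_str)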
+ crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( + "num_neighbors=10, storage_layout = memory_optimized", + )?; + Ok(()) + } + + #[test] + fn test_bq_compressed_storage_delete_vacuum_plain() { + crate::access_method::vacuum::tests::test_delete_vacuum_plain_scaffold( + "num_neighbors = 10, storage_layout = memory_optimized", + ); + } + + #[test] + fn test_bq_compressed_storage_delete_vacuum_full() { + crate::access_method::vacuum::tests::test_delete_vacuum_full_scaffold( + "num_neighbors = 38, storage_layout = memory_optimized", + ); + } + + #[pg_test] + unsafe fn test_bq_compressed_storage_empty_table_insert() -> spi::Result<()> { + crate::access_method::build::tests::test_empty_table_insert_scaffold( + "num_neighbors=38, storage_layout = memory_optimized", + ) + } + + #[pg_test] + unsafe fn test_bq_compressed_storage_insert_empty_insert() -> spi::Result<()> { + crate::access_method::build::tests::test_insert_empty_insert_scaffold( + "num_neighbors=38, storage_layout = memory_optimized", + ) + } + + #[pg_test] + unsafe fn test_bq_compressed_storage_index_creation_num_dimensions() -> spi::Result<()> { + crate::access_method::build::tests::test_index_creation_and_accuracy_scaffold( + "storage_layout = memory_optimized, num_dimensions=768", + )?; + Ok(()) + } + + #[pg_test] + unsafe fn test_bq_compressed_storage_index_updates() -> spi::Result<()> { + crate::access_method::build::tests::test_index_updates( + "storage_layout = memory_optimized, num_neighbors=10", + 300, + )?; + Ok(()) + } +} diff --git a/timescale_vector/src/access_method/scan.rs b/timescale_vector/src/access_method/scan.rs index 58e6ed77..9f316b58 100644 --- a/timescale_vector/src/access_method/scan.rs +++ b/timescale_vector/src/access_method/scan.rs @@ -1,86 +1,293 @@ +use std::collections::BinaryHeap; + use pgrx::{pg_sys::InvalidOffsetNumber, *}; use crate::{ access_method::{ - disk_index_graph::DiskIndexGraph, graph::VectorProvider, meta_page::MetaPage, - model::PgVector, + graph_neighbor_store::GraphNeighborStore, meta_page::MetaPage, pg_vector::PgVector, + sbq::SbqSpeedupStorage, }, - util::{buffer::PinnedBufferShare, HeapPointer}, + util::{buffer::PinnedBufferShare, HeapPointer, IndexPointer}, +}; + +use super::{ + graph::{Graph, ListSearchResult}, + plain_storage::{PlainDistanceMeasure, PlainStorage, PlainStorageLsnPrivateData}, + sbq::{SbqMeans, SbqQuantizer, SbqSearchDistanceMeasure, SbqSpeedupStorageLsnPrivateData}, + stats::QuantizerStats, + storage::{Storage, StorageType}, }; -use super::graph::ListSearchResult; +/* Be very careful not to transfer PgRelations in the state, as they can change between calls. That means we shouldn't be +using lifetimes here. Everything should be owned */ +enum StorageState { + SbqSpeedup( + SbqQuantizer, + TSVResponseIterator, + ), + Plain(TSVResponseIterator), +} + +/* no lifetime usage here. 
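+   (see the caution above StorageState: PgRelations can change between index-AM calls,
+   so the scan state owns everything it keeps)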
*/ +struct TSVScanState { + storage: *mut StorageState, + distance_fn: Option f32>, + meta_page: MetaPage, + last_buffer: Option, +} + +impl TSVScanState { + fn new(meta_page: MetaPage) -> Self { + Self { + storage: std::ptr::null_mut(), + distance_fn: None, + meta_page: meta_page, + last_buffer: None, + } + } + + fn initialize( + &mut self, + index: &PgRelation, + heap: &PgRelation, + query: PgVector, + search_list_size: usize, + ) { + let meta_page = MetaPage::fetch(&index); + let storage = meta_page.get_storage_type(); + let distance = meta_page.get_distance_function(); + + let store_type = match storage { + StorageType::Plain => { + let stats = QuantizerStats::new(); + let bq = + PlainStorage::load_for_search(index, heap, meta_page.get_distance_function()); + let it = + TSVResponseIterator::new(&bq, index, query, search_list_size, meta_page, stats); + StorageState::Plain(it) + } + StorageType::SbqSpeedup | StorageType::SbqCompression => { + let mut stats = QuantizerStats::new(); + let quantizer = unsafe { SbqMeans::load(index, &meta_page, &mut stats) }; + let bq = SbqSpeedupStorage::load_for_search(index, heap, &quantizer, &meta_page); + let it = + TSVResponseIterator::new(&bq, index, query, search_list_size, meta_page, stats); + StorageState::SbqSpeedup(quantizer, it) + } + }; + + self.storage = PgMemoryContexts::CurrentMemoryContext.leak_and_drop_on_delete(store_type); + self.distance_fn = Some(distance); + } +} + +struct ResortData { + heap_pointer: HeapPointer, + index_pointer: IndexPointer, + distance: f32, +} + +impl PartialEq for ResortData { + fn eq(&self, other: &Self) -> bool { + self.heap_pointer == other.heap_pointer + } +} + +impl PartialOrd for ResortData { + fn partial_cmp(&self, other: &Self) -> Option { + //notice the reverse here. 
Other is the one that is being compared to self + //this allows us to have a min heap + other.distance.partial_cmp(&self.distance) + } +} + +impl Eq for ResortData {} + +impl Ord for ResortData { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.partial_cmp(other).unwrap() + } +} + +struct StreamingStats { + count: i32, + mean: f32, + m2: f32, + max_distance: f32, +} + +impl StreamingStats { + fn new(_resort_size: usize) -> Self { + Self { + count: 0, + mean: 0.0, + m2: 0.0, + max_distance: 0.0, + } + } + + fn update_base_stats(&mut self, distance: f32) { + if distance == 0.0 { + return; + } + self.count += 1; + let delta = distance - self.mean; + self.mean += delta / self.count as f32; + let delta2 = distance - self.mean; + self.m2 += delta * delta2; + } + + fn variance(&self) -> f32 { + if self.count < 2 { + return 0.0; + } + self.m2 / (self.count - 1) as f32 + } -struct TSVResponseIterator<'a> { - query: Vec, - lsr: ListSearchResult, + fn update(&mut self, distance: f32, diff: f32) { + //base stats only on first resort_size elements + self.update_base_stats(diff); + self.max_distance = self.max_distance.max(distance); + } +} + +struct TSVResponseIterator { + lsr: ListSearchResult, search_list_size: usize, - current: usize, - last_buffer: Option>, + meta_page: MetaPage, + quantizer_stats: QuantizerStats, + resort_size: usize, + resort_buffer: BinaryHeap, + streaming_stats: StreamingStats, + next_calls: i32, + next_calls_with_resort: i32, + full_distance_comparisons: i32, } -impl<'a> TSVResponseIterator<'a> { - fn new(index: &PgRelation, query: &[f32], search_list_size: usize) -> Self { - let meta_page = MetaPage::read(&index); - let use_pq = meta_page.get_use_pq(); - let mut graph = - DiskIndexGraph::new(&index, VectorProvider::new(None, None, use_pq, use_pq)); - use super::graph::Graph; - let lsr = graph.greedy_search_streaming_init(&index, query); +impl TSVResponseIterator { + fn new>( + storage: &S, + index: &PgRelation, + query: PgVector, + search_list_size: usize, + //FIXME? 
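+        // (note: this MetaPage argument is currently unused; a fresh copy is fetched below)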
+ _meta_page: MetaPage, + quantizer_stats: QuantizerStats, + ) -> Self { + let mut meta_page = MetaPage::fetch(&index); + let graph = Graph::new(GraphNeighborStore::Disk, &mut meta_page); + + let lsr = graph.greedy_search_streaming_init(query, search_list_size, storage); + let resort_size = super::guc::TSV_RESORT_SIZE.get() as usize; + Self { - query: query.to_vec(), search_list_size, lsr, - current: 0, - last_buffer: None, + meta_page, + quantizer_stats, + resort_size, + resort_buffer: BinaryHeap::with_capacity(resort_size), + streaming_stats: StreamingStats::new(resort_size), + next_calls: 0, + next_calls_with_resort: 0, + full_distance_comparisons: 0, } } } -impl<'a> TSVResponseIterator<'a> { - fn next(&mut self, index: &'a PgRelation) -> Option { - let meta_page = MetaPage::read(&index); - let use_pq = meta_page.get_use_pq(); - let mut graph = - DiskIndexGraph::new(&index, VectorProvider::new(None, None, use_pq, use_pq)); - use super::graph::Graph; +impl TSVResponseIterator { + fn next>( + &mut self, + storage: &S, + ) -> Option<(HeapPointer, IndexPointer)> { + self.next_calls += 1; + let graph = Graph::new(GraphNeighborStore::Disk, &mut self.meta_page); /* Iterate until we find a non-deleted tuple */ loop { - graph.greedy_search_iterate(&mut self.lsr, index, &self.query, self.search_list_size); + graph.greedy_search_iterate(&mut self.lsr, self.search_list_size, None, storage); - let item = self.lsr.consume(); + let item = self.lsr.consume(storage); match item { Some((heap_pointer, index_pointer)) => { - /* - * An index scan must maintain a pin on the index page holding the - * item last returned by amgettuple - * - * https://www.postgresql.org/docs/current/index-locking.html - */ - self.last_buffer = - Some(PinnedBufferShare::read(index, index_pointer.block_number)); - - self.current = self.current + 1; if heap_pointer.offset == InvalidOffsetNumber { /* deleted tuple */ continue; } - return Some(heap_pointer); + return Some((heap_pointer, index_pointer)); } None => { - self.last_buffer = None; return None; } } } } -} -struct TSVScanState<'a> { - iterator: *mut TSVResponseIterator<'a>, + fn next_with_resort>( + &mut self, + _index: &PgRelation, + storage: &S, + ) -> Option<(HeapPointer, IndexPointer)> { + self.next_calls_with_resort += 1; + if self.resort_buffer.capacity() == 0 { + return self.next(storage); + } + + while self.resort_buffer.len() < 2 + || self.streaming_stats.count < 2 + || (self.streaming_stats.max_distance - self.resort_buffer.peek().unwrap().distance) + < self.streaming_stats.variance().sqrt() * (self.resort_size as f32 / 100.0) + { + match self.next(storage) { + Some((heap_pointer, index_pointer)) => { + self.full_distance_comparisons += 1; + let distance = storage.get_full_distance_for_resort( + self.lsr.sdm.as_ref().unwrap(), + index_pointer, + heap_pointer, + &self.meta_page, + &mut self.lsr.stats, + ); + + if self.resort_buffer.len() > 1 { + self.streaming_stats + .update(distance, distance - self.streaming_stats.max_distance); + } + + self.resort_buffer.push(ResortData { + heap_pointer, + index_pointer, + distance, + }); + } + None => { + break; + } + } + } + + /*error!( + "Resort buffer size: {}, mean: {}, variance: {}, max_distance: {}: diff: {}", + self.resort_buffer.len(), + self.streaming_stats.mean(), + self.streaming_stats.variance().sqrt(), + self.streaming_stats.max_distance, + self.streaming_stats.max_distance - self.resort_buffer.peek().unwrap().distance + );*/ + + match self.resort_buffer.pop() { + Some(rd) => Some((rd.heap_pointer, 
rd.index_pointer)), + None => None, + } + } } +/* +struct TSVScanState<'a, 'b> { + iterator: *mut TSVResponseIterator<'a, 'b>, +} +*/ #[pg_guard] pub extern "C" fn ambeginscan( index_relation: pg_sys::Relation, @@ -94,10 +301,10 @@ pub extern "C" fn ambeginscan( norderbys, )) }; - let state = TSVScanState { - iterator: std::ptr::null_mut(), - }; + let indexrel = unsafe { PgRelation::from_pg(index_relation) }; + let meta_page = MetaPage::fetch(&indexrel); + let state: TSVScanState = TSVScanState::new(meta_page); scandesc.opaque = PgMemoryContexts::CurrentMemoryContext.leak_and_drop_on_delete(state) as void_mut_ptr; @@ -120,7 +327,7 @@ pub extern "C" fn amrescan( } let mut scan: PgBox = unsafe { PgBox::from_pg(scan) }; let indexrel = unsafe { PgRelation::from_pg(scan.indexRelation) }; - let state = unsafe { (scan.opaque as *mut TSVScanState).as_mut() }.expect("no scandesc state"); + let heaprel = unsafe { PgRelation::from_pg(scan.heapRelation) }; if nkeys > 0 { scan.xs_recheck = true; @@ -129,16 +336,20 @@ pub extern "C" fn amrescan( let orderby_keys = unsafe { std::slice::from_raw_parts(orderbys as *const pg_sys::ScanKeyData, norderbys as _) }; - let vec = unsafe { PgVector::from_datum(orderby_keys[0].sk_argument) }; - let query = unsafe { (*vec).to_slice() }; - //TODO need to set search_list_size correctly - //TODO right now doesn't handle more than LIMIT 100; let search_list_size = super::guc::TSV_QUERY_SEARCH_LIST_SIZE.get() as usize; - let res = TSVResponseIterator::new(&indexrel, query, search_list_size); + let state = unsafe { (scan.opaque as *mut TSVScanState).as_mut() }.expect("no scandesc state"); - state.iterator = PgMemoryContexts::CurrentMemoryContext.leak_and_drop_on_delete(res); + let query = unsafe { + PgVector::from_datum( + orderby_keys[0].sk_argument, + &state.meta_page, + true, /* needed for search */ + true, /* needed for resort */ + ) + }; + state.initialize(&indexrel, &heaprel, query, search_list_size); } #[pg_guard] @@ -146,21 +357,69 @@ pub extern "C" fn amgettuple( scan: pg_sys::IndexScanDesc, _direction: pg_sys::ScanDirection, ) -> bool { - let mut scan: PgBox = unsafe { PgBox::from_pg(scan) }; + let scan: PgBox = unsafe { PgBox::from_pg(scan) }; let state = unsafe { (scan.opaque as *mut TSVScanState).as_mut() }.expect("no scandesc state"); - let iter = unsafe { state.iterator.as_mut() }.expect("no iterator in state"); + //let iter = unsafe { state.iterator.as_mut() }.expect("no iterator in state"); let indexrel = unsafe { PgRelation::from_pg(scan.indexRelation) }; + let heaprel = unsafe { PgRelation::from_pg(scan.heapRelation) }; + + let mut storage = unsafe { state.storage.as_mut() }.expect("no storage in state"); + match &mut storage { + StorageState::SbqSpeedup(quantizer, iter) => { + let bq = SbqSpeedupStorage::load_for_search( + &indexrel, + &heaprel, + quantizer, + &state.meta_page, + ); + let next = iter.next_with_resort(&indexrel, &bq); + get_tuple(state, next, scan) + } + StorageState::Plain(iter) => { + let storage = + PlainStorage::load_for_search(&indexrel, &heaprel, state.distance_fn.unwrap()); + let next = if state.meta_page.get_num_dimensions() + == state.meta_page.get_num_dimensions_to_index() + { + /* no need to resort */ + iter.next(&storage) + } else { + iter.next_with_resort(&indexrel, &storage) + }; + get_tuple(state, next, scan) + } + } +} - /* no need to recheck stuff for now */ +fn get_tuple( + state: &mut TSVScanState, + next: Option<(HeapPointer, IndexPointer)>, + mut scan: PgBox, +) -> bool { scan.xs_recheckorderby = false; - match 
iter.next(&indexrel) { - Some(heap_pointer) => { + match next { + Some((heap_pointer, index_pointer)) => { let tid_to_set = &mut scan.xs_heaptid; heap_pointer.to_item_pointer_data(tid_to_set); + + /* + * An index scan must maintain a pin on the index page holding the + * item last returned by amgettuple + * + * https://www.postgresql.org/docs/current/index-locking.html + */ + let indexrel = unsafe { PgRelation::from_pg(scan.indexRelation) }; + state.last_buffer = Some(PinnedBufferShare::read( + &indexrel, + index_pointer.block_number, + )); true } - None => false, + None => { + state.last_buffer = None; + false + } } } @@ -175,97 +434,31 @@ pub extern "C" fn amendscan(scan: pg_sys::IndexScanDesc) { let scan: PgBox = unsafe { PgBox::from_pg(scan) }; let state = unsafe { (scan.opaque as *mut TSVScanState).as_mut() }.expect("no scandesc state"); - let iter = unsafe { state.iterator.as_mut() }.expect("no iterator in state"); - debug1!( - "Query stats - node reads:{}, calls: {}, distance comparisons: {}, pq distance comparisons: {}", - iter.lsr.stats.node_reads, - iter.lsr.stats.calls, - iter.lsr.stats.distance_comparisons, - iter.lsr.stats.pq_distance_comparisons, - ); - } -} -#[cfg(any(test, feature = "pg_test"))] -#[pgrx::pg_schema] -mod tests { - use pgrx::*; - - #[pg_test] - unsafe fn test_index_scan() -> spi::Result<()> { - Spi::run(&format!( - "CREATE TABLE test(embedding vector(3)); - - INSERT INTO test(embedding) VALUES ('[1,2,3]'), ('[4,5,6]'), ('[7,8,10]'); - - INSERT INTO test(embedding) SELECT ('[' || g::text ||', 0, 0]')::vector FROM generate_series(0, 100) g; - - CREATE INDEX idxtest - ON test - USING tsv(embedding) - WITH (num_neighbors=30);" - ))?; - - Spi::run(&format!( - " - set enable_seqscan = 0; - select * from test order by embedding <=> '[0,0,0]'; - explain analyze select * from test order by embedding <=> '[0,0,0]'; - ", - ))?; - - Spi::run(&format!( - " - set enable_seqscan = 0; - set tsv.query_search_list_size = 2; - select * from test order by embedding <=> '[0,0,0]'; - ", - ))?; - - let res: Option = Spi::get_one(&format!( - " - set enable_seqscan = 0; - set tsv.query_search_list_size = 2; - WITH cte as (select * from test order by embedding <=> '[0,0,0]') SELECT count(*) from cte; - ", - ))?; - - assert_eq!(104, res.unwrap(), "Testing query over entire table"); - - Spi::run(&format!( - " - drop index idxtest; - ", - ))?; - - Ok(()) + let mut storage = unsafe { state.storage.as_mut() }.expect("no storage in state"); + match &mut storage { + StorageState::SbqSpeedup(_bq, iter) => end_scan::(iter), + StorageState::Plain(iter) => end_scan::(iter), + } } +} - #[pg_test] - unsafe fn test_index_scan_on_empty_table() -> spi::Result<()> { - Spi::run(&format!( - "CREATE TABLE test(embedding vector(3)); - - CREATE INDEX idxtest - ON test - USING tsv(embedding) - WITH (num_neighbors=30);" - ))?; - - Spi::run(&format!( - " - set enable_seqscan = 0; - select * from test order by embedding <=> '[0,0,0]'; - explain analyze select * from test order by embedding <=> '[0,0,0]'; - ", - ))?; - - Spi::run(&format!( - " - drop index idxtest; - ", - ))?; - - Ok(()) - } +fn end_scan( + iter: &mut TSVResponseIterator, +) { + debug_assert!(iter.quantizer_stats.node_reads == 1); + debug_assert!(iter.quantizer_stats.node_writes == 0); + + debug1!( + "Query stats - reads_index={} reads_heap={} d_total={} d_quantized={} d_full={} next={} resort={} visits={} candidate={}", + iter.lsr.stats.get_node_reads(), + iter.lsr.stats.get_node_heap_reads(), + iter.lsr.stats.get_total_distance_comparisons(), 
+ iter.lsr.stats.get_quantized_distance_comparisons(), + iter.full_distance_comparisons, + iter.next_calls, + iter.next_calls_with_resort, + iter.lsr.stats.get_visited_nodes(), + iter.lsr.stats.get_candidate_nodes(), + ); } diff --git a/timescale_vector/src/access_method/stats.rs b/timescale_vector/src/access_method/stats.rs new file mode 100644 index 00000000..f6e5bd2a --- /dev/null +++ b/timescale_vector/src/access_method/stats.rs @@ -0,0 +1,282 @@ +use std::time::Instant; + +pub trait StatsNodeRead { + fn record_read(&mut self); +} + +pub trait StatsHeapNodeRead { + fn record_heap_read(&mut self); +} + +pub trait StatsNodeModify { + fn record_modify(&mut self); +} + +pub trait StatsNodeWrite { + fn record_write(&mut self); +} + +pub trait StatsDistanceComparison { + fn record_full_distance_comparison(&mut self); + fn record_quantized_distance_comparison(&mut self); +} + +pub trait StatsNodeVisit { + fn record_visit(&mut self); + fn record_candidate(&mut self); +} + +#[derive(Debug)] +pub struct PruneNeighborStats { + pub calls: usize, + pub distance_comparisons: usize, + pub node_reads: usize, + pub node_modify: usize, + pub num_neighbors_before_prune: usize, + pub num_neighbors_after_prune: usize, +} + +impl PruneNeighborStats { + pub fn new() -> Self { + PruneNeighborStats { + calls: 0, + distance_comparisons: 0, + node_reads: 0, + node_modify: 0, + num_neighbors_before_prune: 0, + num_neighbors_after_prune: 0, + } + } +} + +impl StatsDistanceComparison for PruneNeighborStats { + fn record_full_distance_comparison(&mut self) { + self.distance_comparisons += 1; + } + + fn record_quantized_distance_comparison(&mut self) { + self.distance_comparisons += 1; + } +} + +impl StatsNodeRead for PruneNeighborStats { + fn record_read(&mut self) { + self.node_reads += 1; + } +} + +impl StatsNodeModify for PruneNeighborStats { + fn record_modify(&mut self) { + self.node_modify += 1; + } +} + +#[derive(Debug)] +pub struct GreedySearchStats { + calls: usize, + full_distance_comparisons: usize, + node_reads: usize, + node_heap_reads: usize, + quantized_distance_comparisons: usize, + visited_nodes: usize, + candidate_nodes: usize, +} + +impl GreedySearchStats { + pub fn new() -> Self { + GreedySearchStats { + calls: 0, + full_distance_comparisons: 0, + node_reads: 0, + node_heap_reads: 0, + quantized_distance_comparisons: 0, + visited_nodes: 0, + candidate_nodes: 0, + } + } + + pub fn combine(&mut self, other: &Self) { + self.calls += other.calls; + self.full_distance_comparisons += other.full_distance_comparisons; + self.node_reads += other.node_reads; + self.node_heap_reads += other.node_heap_reads; + self.quantized_distance_comparisons += other.quantized_distance_comparisons; + } + + pub fn get_calls(&self) -> usize { + self.calls + } + + pub fn get_node_reads(&self) -> usize { + self.node_reads + } + + pub fn get_node_heap_reads(&self) -> usize { + self.node_heap_reads + } + + pub fn get_total_distance_comparisons(&self) -> usize { + self.full_distance_comparisons + self.quantized_distance_comparisons + } + + pub fn get_quantized_distance_comparisons(&self) -> usize { + self.quantized_distance_comparisons + } + + pub fn get_visited_nodes(&self) -> usize { + self.visited_nodes + } + + pub fn get_candidate_nodes(&self) -> usize { + self.candidate_nodes + } + + pub fn get_full_distance_comparisons(&self) -> usize { + self.full_distance_comparisons + } + + pub fn record_call(&mut self) { + self.calls += 1; + } +} + +impl StatsNodeRead for GreedySearchStats { + fn record_read(&mut self) { + 
self.node_reads += 1; + } +} + +impl StatsHeapNodeRead for GreedySearchStats { + fn record_heap_read(&mut self) { + self.node_heap_reads += 1; + } +} + +impl StatsDistanceComparison for GreedySearchStats { + fn record_full_distance_comparison(&mut self) { + self.full_distance_comparisons += 1; + } + + fn record_quantized_distance_comparison(&mut self) { + self.quantized_distance_comparisons += 1; + } +} + +impl StatsNodeVisit for GreedySearchStats { + fn record_visit(&mut self) { + self.visited_nodes += 1; + } + + fn record_candidate(&mut self) { + self.candidate_nodes += 1; + } +} + +#[derive(Debug)] +pub struct QuantizerStats { + pub node_reads: usize, + pub node_writes: usize, +} + +impl QuantizerStats { + pub fn new() -> Self { + QuantizerStats { + node_reads: 0, + node_writes: 0, + } + } +} + +impl StatsNodeRead for QuantizerStats { + fn record_read(&mut self) { + self.node_reads += 1; + } +} + +impl StatsNodeWrite for QuantizerStats { + fn record_write(&mut self) { + self.node_writes += 1; + } +} +#[derive(Debug)] +pub struct InsertStats { + pub prune_neighbor_stats: PruneNeighborStats, + pub greedy_search_stats: GreedySearchStats, + pub quantizer_stats: QuantizerStats, + pub node_reads: usize, + pub node_modify: usize, + pub node_writes: usize, +} + +impl InsertStats { + pub fn new() -> Self { + InsertStats { + prune_neighbor_stats: PruneNeighborStats::new(), + greedy_search_stats: GreedySearchStats::new(), + quantizer_stats: QuantizerStats::new(), + node_reads: 0, + node_modify: 0, + node_writes: 0, + } + } +} + +impl StatsNodeRead for InsertStats { + fn record_read(&mut self) { + self.node_reads += 1; + } +} + +impl StatsNodeModify for InsertStats { + fn record_modify(&mut self) { + self.node_modify += 1; + } +} + +impl StatsNodeWrite for InsertStats { + fn record_write(&mut self) { + self.node_writes += 1; + } +} + +pub struct WriteStats { + pub started: Instant, + pub num_nodes: usize, + pub nodes_read: usize, + pub nodes_modified: usize, + pub nodes_written: usize, + pub prune_stats: PruneNeighborStats, + pub num_neighbors: usize, +} + +impl WriteStats { + pub fn new() -> Self { + Self { + started: Instant::now(), + num_nodes: 0, + prune_stats: PruneNeighborStats::new(), + num_neighbors: 0, + nodes_read: 0, + nodes_modified: 0, + nodes_written: 0, + } + } +} + +impl StatsNodeRead for WriteStats { + fn record_read(&mut self) { + self.nodes_read += 1; + } +} + +impl StatsNodeModify for WriteStats { + fn record_modify(&mut self) { + self.nodes_modified += 1; + } +} + +impl StatsNodeWrite for WriteStats { + fn record_write(&mut self) { + self.nodes_written += 1; + } +} diff --git a/timescale_vector/src/access_method/storage.rs b/timescale_vector/src/access_method/storage.rs new file mode 100644 index 00000000..4cb91b95 --- /dev/null +++ b/timescale_vector/src/access_method/storage.rs @@ -0,0 +1,156 @@ +use std::pin::Pin; + +use crate::util::{page::PageType, tape::Tape, HeapPointer, IndexPointer, ItemPointer}; + +use super::{ + graph::{ListSearchNeighbor, ListSearchResult}, + graph_neighbor_store::GraphNeighborStore, + meta_page::MetaPage, + neighbor_with_distance::NeighborWithDistance, + pg_vector::PgVector, + stats::{ + GreedySearchStats, StatsDistanceComparison, StatsHeapNodeRead, StatsNodeModify, + StatsNodeRead, StatsNodeWrite, WriteStats, + }, +}; + +/// NodeDistanceMeasure keeps the state needed to compute the distance between two nodes.
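+/// An implementor is obtained from a Storage and then compared against other nodes, e.g. (illustrative sketch only, not code from this diff): +/// `let dm = unsafe { storage.get_node_distance_measure(left, &mut stats) };` +/// `let d = unsafe { dm.get_distance(right, &mut stats) };`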
+pub trait NodeDistanceMeasure { + unsafe fn get_distance<S: StatsNodeRead + StatsDistanceComparison>( + &self, + index_pointer: IndexPointer, + stats: &mut S, + ) -> f32; +} + +pub trait ArchivedData { + fn with_data(data: &mut [u8]) -> Pin<&mut Self>; + fn is_deleted(&self) -> bool; + fn delete(self: Pin<&mut Self>); + fn get_heap_item_pointer(&self) -> HeapPointer; + fn get_index_pointer_to_neighbors(&self) -> Vec<ItemPointer>; +} + +pub trait Storage { + /// A QueryDistanceMeasure keeps the state needed to compute distances between the query given at initialization and a node. + type QueryDistanceMeasure; + /// A NodeDistanceMeasure keeps the state needed to compute distances between the node given at initialization and another node. + type NodeDistanceMeasure<'a>: NodeDistanceMeasure + where + Self: 'a; + type ArchivedType: ArchivedData; + type LSNPrivateData; + + fn page_type() -> PageType; + + fn create_node<S: StatsNodeWrite>( + &self, + full_vector: &[f32], + heap_pointer: HeapPointer, + meta_page: &MetaPage, + tape: &mut Tape, + stats: &mut S, + ) -> ItemPointer; + + fn start_training(&mut self, meta_page: &super::meta_page::MetaPage); + fn add_sample(&mut self, sample: &[f32]); + fn finish_training(&mut self, stats: &mut WriteStats); + + fn finalize_node_at_end_of_build<S: StatsNodeRead + StatsNodeModify>( + &mut self, + meta: &MetaPage, + index_pointer: IndexPointer, + neighbors: &Vec<NeighborWithDistance>, + stats: &mut S, + ); + + unsafe fn get_node_distance_measure<'a, S: StatsNodeRead>( + &'a self, + index_pointer: IndexPointer, + stats: &mut S, + ) -> Self::NodeDistanceMeasure<'a>; + + fn get_query_distance_measure(&self, query: PgVector) -> Self::QueryDistanceMeasure; + + fn get_full_distance_for_resort<S: StatsHeapNodeRead + StatsDistanceComparison>( + &self, + query: &Self::QueryDistanceMeasure, + index_pointer: IndexPointer, + heap_pointer: HeapPointer, + meta_page: &MetaPage, + stats: &mut S, + ) -> f32; + + fn visit_lsn( + &self, + lsr: &mut ListSearchResult<Self::QueryDistanceMeasure, Self::LSNPrivateData>, + lsn_idx: usize, + gns: &GraphNeighborStore, + ) where + Self: Sized; + + fn create_lsn_for_init_id( + &self, + lsr: &mut ListSearchResult<Self::QueryDistanceMeasure, Self::LSNPrivateData>, + index_pointer: ItemPointer, + gns: &GraphNeighborStore, + ) -> ListSearchNeighbor<Self::LSNPrivateData> + where + Self: Sized; + + fn return_lsn( + &self, + lsn: &ListSearchNeighbor<Self::LSNPrivateData>, + stats: &mut GreedySearchStats, + ) -> HeapPointer + where + Self: Sized; + + fn get_neighbors_with_distances_from_disk<S: StatsNodeRead + StatsDistanceComparison>( + &self, + neighbors_of: ItemPointer, + result: &mut Vec<NeighborWithDistance>, + stats: &mut S, + ); + + fn set_neighbors_on_disk<S: StatsNodeModify>( + &self, + meta: &MetaPage, + index_pointer: IndexPointer, + neighbors: &[NeighborWithDistance], + stats: &mut S, + ); + + fn get_distance_function(&self) -> fn(&[f32], &[f32]) -> f32; +} + +#[derive(PartialEq, Debug)] +pub enum StorageType { + Plain = 0, + SbqSpeedup = 1, + SbqCompression = 2, +} + +pub const DEFAULT_STORAGE_TYPE_STR: &str = "memory_optimized"; + +impl StorageType { + pub fn from_u8(value: u8) -> Self { + match value { + 0 => StorageType::Plain, + 1 => StorageType::SbqSpeedup, + 2 => StorageType::SbqCompression, + _ => panic!("Invalid storage type"), + } + } + + pub fn from_str(value: &str) -> Self { + match value.to_lowercase().as_str() { + "plain" => StorageType::Plain, + "bq_speedup" | "io_optimized" => StorageType::SbqSpeedup, + "bq_compression" | "memory_optimized" => StorageType::SbqCompression, + _ => panic!( + "Invalid storage type.
Must be one of 'plain', 'bq_speedup' (alias 'io_optimized'), or 'bq_compression' (alias 'memory_optimized')" + ), + } + } +} diff --git a/timescale_vector/src/access_method/storage_common.rs b/timescale_vector/src/access_method/storage_common.rs new file mode 100644 index 00000000..38484363 --- /dev/null +++ b/timescale_vector/src/access_method/storage_common.rs @@ -0,0 +1,10 @@ +use pgrx::{pg_sys, PgRelation}; + +pub fn get_attribute_number_from_index(index: &PgRelation) -> pg_sys::AttrNumber { + unsafe { + let a = index.rd_index; + let natts = (*a).indnatts; + assert!(natts == 1); + (*a).indkey.values.as_slice(natts as _)[0] + } +} diff --git a/timescale_vector/src/access_method/upgrade_test.rs b/timescale_vector/src/access_method/upgrade_test.rs new file mode 100644 index 00000000..0f408b0b --- /dev/null +++ b/timescale_vector/src/access_method/upgrade_test.rs @@ -0,0 +1,191 @@ +#[cfg(test)] +#[pgrx::pg_schema] +pub mod tests { + use pgrx::*; + use std::{fs, path::Path, process::Stdio}; + + fn copy_dir_all(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> std::io::Result<()> { + fs::create_dir_all(&dst)?; + for entry in fs::read_dir(src)? { + let entry = entry?; + let ty = entry.file_type()?; + if ty.is_dir() { + copy_dir_all(entry.path(), dst.as_ref().join(entry.file_name()))?; + } else { + fs::copy(entry.path(), dst.as_ref().join(entry.file_name()))?; + } + } + Ok(()) + } + + #[test] + #[ignore] + ///Tests upgrading the extension: runs the mock test_delete_mock_fn first only to bring up the test framework, then installs version 0.0.2 and updates it in place to the current version. + fn test_upgrade() { + pgrx_tests::run_test( + "test_delete_mock_fn", + None, + crate::pg_test::postgresql_conf_options(), + ) + .unwrap(); + + let (mut client, _) = pgrx_tests::client().unwrap(); + + client + .execute( + &format!("DROP EXTENSION IF EXISTS timescale_vector CASCADE;"), + &[], + ) + .unwrap(); + + let current_file = file!(); + + // Convert the file path to an absolute path + let current_dir = std::env::current_dir().unwrap(); + let mut absolute_path = std::path::Path::new(&current_dir).join(current_file); + absolute_path = absolute_path.ancestors().nth(4).unwrap().to_path_buf(); + + let temp_dir = tempfile::tempdir().unwrap(); + let temp_path = temp_dir.path(); + + copy_dir_all(absolute_path.clone(), temp_dir.path()).unwrap(); + + let pgrx = pgrx_pg_config::Pgrx::from_config().unwrap(); + let pg_version = pg_sys::get_pg_major_version_num(); + let pg_config = pgrx.get(&format!("pg{}", pg_version)).unwrap(); + + let version = "0.0.2"; + let res = std::process::Command::new("git") + .current_dir(temp_path) + .arg("checkout") + .arg("-f") + .arg(version) + .output() + .unwrap(); + assert!( + res.status.success(), + "failed: {:?} {:?} {:?}", + res, + absolute_path, + temp_dir.path() + ); + + // use latest pgrx + let res = std::process::Command::new("cargo") + .current_dir(temp_path.join("timescale_vector")) + .args(["rm", "pgrx"]) + .stdout(Stdio::inherit()) + .stderr(Stdio::piped()) + .output() + .unwrap(); + assert!(res.status.success(), "failed: {:?}", res); + + let res = std::process::Command::new("cargo") + .current_dir(temp_path.join("timescale_vector")) + .args(["rm", "--dev", "pgrx-tests"]) + .stdout(Stdio::inherit()) + .stderr(Stdio::piped()) + .output() + .unwrap(); + assert!(res.status.success(), "failed: {:?}", res); + + let res = std::process::Command::new("cargo") + .current_dir(temp_path.join("timescale_vector")) + .args(["add", "-F", &format!("pg{}", pg_version), "pgrx"]) + .stdout(Stdio::inherit()) + .stderr(Stdio::piped()) + .output() + .unwrap(); + assert!(res.status.success(), "failed: {:?}", res); + + //let contents =
fs::read_to_string(temp_path.join("timescale_vector/Cargo.toml")).unwrap(); + //print!("cargo {}", contents); + + let res = std::process::Command::new("cargo") + .current_dir(temp_path.join("timescale_vector")) + .arg("pgrx") + .arg("install") + .arg("--test") + .arg("--pg-config") + .arg(pg_config.path().unwrap()) + .stdout(Stdio::inherit()) + .stderr(Stdio::piped()) + .output() + .unwrap(); + assert!(res.status.success(), "failed: {:?}", res); + + client + .execute( + &format!( + "CREATE EXTENSION timescale_vector VERSION '{}' CASCADE;", + version + ), + &[], + ) + .unwrap(); + + let suffix = (1..=253) + .map(|i| format!("{}", i)) + .collect::<Vec<String>>() + .join(", "); + + client + .batch_execute(&format!( + "CREATE TABLE test(embedding vector(256)); + + select setseed(0.5); + -- generate 300 vectors + INSERT INTO test(embedding) + SELECT + * + FROM ( + SELECT + ('[ 0 , ' || array_to_string(array_agg(random()), ',', '0') || ']')::vector AS embedding + FROM + generate_series(1, 255 * 300) i + GROUP BY + i % 300) g; + + INSERT INTO test(embedding) VALUES ('[1,2,3,{suffix}]'), ('[4,5,6,{suffix}]'), ('[7,8,10,{suffix}]'); + + CREATE INDEX idxtest + ON test + USING tsv(embedding); + " + )) + .unwrap(); + + client.execute("set enable_seqscan = 0;", &[]).unwrap(); + let cnt: i64 = client.query_one(&format!("WITH cte as (select * from test order by embedding <=> '[1,1,1,{suffix}]') SELECT count(*) from cte;"), &[]).unwrap().get(0); + assert_eq!(cnt, 303, "count before upgrade"); + + //reinstall the current (development) build of the extension + let res = std::process::Command::new("cargo") + .arg("pgrx") + .arg("install") + .arg("--test") + .arg("--pg-config") + .arg(pg_config.path().unwrap()) + .stdout(Stdio::inherit()) + .stderr(Stdio::piped()) + .output() + .unwrap(); + assert!(res.status.success(), "failed: {:?}", res); + + //need to recreate the client to avoid double load of GUC. Look into this later.
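+ //(assumption: a fresh connection loads the newly installed .so cleanly, whereas reusing the old session would try to re-register the extension's GUCs on a second library load)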
+ let (mut client, _) = pgrx_tests::client().unwrap(); + client + .execute( + &format!( + "ALTER EXTENSION timescale_vector UPDATE TO '{}'", + env!("CARGO_PKG_VERSION") + ), + &[], + ) + .unwrap(); + + client.execute("set enable_seqscan = 0;", &[]).unwrap(); + let cnt: i64 = client.query_one(&format!("WITH cte as (select * from test order by embedding <=> '[1,1,1,{suffix}]') SELECT count(*) from cte;"), &[]).unwrap().get(0); + assert_eq!(cnt, 303, "count after upgrade"); + } +} diff --git a/timescale_vector/src/access_method/vacuum.rs b/timescale_vector/src/access_method/vacuum.rs index c723f2e2..11945b5e 100644 --- a/timescale_vector/src/access_method/vacuum.rs +++ b/timescale_vector/src/access_method/vacuum.rs @@ -1,14 +1,21 @@ -use pgrx::{pg_sys::FirstOffsetNumber, *}; +use pgrx::{ + pg_sys::{FirstOffsetNumber, IndexBulkDeleteResult}, + *, +}; use crate::{ - access_method::model::ArchivedNode, + access_method::{meta_page::MetaPage, plain_storage::PlainStorage, sbq::SbqSpeedupStorage}, util::{ - page::{PageType, WritablePage}, + page::WritablePage, ports::{PageGetItem, PageGetItemId, PageGetMaxOffsetNumber}, ItemPointer, }, }; +use crate::access_method::storage::ArchivedData; + +use super::storage::{Storage, StorageType}; + #[pg_guard] pub extern "C" fn ambulkdelete( info: *mut pg_sys::IndexVacuumInfo, @@ -29,9 +36,42 @@ pub extern "C" fn ambulkdelete( pg_sys::ForkNumber_MAIN_FORKNUM, ) }; + + let meta_page = MetaPage::fetch(&index_relation); + let storage = meta_page.get_storage_type(); + match storage { + StorageType::SbqSpeedup | StorageType::SbqCompression => { + bulk_delete_for_storage::<SbqSpeedupStorage>( + &index_relation, + nblocks, + results, + callback, + callback_state, + ); + } + StorageType::Plain => { + bulk_delete_for_storage::<PlainStorage>( + &index_relation, + nblocks, + results, + callback, + callback_state, + ); + } + } + results +} + +fn bulk_delete_for_storage<S: Storage>( + index: &PgRelation, + nblocks: u32, + results: *mut IndexBulkDeleteResult, + callback: pg_sys::IndexBulkDeleteCallback, + callback_state: *mut ::std::os::raw::c_void, +) { for block_number in 0..nblocks { - let page = unsafe { WritablePage::cleanup(&index_relation, block_number) }; - if page.get_type() != PageType::Node { + let page = unsafe { WritablePage::cleanup(&index, block_number) }; + if page.get_type() != S::page_type() { continue; } let mut modified = false; @@ -45,13 +85,13 @@ pub extern "C" fn ambulkdelete( let item = PageGetItem(*page, item_id) as *mut u8; let len = (*item_id).lp_len(); let data = std::slice::from_raw_parts_mut(item, len as _); - let node = ArchivedNode::with_data(data); + let node = S::ArchivedType::with_data(data); if node.is_deleted() { continue; } - let heap_pointer: ItemPointer = node.heap_item_pointer.deserialize_item_pointer(); + let heap_pointer: ItemPointer = node.get_heap_item_pointer(); let mut ctid: pg_sys::ItemPointerData = pg_sys::ItemPointerData { ..Default::default() }; @@ -71,7 +111,6 @@ pub extern "C" fn ambulkdelete( page.commit(); } } - results } #[pg_guard] pub extern "C" fn amvacuumcleanup( @@ -97,11 +136,20 @@ #[cfg(any(test, feature = "pg_test"))] #[pgrx::pg_schema] -mod tests { +pub mod tests { use pgrx::*; - #[test] - fn test_delete_vacuum_plain() { + #[cfg(test)] + static VAC_PLAIN_MUTEX: once_cell::sync::Lazy<std::sync::Mutex<()>> = + once_cell::sync::Lazy::new(std::sync::Mutex::default); + + #[cfg(test)] + pub fn test_delete_vacuum_plain_scaffold(index_options: &str) { + //do not run this test in parallel. (pgrx tests run in a txn rolled back after each test, but we do not have that luxury here).
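+ //Shared scaffold: concrete #[test] wrappers are expected to call this with their index options, e.g. (hypothetical) test_delete_vacuum_plain_scaffold("num_neighbors=30").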
+ + use rand::Rng; + let _lock = VAC_PLAIN_MUTEX.lock().unwrap(); + //we need to run vacuum in this test which cannot be run from SPI. //so we cannot use the pg_test framework here. Thus we do a bit of //hackery to bring up the test db and then use a client to run queries against it. @@ -117,26 +165,54 @@ mod tests { let (mut client, _) = pgrx_tests::client().unwrap(); client - .batch_execute( - "CREATE TABLE test_vac(embedding vector(3)); + .batch_execute(&format!( + "CREATE TABLE test_vac(id INT GENERATED ALWAYS AS IDENTITY, embedding vector(256)); + + select setseed(0.5); + -- generate 303 vectors + INSERT INTO test_vac (embedding) + SELECT + * + FROM ( + SELECT + ('[ ' || array_to_string(array_agg(random()), ',', '0') || ']')::vector AS embedding + FROM + generate_series(1, 256 * 303) i + GROUP BY + i % 303) g; - INSERT INTO test_vac(embedding) VALUES ('[1,2,3]'), ('[4,5,6]'), ('[7,8,10]'); CREATE INDEX idxtest_vac ON test_vac USING tsv(embedding) - WITH (num_neighbors=30); - ", - ) + WITH ({index_options}); + " + )) .unwrap(); + let test_vec: Option<Vec<f32>> = client + .query_one( + &format!( + "SELECT('{{' || array_to_string(array_agg(1.0), ',', '0') || '}}')::real[] AS embedding + FROM generate_series(1, 256)" + ), + &[], + ) + .unwrap() + .get(0); + let test_vec = test_vec + .unwrap() + .into_iter() + .map(|x| Some(x)) + .collect::<Vec<Option<f32>>>(); + client.execute("set enable_seqscan = 0;", &[]).unwrap(); - let cnt: i64 = client.query_one("WITH cte as (select * from test_vac order by embedding <=> '[1,1,1]') SELECT count(*) from cte;", &[]).unwrap().get(0); - assert_eq!(cnt, 3); + let cnt: i64 = client.query_one(&format!("WITH cte as (select * from test_vac order by embedding <=> $1::float4[]::vector) SELECT count(*) from cte;"), &[&test_vec]).unwrap().get(0); + assert_eq!(cnt, 303, "initial count"); client - .execute("DELETE FROM test_vac WHERE embedding = '[1,2,3]';", &[]) + .execute(&format!("DELETE FROM test_vac WHERE id = 301;"), &[]) .unwrap(); client.close().unwrap(); @@ -145,25 +221,47 @@ mod tests { client.execute("VACUUM test_vac", &[]).unwrap(); + let mut rng = rand::thread_rng(); + let rand_vec = (1..=256) + .map(|_i| format!("{}", rng.gen::<f32>())) + .collect::<Vec<String>>() + .join(", "); //inserts a new vector into the index slot freed by the delete and vacuum above client .execute( - "INSERT INTO test_vac(embedding) VALUES ('[10,12,13]');", + &format!("INSERT INTO test_vac(embedding) VALUES ('[{rand_vec}]');"), &[], ) .unwrap(); client.execute("set enable_seqscan = 0;", &[]).unwrap(); - let cnt: i64 = client.query_one("WITH cte as (select * from test_vac order by embedding <=> '[1,1,1]') SELECT count(*) from cte;", &[]).unwrap().get(0); - //if the old index is still used the count is 4 - assert_eq!(cnt, 3); + let cnt: i64 = client.query_one(&format!("WITH cte as (select * from test_vac order by embedding <=> $1::float4[]::vector) SELECT count(*) from cte;"), &[&test_vec]).unwrap().get(0); + //if the old index is still used the count is 304 + assert_eq!(cnt, 303, "count after vacuum"); + + //do another delete for same items (noop) + client + .execute(&format!("DELETE FROM test_vac WHERE id=301;"), &[]) + .unwrap(); + + client.execute("set enable_seqscan = 0;", &[]).unwrap(); + let cnt: i64 = client.query_one(&format!("WITH cte as (select * from test_vac order by embedding <=> $1::float4[]::vector) SELECT count(*) from cte;"), &[&test_vec]).unwrap().get(0); + //if the old index is still used the count is 304 + assert_eq!(cnt, 303, "count after delete"); client.execute("DROP INDEX idxtest_vac", &[]).unwrap();
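+ //drop the table as well so another storage variant reusing this scaffold starts from a clean slate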
client.execute("DROP TABLE test_vac", &[]).unwrap(); } - #[test] - fn test_delete_vacuum_full() { + #[cfg(test)] + static VAC_FULL_MUTEX: once_cell::sync::Lazy> = + once_cell::sync::Lazy::new(std::sync::Mutex::default); + + #[cfg(test)] + pub fn test_delete_vacuum_full_scaffold(index_options: &str) { + //do not run this test in parallel + let _lock = VAC_FULL_MUTEX.lock().unwrap(); + //we need to run vacuum in this test which cannot be run from SPI. //so we cannot use the pg_test framework here. Thus we do a bit of //hackery to bring up the test db and then use a client to run queries against it. @@ -178,27 +276,65 @@ mod tests { let (mut client, _) = pgrx_tests::client().unwrap(); - client - .batch_execute( - "CREATE TABLE test_vac_full(embedding vector(3)); + let suffix = (1..=253) + .map(|i| format!("{}", i)) + .collect::>() + .join(", "); - INSERT INTO test_vac_full(embedding) VALUES ('[1,2,3]'), ('[4,5,6]'), ('[7,8,10]'); + client + .batch_execute(&format!( + "CREATE TABLE test_vac_full(embedding vector(256)); + + select setseed(0.5); + -- generate 300 vectors + INSERT INTO test_vac_full (embedding) + SELECT + * + FROM ( + SELECT + ('[ 0 , ' || array_to_string(array_agg(random()), ',', '0') || ']')::vector AS embedding + FROM + generate_series(1, 255 * 300) i + GROUP BY + i % 300) g; + + INSERT INTO test_vac_full(embedding) VALUES ('[1,2,3,{suffix}]'), ('[4,5,6,{suffix}]'), ('[7,8,10,{suffix}]'); CREATE INDEX idxtest_vac_full ON test_vac_full USING tsv(embedding) - WITH (num_neighbors=30); - ", - ) + WITH ({index_options}); + " + )) .unwrap(); client.execute("set enable_seqscan = 0;", &[]).unwrap(); - let cnt: i64 = client.query_one("WITH cte as (select * from test_vac_full order by embedding <=> '[1,1,1]') SELECT count(*) from cte;", &[]).unwrap().get(0); - - assert_eq!(cnt, 3); + let cnt: i64 = client.query_one(&format!("WITH cte as (select * from test_vac_full order by embedding <=> '[1,1,1,{suffix}]') SELECT count(*) from cte;"), &[]).unwrap().get(0); + std::thread::sleep(std::time::Duration::from_millis(10000)); + assert_eq!(cnt, 303, "initial count"); client.execute("DELETE FROM test_vac_full", &[]).unwrap(); + client + .execute( + &format!( + " + INSERT INTO test_vac_full (embedding) + SELECT + * + FROM ( + SELECT + ('[ 0 , ' || array_to_string(array_agg(random()), ',', '0') || ']')::vector AS embedding + FROM + generate_series(1, 255 * 300) i + GROUP BY + i % 300) g; + " + ), + &[], + ) + .unwrap(); + client.close().unwrap(); let (mut client, _) = pgrx_tests::client().unwrap(); @@ -206,56 +342,22 @@ mod tests { client .execute( - "INSERT INTO test_vac_full(embedding) VALUES ('[1,2,3]');", + &format!("INSERT INTO test_vac_full(embedding) VALUES ('[1,2,3,{suffix}]');"), &[], ) .unwrap(); client.execute("set enable_seqscan = 0;", &[]).unwrap(); - let cnt: i64 = client.query_one("WITH cte as (select * from test_vac_full order by embedding <=> '[1,1,1]') SELECT count(*) from cte;", &[]).unwrap().get(0); - assert_eq!(cnt, 1); + let cnt: i64 = client.query_one(&format!("WITH cte as (select * from test_vac_full order by embedding <=> '[1,1,1,{suffix}]') SELECT count(*) from cte;"), &[]).unwrap().get(0); + assert_eq!(cnt, 301, "count after full vacuum"); client.execute("DROP INDEX idxtest_vac_full", &[]).unwrap(); client.execute("DROP TABLE test_vac_full", &[]).unwrap(); } + #[pg_test] ///This function is only a mock to bring up the test framewokr in test_delete_vacuum fn test_delete_mock_fn() -> spi::Result<()> { Ok(()) } - - #[pg_test] - unsafe fn test_delete() -> spi::Result<()> { - 
Spi::run(&format!( - "CREATE TABLE test(embedding vector(3)); - - INSERT INTO test(embedding) VALUES ('[1,2,3]'), ('[4,5,6]'), ('[7,8,10]'); - - CREATE INDEX idxtest - ON test - USING tsv(embedding) - WITH (num_neighbors=30); - - DELETE FROM test WHERE embedding = '[1,2,3]'; - ", - ))?; - - let res: Option<i64> = Spi::get_one(&format!( - " set enable_seqscan = 0; - WITH cte as (select * from test order by embedding <=> '[1,1,1]') SELECT count(*) from cte;", - ))?; - assert_eq!(2, res.unwrap()); - - //delete same thing again -- should be a no-op; - Spi::run(&format!("DELETE FROM test WHERE embedding = '[1,2,3]';",))?; - let res: Option<i64> = Spi::get_one(&format!( - " set enable_seqscan = 0; - WITH cte as (select * from test order by embedding <=> '[1,1,1]') SELECT count(*) from cte;", - ))?; - assert_eq!(2, res.unwrap()); - - Spi::run(&format!("drop index idxtest;",))?; - - Ok(()) - } } diff --git a/timescale_vector/src/lib.rs b/timescale_vector/src/lib.rs index 9753ddf4..72edc3c6 100644 --- a/timescale_vector/src/lib.rs +++ b/timescale_vector/src/lib.rs @@ -2,7 +2,7 @@ use pgrx::prelude::*; pgrx::pg_module_magic!(); -mod access_method; +pub mod access_method; mod util; #[allow(non_snake_case)] diff --git a/timescale_vector/src/util/buffer.rs b/timescale_vector/src/util/buffer.rs index 611d961d..da71291a 100644 --- a/timescale_vector/src/util/buffer.rs +++ b/timescale_vector/src/util/buffer.rs @@ -197,19 +197,19 @@ impl<'a> Deref for LockedBufferShare<'a> { /// has been pinned but not locked. /// /// It is probably not a good idea to hold on to this too long except during an index scan. -/// Does not use a LWLock. -pub struct PinnedBufferShare<'a> { - _relation: &'a PgRelation, +/// Does not use a LWLock. Note a pinned buffer is valid whether or not the relation that read it +/// is still open. +pub struct PinnedBufferShare { buffer: Buffer, } -impl<'a> PinnedBufferShare<'a> { +impl PinnedBufferShare { /// read returns a buffer for the given blockNumber in a relation. /// /// The returned block will be pinned /// /// Safety: Safe because it checks the block number doesn't overflow. ReadBufferExtended will throw an error if the block number is out of range for the relation - pub fn read(index: &'a PgRelation, block: BlockNumber) -> Self { + pub fn read(index: &PgRelation, block: BlockNumber) -> Self { let fork_number = ForkNumber_MAIN_FORKNUM; unsafe { @@ -220,15 +220,12 @@ impl<'a> PinnedBufferShare<'a> { ReadBufferMode_RBM_NORMAL, std::ptr::null_mut(), ); - PinnedBufferShare { - _relation: index, - buffer: buf, - } + PinnedBufferShare { buffer: buf } } } } -impl<'a> Drop for PinnedBufferShare<'a> { +impl Drop for PinnedBufferShare { /// drop unpins the buffer (a pin-only buffer holds no lock to release).
fn drop(&mut self) { unsafe { diff --git a/timescale_vector/src/util/mod.rs b/timescale_vector/src/util/mod.rs index 9bf47f60..35d98e96 100644 --- a/timescale_vector/src/util/mod.rs +++ b/timescale_vector/src/util/mod.rs @@ -1,6 +1,7 @@ pub mod buffer; pub mod page; pub mod ports; +pub mod table_slot; pub mod tape; use pgrx::PgRelation; @@ -19,6 +20,20 @@ pub struct ItemPointer { pub offset: pgrx::pg_sys::OffsetNumber, } +impl PartialOrd for ItemPointer { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for ItemPointer { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.block_number + .cmp(&other.block_number) + .then_with(|| self.offset.cmp(&other.offset)) + } +} + impl ArchivedItemPointer { pub fn deserialize_item_pointer(&self) -> ItemPointer { self.deserialize(&mut rkyv::Infallible).unwrap() } @@ -35,6 +50,10 @@ impl<'a> ReadableBuffer<'a> { pub fn get_data_slice(&self) -> &[u8] { unsafe { std::slice::from_raw_parts(self.ptr, self.len) } } + + pub fn get_owned_page(self) -> ReadablePage<'a> { + self._page + } } pub struct WritableBuffer<'a> { @@ -64,6 +83,11 @@ impl ItemPointer { } } + pub fn is_valid(&self) -> bool { + self.block_number != pgrx::pg_sys::InvalidBlockNumber + && self.offset != pgrx::pg_sys::InvalidOffsetNumber + } + pub unsafe fn with_page(page: &page::WritablePage, offset: pgrx::pg_sys::OffsetNumber) -> Self { Self { block_number: pgrx::pg_sys::BufferGetBlockNumber(**(page.get_buffer())), @@ -83,15 +107,9 @@ impl ItemPointer { pub unsafe fn read_bytes(self, index: &PgRelation) -> ReadableBuffer { let page = ReadablePage::read(index, self.block_number); - let item_id = PageGetItemId(*page, self.offset); - let item = PageGetItem(*page, item_id) as *mut u8; - let len = (*item_id).lp_len(); - ReadableBuffer { - _page: page, - ptr: item, - len: len as _, - } + page.get_item_unchecked(self.offset) } + pub unsafe fn modify_bytes(self, index: &PgRelation) -> WritableBuffer { let page = WritablePage::modify(index, self.block_number); let item_id = PageGetItemId(*page, self.offset); diff --git a/timescale_vector/src/util/page.rs b/timescale_vector/src/util/page.rs index bfe09d34..82339852 100--- --- a/timescale_vector/src/util/page.rs +++ b/timescale_vector/src/util/page.rs @@ -3,12 +3,16 @@ use pg_sys::Page; use pgrx::{ - pg_sys::{BlockNumber, BufferGetPage}, + pg_sys::{BlockNumber, BufferGetPage, OffsetNumber, BLCKSZ}, *, }; use std::ops::Deref; -use super::buffer::{LockedBufferExclusive, LockedBufferShare}; +use super::{ + buffer::{LockedBufferExclusive, LockedBufferShare}, + ports::{PageGetItem, PageGetItemId}, + ReadableBuffer, +}; pub struct WritablePage<'a> { buffer: LockedBufferExclusive<'a>, page: Page, @@ -20,21 +24,27 @@ pub const TSV_PAGE_ID: u16 = 0xAE24; /* magic number, generated randomly */ /// PageType identifies different types of pages in our index.
/// The layout of any one type should be consistent -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum PageType { - Meta = 0, + MetaV1 = 0, Node = 1, PqQuantizerDef = 2, PqQuantizerVector = 3, + SbqMeans = 4, + SbqNode = 5, + Meta = 6, } impl PageType { fn from_u8(value: u8) -> Self { match value { - 0 => PageType::Meta, + 0 => PageType::MetaV1, 1 => PageType::Node, 2 => PageType::PqQuantizerDef, 3 => PageType::PqQuantizerVector, + 4 => PageType::SbqMeans, + 5 => PageType::SbqNode, + 6 => PageType::Meta, _ => panic!("Unknown PageType number {}", value), } } } @@ -92,18 +102,25 @@ impl<'a> WritablePage<'a> { let state = pg_sys::GenericXLogStart(index.as_ptr()); //TODO do we need a GENERIC_XLOG_FULL_IMAGE option? let page = pg_sys::GenericXLogRegisterBuffer(state, *buffer, 0); - pg_sys::PageInit( - page, - pg_sys::BLCKSZ as usize, - std::mem::size_of::<TsvPageOpaqueData>(), - ); - *TsvPageOpaqueData::with_page(page) = TsvPageOpaqueData::new(page_type); - Self { + let mut new = Self { buffer: buffer, page: page, state: state, committed: false, - } + }; + new.reinit(page_type); + new + } + } + + pub fn reinit(&mut self, page_type: PageType) { + unsafe { + pg_sys::PageInit( + self.page, + pg_sys::BLCKSZ as usize, + std::mem::size_of::<TsvPageOpaqueData>(), + ); + *TsvPageOpaqueData::with_page(self.page) = TsvPageOpaqueData::new(page_type); } } @@ -112,6 +129,28 @@ impl<'a> WritablePage<'a> { Self::modify_with_buffer(index, buffer) } + pub fn add_item(&mut self, data: &[u8]) -> OffsetNumber { + let size = data.len(); + assert!(self.get_free_space() >= size); + unsafe { self.add_item_unchecked(data) } + } + + pub unsafe fn add_item_unchecked(&mut self, data: &[u8]) -> OffsetNumber { + let size = data.len(); + assert!(size < BLCKSZ as usize); + + let offset_number = pg_sys::PageAddItemExtended( + self.page, + data.as_ptr() as _, + size, + pg_sys::InvalidOffsetNumber, + 0, + ); + + assert!(offset_number != pg_sys::InvalidOffsetNumber); + offset_number + } + /// get a writable page for cleanup(vacuum) operations. pub unsafe fn cleanup(index: &'a PgRelation, block: BlockNumber) -> Self { let buffer = LockedBufferExclusive::read_for_cleanup(index, block); @@ -155,6 +194,16 @@ impl<'a> WritablePage<'a> { PageType::from_u8((*opaque_data).page_type) } } + + pub fn set_types(&self, new: PageType) { + unsafe { + let opaque_data = + //safe to do because self.page was already verified during construction + TsvPageOpaqueData::with_page(self.page); + + (*opaque_data).page_type = new as u8; + } + } /// commit saves all the changes to the page. /// Note that this will consume the page and make it unusable after the call. pub fn commit(mut self) { @@ -200,9 +249,29 @@ impl<'a> ReadablePage<'a> { } } + pub fn get_type(&self) -> PageType { + let opaque_data = TsvPageOpaqueData::read_from_page(&self.page); + PageType::from_u8((*opaque_data).page_type) + } + pub fn get_buffer(&self) -> &LockedBufferShare { + &self.buffer + } + + // Safety: unsafe because no verification of the offset is done.
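+ // (callers must only pass an offset that exists on this page, e.g. one no greater than PageGetMaxOffsetNumber; PageGetItemId on a bogus offset would read an invalid line pointer)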
+ pub unsafe fn get_item_unchecked( + self, + offset: pgrx::pg_sys::OffsetNumber, + ) -> ReadableBuffer<'a> { + let item_id = PageGetItemId(self.page, offset); + let item = PageGetItem(self.page, item_id) as *mut u8; + let len = (*item_id).lp_len(); + ReadableBuffer { + _page: self, + ptr: item, + len: len as _, + } + } } impl<'a> Deref for ReadablePage<'a> { diff --git a/timescale_vector/src/util/ports.rs b/timescale_vector/src/util/ports.rs index 6c8f32c4..f2bbcf13 100644 --- a/timescale_vector/src/util/ports.rs +++ b/timescale_vector/src/util/ports.rs @@ -2,6 +2,8 @@ //! Following pgrx conventions, we keep function names as close to Postgres as possible. //! Thus, we don't follow rust naming conventions. +use std::os::raw::c_int; + use memoffset::*; use pgrx::pg_sys::{Datum, ItemId, OffsetNumber, Pointer, TupleTableSlot}; use pgrx::{pg_sys, PgBox}; @@ -35,6 +37,7 @@ pub unsafe fn PageValidateSpecialPointer(page: pgrx::pg_sys::Page) { #[allow(non_upper_case_globals)] const SizeOfPageHeaderData: usize = offset_of!(pgrx::pg_sys::PageHeaderData, pd_linp); +pub const PROGRESS_CREATE_IDX_SUBPHASE: c_int = 10; #[allow(non_snake_case)] pub unsafe fn PageGetContents(page: pgrx::pg_sys::Page) -> *mut std::os::raw::c_char { diff --git a/timescale_vector/src/util/table_slot.rs b/timescale_vector/src/util/table_slot.rs new file mode 100644 index 00000000..092f5ac0 --- /dev/null +++ b/timescale_vector/src/util/table_slot.rs @@ -0,0 +1,51 @@ +use std::ptr::addr_of_mut; + +use pgrx::pg_sys::{Datum, TupleTableSlot}; +use pgrx::{pg_sys, PgBox, PgRelation}; + +use crate::access_method::stats::StatsHeapNodeRead; +use crate::util::ports::slot_getattr; +use crate::util::HeapPointer; + +pub struct TableSlot { + slot: PgBox<TupleTableSlot>, +} + +impl TableSlot { + pub unsafe fn new<S: StatsHeapNodeRead>( + heap_rel: &PgRelation, + heap_pointer: HeapPointer, + stats: &mut S, + ) -> Self { + let slot = PgBox::from_pg(pg_sys::table_slot_create( + heap_rel.as_ptr(), + std::ptr::null_mut(), + )); + + let table_am = heap_rel.rd_tableam; + let fetch_row_version = (*table_am).tuple_fetch_row_version.unwrap(); + let mut ctid: pg_sys::ItemPointerData = pg_sys::ItemPointerData { + ..Default::default() + }; + heap_pointer.to_item_pointer_data(&mut ctid); + fetch_row_version( + heap_rel.as_ptr(), + &mut ctid, + addr_of_mut!(pg_sys::SnapshotAnyData), + slot.as_ptr(), + ); + stats.record_heap_read(); + + Self { slot } + } + + pub unsafe fn get_attribute(&self, attribute_number: pg_sys::AttrNumber) -> Option<Datum> { + slot_getattr(&self.slot, attribute_number) + } +} + +impl Drop for TableSlot { + fn drop(&mut self) { + unsafe { pg_sys::ExecDropSingleTupleTableSlot(self.slot.as_ptr()) }; + } +} diff --git a/timescale_vector/src/util/tape.rs b/timescale_vector/src/util/tape.rs index 961dffda..f028f173 100644 --- a/timescale_vector/src/util/tape.rs +++ b/timescale_vector/src/util/tape.rs @@ -41,18 +41,11 @@ impl<'a> Tape<'a> { panic!("Not enough free space on new page"); } } - let offset_number = pg_sys::PageAddItemExtended( - *current_page, - data.as_ptr() as _, - size, - pg_sys::InvalidOffsetNumber, - 0, - ); - - assert!(offset_number != pg_sys::InvalidOffsetNumber); - let index_pointer = super::ItemPointer::with_page(&current_page, offset_number); + let offset_number = current_page.add_item_unchecked(data); + + let item_pointer = super::ItemPointer::with_page(&current_page, offset_number); current_page.commit(); - index_pointer + item_pointer } pub fn close(self) { diff --git a/timescale_vector/timescale_vector.control b/timescale_vector/timescale_vector.control index
8e9c6c90..e6d31610 100644 --- a/timescale_vector/timescale_vector.control +++ b/timescale_vector/timescale_vector.control @@ -1,6 +1,6 @@ comment = 'timescale_vector: Advanced indexing for vector data' default_version = '@CARGO_VERSION@' -module_pathname = '$libdir/timescale_vector' +#module_pathname = '$libdir/timescale_vector' relocatable = false superuser = true requires = 'vector' diff --git a/timescale_vector/timescale_vector_derive/Cargo.toml b/timescale_vector/timescale_vector_derive/Cargo.toml new file mode 100644 index 00000000..3f62da20 --- /dev/null +++ b/timescale_vector/timescale_vector_derive/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "timescale_vector_derive" +version = "0.1.0" +edition = "2021" + +[lib] +proc-macro = true + +[dependencies] +syn = "1.0" +quote = "1.0" \ No newline at end of file diff --git a/timescale_vector/timescale_vector_derive/src/lib.rs b/timescale_vector/timescale_vector_derive/src/lib.rs new file mode 100644 index 00000000..b82485a9 --- /dev/null +++ b/timescale_vector/timescale_vector_derive/src/lib.rs @@ -0,0 +1,105 @@ +use proc_macro::TokenStream; +use quote::{format_ident, quote}; + +#[proc_macro_derive(Readable)] +pub fn readable_macro_derive(input: TokenStream) -> TokenStream { + // Construct a representation of Rust code as a syntax tree + // that we can manipulate + let ast = syn::parse(input).unwrap(); + + // Build the trait implementation + impl_readable_macro(&ast) +} + +#[proc_macro_derive(Writeable)] +pub fn writeable_macro_derive(input: TokenStream) -> TokenStream { + let ast = syn::parse(input).unwrap(); + impl_writeable_macro(&ast) +} + +fn impl_readable_macro(ast: &syn::DeriveInput) -> TokenStream { + let name = &ast.ident; + let readable_name = format_ident!("Readable{}", name); + let archived_name = format_ident!("Archived{}", name); + let gen = quote! { + pub struct #readable_name<'a> { + _rb: ReadableBuffer<'a>, + } + + impl<'a> #readable_name<'a> { + pub fn with_readable_buffer(rb: ReadableBuffer<'a>) -> Self { + Self { _rb: rb } + } + + pub fn get_archived_node(&self) -> & #archived_name { + // checking the code here is expensive during build, so skip it. + // TODO: should we check the data during queries? + //rkyv::check_archived_root::<#name>(self._rb.get_data_slice()).unwrap() + unsafe { rkyv::archived_root::<#name>(self._rb.get_data_slice()) } + } + + pub fn get_owned_page(self) -> crate::util::page::ReadablePage<'a> { + self._rb.get_owned_page() + } + } + + impl #name { + pub unsafe fn read<'a, 'b, S: crate::access_method::stats::StatsNodeRead>(index: &'a PgRelation, index_pointer: ItemPointer, stats: &'b mut S) -> #readable_name<'a> { + let rb = index_pointer.read_bytes(index); + stats.record_read(); + #readable_name::with_readable_buffer(rb) + } + } + }; + gen.into() +} + +fn impl_writeable_macro(ast: &syn::DeriveInput) -> TokenStream { + let name = &ast.ident; + let writeable_name = format_ident!("Writable{}", name); + let archived_name = format_ident!("Archived{}", name); + let gen = quote!
{ + + pub struct #writeable_name<'a> { + wb: WritableBuffer<'a>, + } + + impl #archived_name { + pub fn with_data(data: &mut [u8]) -> std::pin::Pin<&mut #archived_name> { + let pinned_bytes = std::pin::Pin::new(data); + unsafe { rkyv::archived_root_mut::<#name>(pinned_bytes) } + } + } + + impl<'a> #writeable_name<'a> { + pub fn get_archived_node(&self) -> std::pin::Pin<&mut #archived_name> { + #archived_name::with_data(self.wb.get_data_slice()) + } + + pub fn commit(self) { + self.wb.commit() + } + } + + impl #name { + pub unsafe fn modify<'a, 'b, S: crate::access_method::stats::StatsNodeModify>(index: &'a PgRelation, index_pointer: ItemPointer, stats: &'b mut S) -> #writeable_name<'a> { + let wb = index_pointer.modify_bytes(index); + stats.record_modify(); + #writeable_name { wb: wb } + } + + pub fn write<S: crate::access_method::stats::StatsNodeWrite>(&self, tape: &mut crate::util::tape::Tape, stats: &mut S) -> ItemPointer { + let bytes = self.serialize_to_vec(); + stats.record_write(); + unsafe { tape.write(&bytes) } + } + + pub fn serialize_to_vec(&self) -> rkyv::util::AlignedVec { + //TODO 256 probably too small + rkyv::to_bytes::<_, 256>(self).unwrap() + } + } + }; + gen.into() +}
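// Illustrative usage of the derives above (a sketch for orientation, not code from this diff; SbqNode stands in for any rkyv-archived node type): annotating `#[derive(Archive, Deserialize, Serialize, Readable, Writeable)] pub struct SbqNode { ... }` generates ReadableSbqNode/WritableSbqNode wrappers, so `SbqNode::read(&index, ip, &mut stats)` yields an immutable stats-recording view and `SbqNode::modify(&index, ip, &mut stats)` yields a mutable one that is persisted with commit().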